diff --git a/info.ipynb b/info.ipynb index c9657c51..9f4fb4e3 100644 --- a/info.ipynb +++ b/info.ipynb @@ -2,20 +2,485 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "b6ea6c3b", "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "'numpy.ndarray' object is not callable", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[25]\u001b[39m\u001b[32m, line 20\u001b[39m\n\u001b[32m 18\u001b[39m X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=\u001b[32m0.2\u001b[39m, random_state=\u001b[32m42\u001b[39m)\n\u001b[32m 19\u001b[39m X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=\u001b[32m0.2\u001b[39m, random_state=\u001b[32m42\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m20\u001b[39m X_train = \u001b[43mX_train\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 23\u001b[39m n_features = \u001b[32m10\u001b[39m\n\u001b[32m 24\u001b[39m fig=plt.figure( figsize=(\u001b[32m15\u001b[39m, \u001b[32m15\u001b[39m) )\n", - "\u001b[31mTypeError\u001b[39m: 'numpy.ndarray' object is not callable" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageworkclassfnlwgteducationeducation.nummarital.statusoccupationrelationshipracesexcapital.gaincapital.losshours.per.weeknative.country
3254171?287372Doctorate16Married-civ-spouse?HusbandWhiteMale0010United-States
3254245State-gov252208HS-grad9SeparatedAdm-clericalOwn-childWhiteFemale0040United-States
3254341?202822HS-grad9Separated?Not-in-familyBlackFemale0032United-States
3254472?129912HS-grad9Married-civ-spouse?HusbandWhiteMale0025United-States
3254545Local-gov119199Assoc-acdm12DivorcedProf-specialtyUnmarriedWhiteFemale0048United-States
3254631Private199655Masters14DivorcedOther-serviceNot-in-familyOtherFemale0030United-States
3254739Local-gov111499Assoc-acdm12Married-civ-spouseAdm-clericalWifeWhiteFemale0020United-States
3254837Private198216Assoc-acdm12DivorcedTech-supportNot-in-familyWhiteFemale0040United-States
3254943Private260761HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040Mexico
3255043State-gov255835Some-college10DivorcedAdm-clericalOther-relativeWhiteFemale0040United-States
3255143Self-emp-not-inc27242Some-college10Married-civ-spouseCraft-repairHusbandWhiteMale0050United-States
3255232Private3406610th6Married-civ-spouseHandlers-cleanersHusbandAmer-Indian-EskimoMale0040United-States
3255343Private84661Assoc-voc11Married-civ-spouseSalesHusbandWhiteMale0045United-States
3255432Private116138Masters14Never-marriedTech-supportNot-in-familyAsian-Pac-IslanderMale0011Taiwan
3255553Private321865Masters14Married-civ-spouseExec-managerialHusbandWhiteMale0040United-States
3255622Private310152Some-college10Never-marriedProtective-servNot-in-familyWhiteMale0040United-States
3255727Private257302Assoc-acdm12Married-civ-spouseTech-supportWifeWhiteFemale0038United-States
3255840Private154374HS-grad9Married-civ-spouseMachine-op-inspctHusbandWhiteMale0040United-States
3255958Private151910HS-grad9WidowedAdm-clericalUnmarriedWhiteFemale0040United-States
3256022Private201490HS-grad9Never-marriedAdm-clericalOwn-childWhiteMale0020United-States
\n", + "
" + ], + "text/plain": [ + " age workclass fnlwgt education education.num \\\n", + "32541 71 ? 287372 Doctorate 16 \n", + "32542 45 State-gov 252208 HS-grad 9 \n", + "32543 41 ? 202822 HS-grad 9 \n", + "32544 72 ? 129912 HS-grad 9 \n", + "32545 45 Local-gov 119199 Assoc-acdm 12 \n", + "32546 31 Private 199655 Masters 14 \n", + "32547 39 Local-gov 111499 Assoc-acdm 12 \n", + "32548 37 Private 198216 Assoc-acdm 12 \n", + "32549 43 Private 260761 HS-grad 9 \n", + "32550 43 State-gov 255835 Some-college 10 \n", + "32551 43 Self-emp-not-inc 27242 Some-college 10 \n", + "32552 32 Private 34066 10th 6 \n", + "32553 43 Private 84661 Assoc-voc 11 \n", + "32554 32 Private 116138 Masters 14 \n", + "32555 53 Private 321865 Masters 14 \n", + "32556 22 Private 310152 Some-college 10 \n", + "32557 27 Private 257302 Assoc-acdm 12 \n", + "32558 40 Private 154374 HS-grad 9 \n", + "32559 58 Private 151910 HS-grad 9 \n", + "32560 22 Private 201490 HS-grad 9 \n", + "\n", + " marital.status occupation relationship \\\n", + "32541 Married-civ-spouse ? Husband \n", + "32542 Separated Adm-clerical Own-child \n", + "32543 Separated ? Not-in-family \n", + "32544 Married-civ-spouse ? Husband \n", + "32545 Divorced Prof-specialty Unmarried \n", + "32546 Divorced Other-service Not-in-family \n", + "32547 Married-civ-spouse Adm-clerical Wife \n", + "32548 Divorced Tech-support Not-in-family \n", + "32549 Married-civ-spouse Machine-op-inspct Husband \n", + "32550 Divorced Adm-clerical Other-relative \n", + "32551 Married-civ-spouse Craft-repair Husband \n", + "32552 Married-civ-spouse Handlers-cleaners Husband \n", + "32553 Married-civ-spouse Sales Husband \n", + "32554 Never-married Tech-support Not-in-family \n", + "32555 Married-civ-spouse Exec-managerial Husband \n", + "32556 Never-married Protective-serv Not-in-family \n", + "32557 Married-civ-spouse Tech-support Wife \n", + "32558 Married-civ-spouse Machine-op-inspct Husband \n", + "32559 Widowed Adm-clerical Unmarried \n", + "32560 Never-married Adm-clerical Own-child \n", + "\n", + " race sex capital.gain capital.loss hours.per.week \\\n", + "32541 White Male 0 0 10 \n", + "32542 White Female 0 0 40 \n", + "32543 Black Female 0 0 32 \n", + "32544 White Male 0 0 25 \n", + "32545 White Female 0 0 48 \n", + "32546 Other Female 0 0 30 \n", + "32547 White Female 0 0 20 \n", + "32548 White Female 0 0 40 \n", + "32549 White Male 0 0 40 \n", + "32550 White Female 0 0 40 \n", + "32551 White Male 0 0 50 \n", + "32552 Amer-Indian-Eskimo Male 0 0 40 \n", + "32553 White Male 0 0 45 \n", + "32554 Asian-Pac-Islander Male 0 0 11 \n", + "32555 White Male 0 0 40 \n", + "32556 White Male 0 0 40 \n", + "32557 White Female 0 0 38 \n", + "32558 White Male 0 0 40 \n", + "32559 White Female 0 0 40 \n", + "32560 White Male 0 0 20 \n", + "\n", + " native.country \n", + "32541 United-States \n", + "32542 United-States \n", + "32543 United-States \n", + "32544 United-States \n", + "32545 United-States \n", + "32546 United-States \n", + "32547 United-States \n", + "32548 United-States \n", + "32549 Mexico \n", + "32550 United-States \n", + "32551 United-States \n", + "32552 United-States \n", + "32553 United-States \n", + "32554 Taiwan \n", + "32555 United-States \n", + "32556 United-States \n", + "32557 United-States \n", + "32558 United-States \n", + "32559 United-States \n", + "32560 United-States " + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -39,27 +504,8 @@ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)\n", "\n", - "\n", - "\n", - "n_features = 10\n", - "fig=plt.figure( figsize=(15, 15) )\n", - "plt_num = 1\n", - "for i in range(n_features):\n", - " for j in range(n_features):\n", - " ax = fig.add_subplot(n_features, n_features, plt_num)\n", - " if(i == j):\n", - " ax.hist(X_train[:, i], bins=25, color='gray')\n", - " else:\n", - " ax.scatter(X_train[:, j], X_train[:, i], c=np.array(colors)[y_train], s=30, alpha=0.3)\n", - " \n", - " if(i == n_features-1):\n", - " ax.set_xlabel(f'$x_{{{j}}}$', fontsize=22)\n", - " \n", - " if(j==0):\n", - " ax.set_ylabel(f'$x_{{{i}}}$', fontsize=22)\n", - "\n", - " ax.grid(True)\n", - " plt_num +=1\n" + "X.tail(20)\n", + "\n" ] } ],