Merge branch 'main' of https://gitea.jany.se/Jany/MLPproject
This commit is contained in:
527
info.ipynb
527
info.ipynb
@@ -7,480 +7,15 @@
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>age</th>\n",
|
||||
" <th>workclass</th>\n",
|
||||
" <th>fnlwgt</th>\n",
|
||||
" <th>education</th>\n",
|
||||
" <th>education.num</th>\n",
|
||||
" <th>marital.status</th>\n",
|
||||
" <th>occupation</th>\n",
|
||||
" <th>relationship</th>\n",
|
||||
" <th>race</th>\n",
|
||||
" <th>sex</th>\n",
|
||||
" <th>capital.gain</th>\n",
|
||||
" <th>capital.loss</th>\n",
|
||||
" <th>hours.per.week</th>\n",
|
||||
" <th>native.country</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>32541</th>\n",
|
||||
" <td>71</td>\n",
|
||||
" <td>?</td>\n",
|
||||
" <td>287372</td>\n",
|
||||
" <td>Doctorate</td>\n",
|
||||
" <td>16</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>?</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32542</th>\n",
|
||||
" <td>45</td>\n",
|
||||
" <td>State-gov</td>\n",
|
||||
" <td>252208</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Separated</td>\n",
|
||||
" <td>Adm-clerical</td>\n",
|
||||
" <td>Own-child</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32543</th>\n",
|
||||
" <td>41</td>\n",
|
||||
" <td>?</td>\n",
|
||||
" <td>202822</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Separated</td>\n",
|
||||
" <td>?</td>\n",
|
||||
" <td>Not-in-family</td>\n",
|
||||
" <td>Black</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>32</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32544</th>\n",
|
||||
" <td>72</td>\n",
|
||||
" <td>?</td>\n",
|
||||
" <td>129912</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>?</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>25</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32545</th>\n",
|
||||
" <td>45</td>\n",
|
||||
" <td>Local-gov</td>\n",
|
||||
" <td>119199</td>\n",
|
||||
" <td>Assoc-acdm</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>Divorced</td>\n",
|
||||
" <td>Prof-specialty</td>\n",
|
||||
" <td>Unmarried</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>48</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32546</th>\n",
|
||||
" <td>31</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>199655</td>\n",
|
||||
" <td>Masters</td>\n",
|
||||
" <td>14</td>\n",
|
||||
" <td>Divorced</td>\n",
|
||||
" <td>Other-service</td>\n",
|
||||
" <td>Not-in-family</td>\n",
|
||||
" <td>Other</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>30</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32547</th>\n",
|
||||
" <td>39</td>\n",
|
||||
" <td>Local-gov</td>\n",
|
||||
" <td>111499</td>\n",
|
||||
" <td>Assoc-acdm</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Adm-clerical</td>\n",
|
||||
" <td>Wife</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32548</th>\n",
|
||||
" <td>37</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>198216</td>\n",
|
||||
" <td>Assoc-acdm</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>Divorced</td>\n",
|
||||
" <td>Tech-support</td>\n",
|
||||
" <td>Not-in-family</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32549</th>\n",
|
||||
" <td>43</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>260761</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Machine-op-inspct</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>Mexico</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32550</th>\n",
|
||||
" <td>43</td>\n",
|
||||
" <td>State-gov</td>\n",
|
||||
" <td>255835</td>\n",
|
||||
" <td>Some-college</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>Divorced</td>\n",
|
||||
" <td>Adm-clerical</td>\n",
|
||||
" <td>Other-relative</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32551</th>\n",
|
||||
" <td>43</td>\n",
|
||||
" <td>Self-emp-not-inc</td>\n",
|
||||
" <td>27242</td>\n",
|
||||
" <td>Some-college</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Craft-repair</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>50</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32552</th>\n",
|
||||
" <td>32</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>34066</td>\n",
|
||||
" <td>10th</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Handlers-cleaners</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>Amer-Indian-Eskimo</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32553</th>\n",
|
||||
" <td>43</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>84661</td>\n",
|
||||
" <td>Assoc-voc</td>\n",
|
||||
" <td>11</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Sales</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>45</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32554</th>\n",
|
||||
" <td>32</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>116138</td>\n",
|
||||
" <td>Masters</td>\n",
|
||||
" <td>14</td>\n",
|
||||
" <td>Never-married</td>\n",
|
||||
" <td>Tech-support</td>\n",
|
||||
" <td>Not-in-family</td>\n",
|
||||
" <td>Asian-Pac-Islander</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>11</td>\n",
|
||||
" <td>Taiwan</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32555</th>\n",
|
||||
" <td>53</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>321865</td>\n",
|
||||
" <td>Masters</td>\n",
|
||||
" <td>14</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Exec-managerial</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32556</th>\n",
|
||||
" <td>22</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>310152</td>\n",
|
||||
" <td>Some-college</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>Never-married</td>\n",
|
||||
" <td>Protective-serv</td>\n",
|
||||
" <td>Not-in-family</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32557</th>\n",
|
||||
" <td>27</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>257302</td>\n",
|
||||
" <td>Assoc-acdm</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Tech-support</td>\n",
|
||||
" <td>Wife</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>38</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32558</th>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>154374</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Married-civ-spouse</td>\n",
|
||||
" <td>Machine-op-inspct</td>\n",
|
||||
" <td>Husband</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32559</th>\n",
|
||||
" <td>58</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>151910</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Widowed</td>\n",
|
||||
" <td>Adm-clerical</td>\n",
|
||||
" <td>Unmarried</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>40</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>32560</th>\n",
|
||||
" <td>22</td>\n",
|
||||
" <td>Private</td>\n",
|
||||
" <td>201490</td>\n",
|
||||
" <td>HS-grad</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>Never-married</td>\n",
|
||||
" <td>Adm-clerical</td>\n",
|
||||
" <td>Own-child</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>United-States</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" age workclass fnlwgt education education.num \\\n",
|
||||
"32541 71 ? 287372 Doctorate 16 \n",
|
||||
"32542 45 State-gov 252208 HS-grad 9 \n",
|
||||
"32543 41 ? 202822 HS-grad 9 \n",
|
||||
"32544 72 ? 129912 HS-grad 9 \n",
|
||||
"32545 45 Local-gov 119199 Assoc-acdm 12 \n",
|
||||
"32546 31 Private 199655 Masters 14 \n",
|
||||
"32547 39 Local-gov 111499 Assoc-acdm 12 \n",
|
||||
"32548 37 Private 198216 Assoc-acdm 12 \n",
|
||||
"32549 43 Private 260761 HS-grad 9 \n",
|
||||
"32550 43 State-gov 255835 Some-college 10 \n",
|
||||
"32551 43 Self-emp-not-inc 27242 Some-college 10 \n",
|
||||
"32552 32 Private 34066 10th 6 \n",
|
||||
"32553 43 Private 84661 Assoc-voc 11 \n",
|
||||
"32554 32 Private 116138 Masters 14 \n",
|
||||
"32555 53 Private 321865 Masters 14 \n",
|
||||
"32556 22 Private 310152 Some-college 10 \n",
|
||||
"32557 27 Private 257302 Assoc-acdm 12 \n",
|
||||
"32558 40 Private 154374 HS-grad 9 \n",
|
||||
"32559 58 Private 151910 HS-grad 9 \n",
|
||||
"32560 22 Private 201490 HS-grad 9 \n",
|
||||
"\n",
|
||||
" marital.status occupation relationship \\\n",
|
||||
"32541 Married-civ-spouse ? Husband \n",
|
||||
"32542 Separated Adm-clerical Own-child \n",
|
||||
"32543 Separated ? Not-in-family \n",
|
||||
"32544 Married-civ-spouse ? Husband \n",
|
||||
"32545 Divorced Prof-specialty Unmarried \n",
|
||||
"32546 Divorced Other-service Not-in-family \n",
|
||||
"32547 Married-civ-spouse Adm-clerical Wife \n",
|
||||
"32548 Divorced Tech-support Not-in-family \n",
|
||||
"32549 Married-civ-spouse Machine-op-inspct Husband \n",
|
||||
"32550 Divorced Adm-clerical Other-relative \n",
|
||||
"32551 Married-civ-spouse Craft-repair Husband \n",
|
||||
"32552 Married-civ-spouse Handlers-cleaners Husband \n",
|
||||
"32553 Married-civ-spouse Sales Husband \n",
|
||||
"32554 Never-married Tech-support Not-in-family \n",
|
||||
"32555 Married-civ-spouse Exec-managerial Husband \n",
|
||||
"32556 Never-married Protective-serv Not-in-family \n",
|
||||
"32557 Married-civ-spouse Tech-support Wife \n",
|
||||
"32558 Married-civ-spouse Machine-op-inspct Husband \n",
|
||||
"32559 Widowed Adm-clerical Unmarried \n",
|
||||
"32560 Never-married Adm-clerical Own-child \n",
|
||||
"\n",
|
||||
" race sex capital.gain capital.loss hours.per.week \\\n",
|
||||
"32541 White Male 0 0 10 \n",
|
||||
"32542 White Female 0 0 40 \n",
|
||||
"32543 Black Female 0 0 32 \n",
|
||||
"32544 White Male 0 0 25 \n",
|
||||
"32545 White Female 0 0 48 \n",
|
||||
"32546 Other Female 0 0 30 \n",
|
||||
"32547 White Female 0 0 20 \n",
|
||||
"32548 White Female 0 0 40 \n",
|
||||
"32549 White Male 0 0 40 \n",
|
||||
"32550 White Female 0 0 40 \n",
|
||||
"32551 White Male 0 0 50 \n",
|
||||
"32552 Amer-Indian-Eskimo Male 0 0 40 \n",
|
||||
"32553 White Male 0 0 45 \n",
|
||||
"32554 Asian-Pac-Islander Male 0 0 11 \n",
|
||||
"32555 White Male 0 0 40 \n",
|
||||
"32556 White Male 0 0 40 \n",
|
||||
"32557 White Female 0 0 38 \n",
|
||||
"32558 White Male 0 0 40 \n",
|
||||
"32559 White Female 0 0 40 \n",
|
||||
"32560 White Male 0 0 20 \n",
|
||||
"\n",
|
||||
" native.country \n",
|
||||
"32541 United-States \n",
|
||||
"32542 United-States \n",
|
||||
"32543 United-States \n",
|
||||
"32544 United-States \n",
|
||||
"32545 United-States \n",
|
||||
"32546 United-States \n",
|
||||
"32547 United-States \n",
|
||||
"32548 United-States \n",
|
||||
"32549 Mexico \n",
|
||||
"32550 United-States \n",
|
||||
"32551 United-States \n",
|
||||
"32552 United-States \n",
|
||||
"32553 United-States \n",
|
||||
"32554 Taiwan \n",
|
||||
"32555 United-States \n",
|
||||
"32556 United-States \n",
|
||||
"32557 United-States \n",
|
||||
"32558 United-States \n",
|
||||
"32559 United-States \n",
|
||||
"32560 United-States "
|
||||
]
|
||||
},
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"ename": "TypeError",
|
||||
"evalue": "'numpy.ndarray' object is not callable",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mTypeError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[25]\u001b[39m\u001b[32m, line 20\u001b[39m\n\u001b[32m 18\u001b[39m X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=\u001b[32m0.2\u001b[39m, random_state=\u001b[32m42\u001b[39m)\n\u001b[32m 19\u001b[39m X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=\u001b[32m0.2\u001b[39m, random_state=\u001b[32m42\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m20\u001b[39m X_train = \u001b[43mX_train\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 23\u001b[39m n_features = \u001b[32m10\u001b[39m\n\u001b[32m 24\u001b[39m fig=plt.figure( figsize=(\u001b[32m15\u001b[39m, \u001b[32m15\u001b[39m) )\n",
|
||||
"\u001b[31mTypeError\u001b[39m: 'numpy.ndarray' object is not callable"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -489,23 +24,51 @@
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier, plot_tree\n",
|
||||
"from sklearn.preprocessing import LabelEncoder\n",
|
||||
"\n",
|
||||
"df = pd.read_csv('./Datasets/adult.csv', comment = '#')\n",
|
||||
"\n",
|
||||
"# First, create a copy of the dataframe to avoid modifying the original\n",
|
||||
"df_encoded = df.copy()\n",
|
||||
"\n",
|
||||
"data = pd.read_csv('./Datasets/adult.csv', comment = '#')\n",
|
||||
"# Apply label encoding to categorical columns\n",
|
||||
"label_encoder = LabelEncoder()\n",
|
||||
"categorical_columns = ['workclass', 'marital.status', 'occupation', \n",
|
||||
" 'relationship', 'race', 'sex', 'income']\n",
|
||||
"\n",
|
||||
"# Features\n",
|
||||
"X = data.drop(columns=['income'])\n",
|
||||
"\n",
|
||||
"# Labels\n",
|
||||
"y = data['income']\n",
|
||||
"for column in categorical_columns:\n",
|
||||
" df_encoded[column] = label_encoder.fit_transform(df_encoded[column])\n",
|
||||
"\n",
|
||||
"# Now properly separate features and target\n",
|
||||
"X = df_encoded.drop(columns=['income', 'native.country', 'education'])\n",
|
||||
"y = df_encoded['income']\n",
|
||||
"\n",
|
||||
"# Split the data\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
||||
"X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)\n",
|
||||
"\n",
|
||||
"X.tail(20)\n",
|
||||
"\n"
|
||||
"\n",
|
||||
"\n",
|
||||
"n_features = 10\n",
|
||||
"fig=plt.figure( figsize=(15, 15) )\n",
|
||||
"plt_num = 1\n",
|
||||
"for i in range(n_features):\n",
|
||||
" for j in range(n_features):\n",
|
||||
" ax = fig.add_subplot(n_features, n_features, plt_num)\n",
|
||||
" if(i == j):\n",
|
||||
" ax.hist(X_train[:, i], bins=25, color='gray')\n",
|
||||
" else:\n",
|
||||
" ax.scatter(X_train[:, j], X_train[:, i], c=np.array(colors)[y_train], s=30, alpha=0.3)\n",
|
||||
" \n",
|
||||
" if(i == n_features-1):\n",
|
||||
" ax.set_xlabel(f'$x_{{{j}}}$', fontsize=22)\n",
|
||||
" \n",
|
||||
" if(j==0):\n",
|
||||
" ax.set_ylabel(f'$x_{{{i}}}$', fontsize=22)\n",
|
||||
"\n",
|
||||
" ax.grid(True)\n",
|
||||
" plt_num +=1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user