534 lines
19 KiB
Plaintext
534 lines
19 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"id": "b6ea6c3b",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>age</th>\n",
|
|
" <th>workclass</th>\n",
|
|
" <th>fnlwgt</th>\n",
|
|
" <th>education</th>\n",
|
|
" <th>education.num</th>\n",
|
|
" <th>marital.status</th>\n",
|
|
" <th>occupation</th>\n",
|
|
" <th>relationship</th>\n",
|
|
" <th>race</th>\n",
|
|
" <th>sex</th>\n",
|
|
" <th>capital.gain</th>\n",
|
|
" <th>capital.loss</th>\n",
|
|
" <th>hours.per.week</th>\n",
|
|
" <th>native.country</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>32541</th>\n",
|
|
" <td>71</td>\n",
|
|
" <td>?</td>\n",
|
|
" <td>287372</td>\n",
|
|
" <td>Doctorate</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>?</td>\n",
|
|
" <td>Husband</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>10</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32542</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>State-gov</td>\n",
|
|
" <td>252208</td>\n",
|
|
" <td>HS-grad</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>Separated</td>\n",
|
|
" <td>Adm-clerical</td>\n",
|
|
" <td>Own-child</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Female</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32543</th>\n",
|
|
" <td>41</td>\n",
|
|
" <td>?</td>\n",
|
|
" <td>202822</td>\n",
|
|
" <td>HS-grad</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>Separated</td>\n",
|
|
" <td>?</td>\n",
|
|
" <td>Not-in-family</td>\n",
|
|
" <td>Black</td>\n",
|
|
" <td>Female</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>32</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32544</th>\n",
|
|
" <td>72</td>\n",
|
|
" <td>?</td>\n",
|
|
" <td>129912</td>\n",
|
|
" <td>HS-grad</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>?</td>\n",
|
|
" <td>Husband</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>25</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32545</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>Local-gov</td>\n",
|
|
" <td>119199</td>\n",
|
|
" <td>Assoc-acdm</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>Divorced</td>\n",
|
|
" <td>Prof-specialty</td>\n",
|
|
" <td>Unmarried</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Female</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>48</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32546</th>\n",
|
|
" <td>31</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>199655</td>\n",
|
|
" <td>Masters</td>\n",
|
|
" <td>14</td>\n",
|
|
" <td>Divorced</td>\n",
|
|
" <td>Other-service</td>\n",
|
|
" <td>Not-in-family</td>\n",
|
|
" <td>Other</td>\n",
|
|
" <td>Female</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>30</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32547</th>\n",
|
|
" <td>39</td>\n",
|
|
" <td>Local-gov</td>\n",
|
|
" <td>111499</td>\n",
|
|
" <td>Assoc-acdm</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>Adm-clerical</td>\n",
|
|
" <td>Wife</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Female</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>20</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32548</th>\n",
|
|
" <td>37</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>198216</td>\n",
|
|
" <td>Assoc-acdm</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>Divorced</td>\n",
|
|
" <td>Tech-support</td>\n",
|
|
" <td>Not-in-family</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Female</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32549</th>\n",
|
|
" <td>43</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>260761</td>\n",
|
|
" <td>HS-grad</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>Machine-op-inspct</td>\n",
|
|
" <td>Husband</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>Mexico</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32550</th>\n",
|
|
" <td>43</td>\n",
|
|
" <td>State-gov</td>\n",
|
|
" <td>255835</td>\n",
|
|
" <td>Some-college</td>\n",
|
|
" <td>10</td>\n",
|
|
" <td>Divorced</td>\n",
|
|
" <td>Adm-clerical</td>\n",
|
|
" <td>Other-relative</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Female</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32551</th>\n",
|
|
" <td>43</td>\n",
|
|
" <td>Self-emp-not-inc</td>\n",
|
|
" <td>27242</td>\n",
|
|
" <td>Some-college</td>\n",
|
|
" <td>10</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>Craft-repair</td>\n",
|
|
" <td>Husband</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>50</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32552</th>\n",
|
|
" <td>32</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>34066</td>\n",
|
|
" <td>10th</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>Handlers-cleaners</td>\n",
|
|
" <td>Husband</td>\n",
|
|
" <td>Amer-Indian-Eskimo</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32553</th>\n",
|
|
" <td>43</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>84661</td>\n",
|
|
" <td>Assoc-voc</td>\n",
|
|
" <td>11</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>Sales</td>\n",
|
|
" <td>Husband</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>45</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32554</th>\n",
|
|
" <td>32</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>116138</td>\n",
|
|
" <td>Masters</td>\n",
|
|
" <td>14</td>\n",
|
|
" <td>Never-married</td>\n",
|
|
" <td>Tech-support</td>\n",
|
|
" <td>Not-in-family</td>\n",
|
|
" <td>Asian-Pac-Islander</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>11</td>\n",
|
|
" <td>Taiwan</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32555</th>\n",
|
|
" <td>53</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>321865</td>\n",
|
|
" <td>Masters</td>\n",
|
|
" <td>14</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>Exec-managerial</td>\n",
|
|
" <td>Husband</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32556</th>\n",
|
|
" <td>22</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>310152</td>\n",
|
|
" <td>Some-college</td>\n",
|
|
" <td>10</td>\n",
|
|
" <td>Never-married</td>\n",
|
|
" <td>Protective-serv</td>\n",
|
|
" <td>Not-in-family</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32557</th>\n",
|
|
" <td>27</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>257302</td>\n",
|
|
" <td>Assoc-acdm</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>Tech-support</td>\n",
|
|
" <td>Wife</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Female</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>38</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32558</th>\n",
|
|
" <td>40</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>154374</td>\n",
|
|
" <td>HS-grad</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>Married-civ-spouse</td>\n",
|
|
" <td>Machine-op-inspct</td>\n",
|
|
" <td>Husband</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32559</th>\n",
|
|
" <td>58</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>151910</td>\n",
|
|
" <td>HS-grad</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>Widowed</td>\n",
|
|
" <td>Adm-clerical</td>\n",
|
|
" <td>Unmarried</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Female</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>40</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>32560</th>\n",
|
|
" <td>22</td>\n",
|
|
" <td>Private</td>\n",
|
|
" <td>201490</td>\n",
|
|
" <td>HS-grad</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>Never-married</td>\n",
|
|
" <td>Adm-clerical</td>\n",
|
|
" <td>Own-child</td>\n",
|
|
" <td>White</td>\n",
|
|
" <td>Male</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>20</td>\n",
|
|
" <td>United-States</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" age workclass fnlwgt education education.num \\\n",
|
|
"32541 71 ? 287372 Doctorate 16 \n",
|
|
"32542 45 State-gov 252208 HS-grad 9 \n",
|
|
"32543 41 ? 202822 HS-grad 9 \n",
|
|
"32544 72 ? 129912 HS-grad 9 \n",
|
|
"32545 45 Local-gov 119199 Assoc-acdm 12 \n",
|
|
"32546 31 Private 199655 Masters 14 \n",
|
|
"32547 39 Local-gov 111499 Assoc-acdm 12 \n",
|
|
"32548 37 Private 198216 Assoc-acdm 12 \n",
|
|
"32549 43 Private 260761 HS-grad 9 \n",
|
|
"32550 43 State-gov 255835 Some-college 10 \n",
|
|
"32551 43 Self-emp-not-inc 27242 Some-college 10 \n",
|
|
"32552 32 Private 34066 10th 6 \n",
|
|
"32553 43 Private 84661 Assoc-voc 11 \n",
|
|
"32554 32 Private 116138 Masters 14 \n",
|
|
"32555 53 Private 321865 Masters 14 \n",
|
|
"32556 22 Private 310152 Some-college 10 \n",
|
|
"32557 27 Private 257302 Assoc-acdm 12 \n",
|
|
"32558 40 Private 154374 HS-grad 9 \n",
|
|
"32559 58 Private 151910 HS-grad 9 \n",
|
|
"32560 22 Private 201490 HS-grad 9 \n",
|
|
"\n",
|
|
" marital.status occupation relationship \\\n",
|
|
"32541 Married-civ-spouse ? Husband \n",
|
|
"32542 Separated Adm-clerical Own-child \n",
|
|
"32543 Separated ? Not-in-family \n",
|
|
"32544 Married-civ-spouse ? Husband \n",
|
|
"32545 Divorced Prof-specialty Unmarried \n",
|
|
"32546 Divorced Other-service Not-in-family \n",
|
|
"32547 Married-civ-spouse Adm-clerical Wife \n",
|
|
"32548 Divorced Tech-support Not-in-family \n",
|
|
"32549 Married-civ-spouse Machine-op-inspct Husband \n",
|
|
"32550 Divorced Adm-clerical Other-relative \n",
|
|
"32551 Married-civ-spouse Craft-repair Husband \n",
|
|
"32552 Married-civ-spouse Handlers-cleaners Husband \n",
|
|
"32553 Married-civ-spouse Sales Husband \n",
|
|
"32554 Never-married Tech-support Not-in-family \n",
|
|
"32555 Married-civ-spouse Exec-managerial Husband \n",
|
|
"32556 Never-married Protective-serv Not-in-family \n",
|
|
"32557 Married-civ-spouse Tech-support Wife \n",
|
|
"32558 Married-civ-spouse Machine-op-inspct Husband \n",
|
|
"32559 Widowed Adm-clerical Unmarried \n",
|
|
"32560 Never-married Adm-clerical Own-child \n",
|
|
"\n",
|
|
" race sex capital.gain capital.loss hours.per.week \\\n",
|
|
"32541 White Male 0 0 10 \n",
|
|
"32542 White Female 0 0 40 \n",
|
|
"32543 Black Female 0 0 32 \n",
|
|
"32544 White Male 0 0 25 \n",
|
|
"32545 White Female 0 0 48 \n",
|
|
"32546 Other Female 0 0 30 \n",
|
|
"32547 White Female 0 0 20 \n",
|
|
"32548 White Female 0 0 40 \n",
|
|
"32549 White Male 0 0 40 \n",
|
|
"32550 White Female 0 0 40 \n",
|
|
"32551 White Male 0 0 50 \n",
|
|
"32552 Amer-Indian-Eskimo Male 0 0 40 \n",
|
|
"32553 White Male 0 0 45 \n",
|
|
"32554 Asian-Pac-Islander Male 0 0 11 \n",
|
|
"32555 White Male 0 0 40 \n",
|
|
"32556 White Male 0 0 40 \n",
|
|
"32557 White Female 0 0 38 \n",
|
|
"32558 White Male 0 0 40 \n",
|
|
"32559 White Female 0 0 40 \n",
|
|
"32560 White Male 0 0 20 \n",
|
|
"\n",
|
|
" native.country \n",
|
|
"32541 United-States \n",
|
|
"32542 United-States \n",
|
|
"32543 United-States \n",
|
|
"32544 United-States \n",
|
|
"32545 United-States \n",
|
|
"32546 United-States \n",
|
|
"32547 United-States \n",
|
|
"32548 United-States \n",
|
|
"32549 Mexico \n",
|
|
"32550 United-States \n",
|
|
"32551 United-States \n",
|
|
"32552 United-States \n",
|
|
"32553 United-States \n",
|
|
"32554 Taiwan \n",
|
|
"32555 United-States \n",
|
|
"32556 United-States \n",
|
|
"32557 United-States \n",
|
|
"32558 United-States \n",
|
|
"32559 United-States \n",
|
|
"32560 United-States "
|
|
]
|
|
},
|
|
"execution_count": 34,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.pipeline import Pipeline\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
"# --- Configuration: every value a reader might want to tune ---\n",
"DATA_PATH = './Datasets/adult.csv'\n",
"TARGET_COLUMN = 'income'\n",
"SEED = 42\n",
"TEST_SIZE = 0.2  # fraction of all rows held out as the final test set\n",
"VAL_SIZE = 0.2   # fraction of the remaining rows held out for validation\n",
"\n",
"# comment='#': anything following a '#' on a line of the CSV is not parsed.\n",
"# NOTE(review): the displayed rows contain a literal '?' in workclass and\n",
"# occupation, presumably a missing-value marker. It is kept as a plain\n",
"# string here; confirm whether na_values='?' is wanted before modelling.\n",
"data = pd.read_csv(DATA_PATH, comment='#')\n",
"\n",
"# Features are every column except the target; labels are the target column.\n",
"X = data.drop(columns=[TARGET_COLUMN])\n",
"y = data[TARGET_COLUMN]\n",
"\n",
"# First split: hold out the test set. The remainder keeps its own name so\n",
"# the train+validation pool is still available after the second split.\n",
"X_train_val, X_test, y_train_val, y_test = train_test_split(\n",
"    X, y, test_size=TEST_SIZE, random_state=SEED\n",
")\n",
"\n",
"# Second split: carve the validation set out of the remaining rows\n",
"# (roughly 64% train / 16% validation / 20% test overall).\n",
"X_train, X_val, y_train, y_val = train_test_split(\n",
"    X_train_val, y_train_val, test_size=VAL_SIZE, random_state=SEED\n",
")\n",
"\n",
"# Sanity checks: the three splits partition the data and stay aligned.\n",
"assert len(X_train) + len(X_val) + len(X_test) == len(X)\n",
"assert len(X_train) == len(y_train)\n",
"assert len(X_val) == len(y_val)\n",
"assert len(X_test) == len(y_test)\n",
"\n",
"X.tail(20)\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.11"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|