{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-md",
   "metadata": {},
   "source": [
    "# Adult income data: encode, split, explore\n",
    "\n",
    "This notebook loads `./Datasets/adult.csv`, label-encodes its categorical columns, makes train / validation / test splits and draws a pairwise scatter matrix of the training features, coloured by the `income` target.\n",
    "\n",
    "It is meant to run top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6ea6c3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.lines import Line2D\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.tree import DecisionTreeClassifier, plot_tree\n",
    "from sklearn.preprocessing import LabelEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives in this cell.\n",
    "DATA_PATH = './Datasets/adult.csv'\n",
    "SEED = 42          # shared by both splits so they are reproducible\n",
    "TEST_SIZE = 0.2    # fraction held out at each of the two splits\n",
    "\n",
    "TARGET_COLUMN = 'income'\n",
    "CATEGORICAL_COLUMNS = ['workclass', 'marital.status', 'occupation',\n",
    "                       'relationship', 'race', 'sex', TARGET_COLUMN]\n",
    "# Columns kept out of the feature matrix X: the target itself plus two\n",
    "# columns that are not in CATEGORICAL_COLUMNS and therefore never encoded.\n",
    "EXCLUDED_COLUMNS = [TARGET_COLUMN, 'native.country', 'education']\n",
    "\n",
    "MAX_PLOT_FEATURES = 10   # upper bound on the scatter-matrix size (grid is n x n)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "load-md",
   "metadata": {},
   "source": [
    "## Load and encode\n",
    "\n",
    "The raw frame `df` is left untouched; encoding happens on a copy. Each categorical column gets its own fitted `LabelEncoder`, stored in `label_encoders`, so the integer codes can be mapped back to the original labels later."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "load-encode",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(DATA_PATH, comment='#')\n",
    "\n",
    "# Work on a copy so the raw values stay available for inspection.\n",
    "df_encoded = df.copy()\n",
    "\n",
    "# One encoder per column. A single shared LabelEncoder would be refit on every\n",
    "# iteration and could afterwards only decode the last column it saw.\n",
    "label_encoders = {}\n",
    "for column in CATEGORICAL_COLUMNS:\n",
    "    encoder = LabelEncoder()\n",
    "    df_encoded[column] = encoder.fit_transform(df_encoded[column])\n",
    "    label_encoders[column] = encoder\n",
    "\n",
    "# Separate features and target.\n",
    "X = df_encoded.drop(columns=EXCLUDED_COLUMNS)\n",
    "y = df_encoded[TARGET_COLUMN]\n",
    "\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "split-md",
   "metadata": {},
   "source": [
    "## Train / validation / test split\n",
    "\n",
    "Two successive splits with the same held-out fraction: first the test set is carved off, then the remainder is divided into training and validation data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "split",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The intermediate name keeps this cell idempotent and makes the lineage explicit.\n",
    "X_trainval, X_test, y_trainval, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SEED)\n",
    "X_train, X_val, y_train, y_val = train_test_split(\n",
    "    X_trainval, y_trainval, test_size=TEST_SIZE, random_state=SEED)\n",
    "\n",
    "# Sanity check: the three splits partition the data set.\n",
    "assert len(X_train) + len(X_val) + len(X_test) == len(X)\n",
    "\n",
    "X_train.shape, X_val.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pairplot-md",
   "metadata": {},
   "source": [
    "## Pairwise feature plot (training split)\n",
    "\n",
    "Diagonal panels show the histogram of one feature; every other panel is a scatter plot of the column feature (x axis) against the row feature (y axis), coloured by the encoded `income` class."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pairplot",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Never ask for more panels than there are feature columns.\n",
    "n_features = min(MAX_PLOT_FEATURES, X_train.shape[1])\n",
    "feature_names = X_train.columns[:n_features]\n",
    "\n",
    "# X_train is a DataFrame, so X_train[:, i] is treated as a column-label lookup\n",
    "# and raises InvalidIndexError. Take positional NumPy views once, up front.\n",
    "features = X_train.iloc[:, :n_features].to_numpy()\n",
    "labels = y_train.to_numpy()\n",
    "\n",
    "# One colour per income class, in the order the fitted encoder assigned the codes.\n",
    "income_classes = label_encoders[TARGET_COLUMN].classes_\n",
    "cmap = plt.get_cmap('tab10')\n",
    "colors = [cmap(k) for k in range(len(income_classes))]\n",
    "point_colors = np.array(colors)[labels]\n",
    "\n",
    "# squeeze=False keeps axes two-dimensional even for a 1 x 1 grid.\n",
    "fig, axes = plt.subplots(n_features, n_features, figsize=(15, 15), squeeze=False)\n",
    "for i in range(n_features):\n",
    "    for j in range(n_features):\n",
    "        ax = axes[i, j]\n",
    "        if i == j:\n",
    "            ax.hist(features[:, i], bins=25, color='gray')\n",
    "        else:\n",
    "            ax.scatter(features[:, j], features[:, i], c=point_colors, s=30, alpha=0.3)\n",
    "\n",
    "        # Label only the outer edge of the grid to keep the panels readable.\n",
    "        if i == n_features - 1:\n",
    "            ax.set_xlabel(feature_names[j], fontsize=10)\n",
    "        if j == 0:\n",
    "            ax.set_ylabel(feature_names[i], fontsize=10)\n",
    "\n",
    "        ax.grid(True)\n",
    "\n",
    "legend_handles = [\n",
    "    Line2D([], [], marker='o', linestyle='', color=color, label=str(name))\n",
    "    for color, name in zip(colors, income_classes)\n",
    "]\n",
    "fig.legend(handles=legend_handles, title=TARGET_COLUMN, loc='upper right')\n",
    "fig.suptitle('Pairwise feature relationships (training split)')\n",
    "# Leave a band at the top for the title and the legend.\n",
    "fig.tight_layout(rect=(0, 0, 1, 0.95))\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}