Added preliminary analysis and decision tree model files.

This commit is contained in:
2025-10-21 13:38:30 +02:00
parent 6b51a45a76
commit 448eb25e6f
4 changed files with 514 additions and 7 deletions

364
Analysis.ipynb Normal file

File diff suppressed because one or more lines are too long

123
Decision_tree.ipynb Normal file

File diff suppressed because one or more lines are too long

BIN
decision_tree.pdf Normal file

Binary file not shown.

View File

@@ -2,20 +2,40 @@
"cells": [
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 1,
"id": "b6ea6c3b",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "'numpy.ndarray' object is not callable",
"ename": "InvalidIndexError",
"evalue": "(slice(None, None, None), 0)",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mTypeError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[25]\u001b[39m\u001b[32m, line 20\u001b[39m\n\u001b[32m 18\u001b[39m X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=\u001b[32m0.2\u001b[39m, random_state=\u001b[32m42\u001b[39m)\n\u001b[32m 19\u001b[39m X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=\u001b[32m0.2\u001b[39m, random_state=\u001b[32m42\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m20\u001b[39m X_train = \u001b[43mX_train\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 23\u001b[39m n_features = \u001b[32m10\u001b[39m\n\u001b[32m 24\u001b[39m fig=plt.figure( figsize=(\u001b[32m15\u001b[39m, \u001b[32m15\u001b[39m) )\n",
"\u001b[31mTypeError\u001b[39m: 'numpy.ndarray' object is not callable"
"\u001b[31mKeyError\u001b[39m Traceback (most recent call last)",
"\u001b[36mFile \u001b[39m\u001b[32m~/Documents/MLP/Projects/MLPproject/.venv/lib/python3.12/site-packages/pandas/core/indexes/base.py:3812\u001b[39m, in \u001b[36mIndex.get_loc\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 3811\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m3812\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_engine\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 3813\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
"\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/index.pyx:167\u001b[39m, in \u001b[36mpandas._libs.index.IndexEngine.get_loc\u001b[39m\u001b[34m()\u001b[39m\n",
"\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/index.pyx:196\u001b[39m, in \u001b[36mpandas._libs.index.IndexEngine.get_loc\u001b[39m\u001b[34m()\u001b[39m\n",
"\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/hashtable_class_helper.pxi:7088\u001b[39m, in \u001b[36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[39m\u001b[34m()\u001b[39m\n",
"\u001b[36mFile \u001b[39m\u001b[32mpandas/_libs/hashtable_class_helper.pxi:7096\u001b[39m, in \u001b[36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[39m\u001b[34m()\u001b[39m\n",
"\u001b[31mKeyError\u001b[39m: (slice(None, None, None), 0)",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[31mInvalidIndexError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 39\u001b[39m\n\u001b[32m 37\u001b[39m ax = fig.add_subplot(n_features, n_features, plt_num)\n\u001b[32m 38\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m(i == j):\n\u001b[32m---> \u001b[39m\u001b[32m39\u001b[39m ax.hist(\u001b[43mX_train\u001b[49m\u001b[43m[\u001b[49m\u001b[43m:\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m, bins=\u001b[32m25\u001b[39m, color=\u001b[33m'\u001b[39m\u001b[33mgray\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m 40\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 41\u001b[39m ax.scatter(X_train[:, j], X_train[:, i], c=np.array(colors)[y_train], s=\u001b[32m30\u001b[39m, alpha=\u001b[32m0.3\u001b[39m)\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/Documents/MLP/Projects/MLPproject/.venv/lib/python3.12/site-packages/pandas/core/frame.py:4113\u001b[39m, in \u001b[36mDataFrame.__getitem__\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 4111\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.columns.nlevels > \u001b[32m1\u001b[39m:\n\u001b[32m 4112\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._getitem_multilevel(key)\n\u001b[32m-> \u001b[39m\u001b[32m4113\u001b[39m indexer = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4114\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[32m 4115\u001b[39m indexer = [indexer]\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/Documents/MLP/Projects/MLPproject/.venv/lib/python3.12/site-packages/pandas/core/indexes/base.py:3818\u001b[39m, in \u001b[36mIndex.get_loc\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 3813\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[32m 3814\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[32m 3815\u001b[39m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc.Iterable)\n\u001b[32m 3816\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[32m 3817\u001b[39m ):\n\u001b[32m-> \u001b[39m\u001b[32m3818\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[32m 3819\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01merr\u001b[39;00m\n\u001b[32m 3820\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[32m 3821\u001b[39m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[32m 3822\u001b[39m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[32m 3823\u001b[39m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n",
"\u001b[31mInvalidIndexError\u001b[39m: (slice(None, None, None), 0)"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAKEAAACTCAYAAADm43kQAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjcsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvTLEjVAAAAAlwSFlzAAAPYQAAD2EBqD+naQAACbdJREFUeJzt3UtIVG0YB/D/aM5MQTNd8VKjEdKVyC5YupFAEArJVfYtcoi0gjY2kCVFYi2EigjEqI3OokUXsFoUSkQRpBGUgmktrFCDxq6eycgJ9PkW4XzfpFOeacZHp/8PzmJO7znv+3b+HOeceeGxiIiASFGC9gCIGEJSxxCSOoaQ1DGEpI4hJHUMIaljCEkdQ0jqGEJSZzqEDx48QGFhIdLS0mCxWHDjxo3fHnP//n2sX78eNpsNmZmZ8Hq9EQyV4pXpEH79+hVr165FXV3dhNq/fv0a27Ztw5YtW9De3o7y8nKUlpaiubnZ9GApPln+ZAGDxWLB9evXUVRUFLbN4cOHcevWLTx79iy4b+fOnRgYGEBTU1OkXVMcmRHrDlpbW5Gfnx+yr6CgAOXl5WGPCQQCCAQCwc8jIyP49OkT5s+fD4vFEquh0m+ICL58+YK0tDQkJETvcSLmIfT5fEhOTg7Zl5ycDL/fj2/fvmHmzJljjqmpqUF1dXWsh0YR6uvrw+LFi6N2vpiHMBKVlZXweDzBz4ZhID09HX19fXA4HIoj+7v5/X64XC7Mnj07queNeQhTUlLQ398fsq+/vx8Oh2PcuyAA2Gw22Gy2MfsdDgdDOAVE+ytRzN8T5uTk4O7duyH77ty5g5ycnFh3TdOE6RAODg6ivb0d7e3tAH68gmlvb0dvby+AH39KS0pKgu3379+PV69eoaKiAi9evMD58+dx9epVHDx4MDozoOlPTLp3754AGLO53W4REXG73ZKXlzfmmKysLLFarbJ06VJpaGgw1adhGAJADMMwO1yKolhdhz96TzhZ/H4/nE4nDMPgd0JFsboO/O2Y1DGEpI4hJHUMIaljCEkdQ0jqGEJSxxCSOoaQ1DGEpI4hJHUMIaljCEkdQ0jqGEJSxxCSOoaQ1DGEpI4hJHUMIaljCEkdQ0jqGEJSxxCSOoaQ1DGEpI4hJHUMIaljCEkdQ0jqGEJSxxCSOoaQ1DGEpI4hJHUMIaljCEkdQ0jqIgphXV0dlixZArvdjk2bNuHx48dh23q9XlgslpDNbrdHPGCKP6ZDeOXKFXg8HlRVVeHp06dYu3YtCgoK8O7du7DHOBwOvH37Nrj19PT80aApvpgO4dmzZ1FWVobdu3dj1apVuHDhAmbNmoX6+vqwx1gsFqSkpAS3n0vP0t/NVAi/f/+OJ0+ehBTRTkhIQH5+PlpbW8MeNzg4iIyMDLhcLmzfvh2dnZ2/7CcQCMDv94dsFL9MhfDDhw8YHh4et4i2z+cb95jly5ejvr4eN2/exKVLlzAyMoLc3Fy8efMmbD81NTVwOp3BzeVymRkmTTOTUmq2pKQEWVlZyMvLQ2NjIxYuXIiLFy+GPaayshKGYQS3vr6+WA+TFJkqur1gwQIkJiaOW0Q7JSVlQudISkrCunXr0N3dHbZNuKLbFJ9M3QmtVis2bNgQUkR7ZGQEd+/enXAR7eHhYXR0dCA1NdXcSClumboTAoDH44Hb7cbGjRuRnZ2Nc+fO4evXr9i9ezcAoKSkBIsWLUJNTQ0A4MSJE9i8eTMyMzMxMDCA06dPo6enB6WlpdGdCU1bpkNYXFyM9+/f4/jx4/D5fMjKykJTU1PwYaW3txcJCf/dYD9//oyysjL4fD7MnTsXGzZsQEtLC1atWhW9WdC0xqLbNGEsuk1xiyEkdQwhqWMISR1DSOoYQlLHEJI6hpDUMYSkjiEkdQwhqWMISR1DSOoYQlLHEJI6hpDUMYSkjiEkdQwhqWMISR1DSOoYQlLHEJI6hpDUMYSkjiEkdQwhqWMISR1DSOoYQlLHEJI6hpDUMYSkjiEkdQwhqWMISR1DSOoYQlIX86LbAHDt2jWsWLECdrsda9aswe3btyMaLMWnmBfdbmlpwT///IM9e/agra0NRUVFKCoqwrNnz/548BQnxKTs7Gw5cOBA8PPw8LCkpaVJTU3NuO137Ngh27ZtC9m3adMm2bdv34T7NAxDAIhhGGaHS1EUq+tgqqzYaNHtysrK4L7fFd1ubW2Fx+MJ2VdQUIAbN26E7ScQCCAQCAQ/G4YBACy+rWz0/1+iXATMVAh/VXT7xYsX4x7j8/lMFekGfhTdrq6uHrOfxbenho8fP8LpdEbtfKYLLE6GysrKkLvnwMAAMjIy0NvbG9XJTya/3w+Xy4W+vr5pW5/PMAykp6dj3rx5UT1vzItup6SkmC7SHa7ottPpnLYXcJTD4Zj2c/h/FdeonM9M40iKbufk5IS0B4A7d+5MuEg3/QXMPslcvnxZbDabeL1e6erqkr1798qcOXPE5/OJiMiuXbvkyJEjwfYPHz6UGTNmyJkzZ+T58+dSVVUlSUlJ0tHRMeE+4+HpmHMIz3QIRURqa2slPT1drFarZGdny6NHj4L/lpeXJ263O6T91atXZdmyZWK1WmX16tVy69YtU/0NDQ1JVVWVDA0NRTLcKYFzCG9aFN2m+MbfjkkdQ0jqGEJSxxCSuikTwnhYHmZmDl6vFxaLJWSz2+2TONqxHjx4gMLCQqSlpcFisfzy9/1R9+/fx/r162Gz2ZCZmQmv12u+46g+a0fo8uXLYrVapb6+Xjo7O6WsrEzmzJkj/f3947Z/+PChJCYmyqlTp6Srq0uOHTtm+t1jtJmdQ0NDgzgcDnn79m1wG33XquX27dty9OhRaWxsFABy/fr1X7Z/9eqVzJo1Szwej3R1dUltba0kJiZKU1OTqX6nRAg1lodFm9k5NDQ0iNPpnKTRmTeREFZUVMjq1atD9hUXF0tBQYGpvtT/HI8uD8vPzw/um8jysP+3B34sDwvXPtYimQMADA4OIiMjAy6XC9u3b0dnZ+dkDDdqonUd1EP4q+Vh4ZZ7RbI8LJYimcPy5ctRX1+Pmzdv4tKlSxgZGUFubi7evHkzGUOOinDXwe/349u3bxM+z5RcyvU3yMnJCVnEkZubi5UrV+LixYs4efKk4sgmn/qdcLKWh8VSJHP4WVJSEtatW4fu7u5YDDEmwl0Hh8OBmTNnTvg86iGMh+VhkczhZ8PDw+jo6EBqamqshhl1UbsOZp+aYkFjeVi0mZ1DdXW1NDc3y8uXL+XJkyeyc+dOsdvt0tnZqTUF+fLli7S1tUlbW5sAkLNnz0pbW5v09PSIiMiRI0dk165dwfajr2gOHTokz58/l7q6uun7ikZk8peHxYKZOZSXlwfbJicny9atW+Xp06cKo/7PvXv3BMCYbXTcbrdb8vLyxhyTlZUlVqtVli5dKg0NDab75VIuUqf+nZCIISR1DCGpYwhJHUNI6hhCUscQkjqGkNQxhKSOISR1DCGpYwhJ3b+8OUfiN/PNKAAAAABJRU5ErkJggg==",
"text/plain": [
"<Figure size 1500x1500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
@@ -88,7 +108,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
"version": "3.12.12"
}
},
"nbformat": 4,