Compare commits

...

2 Commits

SHA1        Message                                                        Date
6b51a45a76  Merge branch 'main' of https://gitea.jany.se/Jany/MLPproject   2025-10-21 09:59:59 +02:00
abd4cc5959  nothing                                                        2025-10-21 09:54:12 +02:00


@@ -2,72 +2,19 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 34,
"id": "b6ea6c3b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" age workclass fnlwgt education.num marital.status \\\n",
"0 90 ? 77053 9 Widowed \n",
"1 82 Private 132870 9 Widowed \n",
"2 66 ? 186061 10 Widowed \n",
"3 54 Private 140359 4 Divorced \n",
"4 41 Private 264663 10 Separated \n",
"... ... ... ... ... ... \n",
"32556 22 Private 310152 10 Never-married \n",
"32557 27 Private 257302 12 Married-civ-spouse \n",
"32558 40 Private 154374 9 Married-civ-spouse \n",
"32559 58 Private 151910 9 Widowed \n",
"32560 22 Private 201490 9 Never-married \n",
"\n",
" occupation relationship race sex capital.gain \\\n",
"0 ? Not-in-family White Female 0 \n",
"1 Exec-managerial Not-in-family White Female 0 \n",
"2 ? Unmarried Black Female 0 \n",
"3 Machine-op-inspct Unmarried White Female 0 \n",
"4 Prof-specialty Own-child White Female 0 \n",
"... ... ... ... ... ... \n",
"32556 Protective-serv Not-in-family White Male 0 \n",
"32557 Tech-support Wife White Female 0 \n",
"32558 Machine-op-inspct Husband White Male 0 \n",
"32559 Adm-clerical Unmarried White Female 0 \n",
"32560 Adm-clerical Own-child White Male 0 \n",
"\n",
" capital.loss hours.per.week native.country income \n",
"0 4356 40 United-States <=50K \n",
"1 4356 18 United-States <=50K \n",
"2 4356 40 United-States <=50K \n",
"3 3900 40 United-States <=50K \n",
"4 3900 40 United-States <=50K \n",
"... ... ... ... ... \n",
"32556 0 40 United-States <=50K \n",
"32557 0 38 United-States <=50K \n",
"32558 0 40 United-States >50K \n",
"32559 0 40 United-States <=50K \n",
"32560 0 20 United-States <=50K \n",
"\n",
"[32561 rows x 14 columns]\n"
]
},
{
"ename": "ValueError",
"evalue": "could not convert string to float: '?'",
"ename": "TypeError",
"evalue": "'numpy.ndarray' object is not callable",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mValueError\u001b[39m Traceback (most recent call last)",
"\u001b[32m/tmp/ipykernel_3669/1374041598.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 37\u001b[39m print(X)\n\u001b[32m 38\u001b[39m \n\u001b[32m 39\u001b[39m \u001b[38;5;66;03m# Create and train the decision tree classifier\u001b[39;00m\n\u001b[32m 40\u001b[39m clf = DecisionTreeClassifier(random_state=\u001b[32m42\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m41\u001b[39m clf.fit(X, y)\n\u001b[32m 42\u001b[39m \n\u001b[32m 43\u001b[39m \u001b[38;5;66;03m# Visualize the tree\u001b[39;00m\n\u001b[32m 44\u001b[39m plt.figure(figsize=(\u001b[32m20\u001b[39m,\u001b[32m10\u001b[39m))\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/base.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(estimator, *args, **kwargs)\u001b[39m\n\u001b[32m 1361\u001b[39m skip_parameter_validation=(\n\u001b[32m 1362\u001b[39m prefer_skip_nested_validation \u001b[38;5;28;01mor\u001b[39;00m global_skip_validation\n\u001b[32m 1363\u001b[39m )\n\u001b[32m 1364\u001b[39m ):\n\u001b[32m-> \u001b[39m\u001b[32m1365\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m fit_method(estimator, *args, **kwargs)\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/tree/_classes.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(self, X, y, sample_weight, check_input)\u001b[39m\n\u001b[32m 1020\u001b[39m self : DecisionTreeClassifier\n\u001b[32m 1021\u001b[39m Fitted estimator.\n\u001b[32m 1022\u001b[39m \"\"\"\n\u001b[32m 1023\u001b[39m \n\u001b[32m-> \u001b[39m\u001b[32m1024\u001b[39m super()._fit(\n\u001b[32m 1025\u001b[39m X,\n\u001b[32m 1026\u001b[39m y,\n\u001b[32m 1027\u001b[39m sample_weight=sample_weight,\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/tree/_classes.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(self, X, y, sample_weight, check_input, missing_values_in_feature_mask)\u001b[39m\n\u001b[32m 248\u001b[39m check_X_params = dict(\n\u001b[32m 249\u001b[39m dtype=DTYPE, accept_sparse=\u001b[33m\"csc\"\u001b[39m, ensure_all_finite=\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 250\u001b[39m )\n\u001b[32m 251\u001b[39m check_y_params = dict(ensure_2d=\u001b[38;5;28;01mFalse\u001b[39;00m, dtype=\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m--> \u001b[39m\u001b[32m252\u001b[39m X, y = validate_data(\n\u001b[32m 253\u001b[39m self, X, y, validate_separately=(check_X_params, check_y_params)\n\u001b[32m 254\u001b[39m )\n\u001b[32m 255\u001b[39m \n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/utils/validation.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(_estimator, X, y, reset, validate_separately, skip_check_array, **check_params)\u001b[39m\n\u001b[32m 2962\u001b[39m \u001b[38;5;66;03m# :(\u001b[39;00m\n\u001b[32m 2963\u001b[39m check_X_params, check_y_params = validate_separately\n\u001b[32m 2964\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"estimator\"\u001b[39m \u001b[38;5;28;01mnot\u001b[39;00m \u001b[38;5;28;01min\u001b[39;00m check_X_params:\n\u001b[32m 2965\u001b[39m check_X_params = {**default_check_params, **check_X_params}\n\u001b[32m-> \u001b[39m\u001b[32m2966\u001b[39m X = check_array(X, input_name=\u001b[33m\"X\"\u001b[39m, **check_X_params)\n\u001b[32m 2967\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"estimator\"\u001b[39m \u001b[38;5;28;01mnot\u001b[39;00m \u001b[38;5;28;01min\u001b[39;00m check_y_params:\n\u001b[32m 2968\u001b[39m check_y_params = {**default_check_params, **check_y_params}\n\u001b[32m 2969\u001b[39m y = check_array(y, input_name=\u001b[33m\"y\"\u001b[39m, **check_y_params)\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/utils/validation.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[39m\n\u001b[32m 1050\u001b[39m )\n\u001b[32m 1051\u001b[39m array = xp.astype(array, dtype, copy=\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[32m 1052\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1053\u001b[39m array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n\u001b[32m-> \u001b[39m\u001b[32m1054\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ComplexWarning \u001b[38;5;28;01mas\u001b[39;00m complex_warning:\n\u001b[32m 1055\u001b[39m raise ValueError(\n\u001b[32m 1056\u001b[39m \u001b[33m\"Complex data not supported\\n{}\\n\"\u001b[39m.format(array)\n\u001b[32m 1057\u001b[39m ) \u001b[38;5;28;01mfrom\u001b[39;00m complex_warning\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/utils/_array_api.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(array, dtype, order, copy, xp, device)\u001b[39m\n\u001b[32m 753\u001b[39m \u001b[38;5;66;03m# Use NumPy API to support order\u001b[39;00m\n\u001b[32m 754\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;28;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 755\u001b[39m array = numpy.array(array, order=order, dtype=dtype)\n\u001b[32m 756\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m757\u001b[39m array = numpy.asarray(array, order=order, dtype=dtype)\n\u001b[32m 758\u001b[39m \n\u001b[32m 759\u001b[39m \u001b[38;5;66;03m# At this point array is a NumPy ndarray. We convert it to an array\u001b[39;00m\n\u001b[32m 760\u001b[39m \u001b[38;5;66;03m# container that is consistent with the input's namespace.\u001b[39;00m\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/pandas/core/generic.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(self, dtype, copy)\u001b[39m\n\u001b[32m 2167\u001b[39m )\n\u001b[32m 2168\u001b[39m values = self._values\n\u001b[32m 2169\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;28;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 2170\u001b[39m \u001b[38;5;66;03m# Note: branch avoids `copy=None` for NumPy 1.x support\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m2171\u001b[39m arr = np.asarray(values, dtype=dtype)\n\u001b[32m 2172\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 2173\u001b[39m arr = np.array(values, dtype=dtype, copy=copy)\n\u001b[32m 2174\u001b[39m \n",
"\u001b[31mValueError\u001b[39m: could not convert string to float: '?'"
"\u001b[31mTypeError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[25]\u001b[39m\u001b[32m, line 20\u001b[39m\n\u001b[32m 18\u001b[39m X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=\u001b[32m0.2\u001b[39m, random_state=\u001b[32m42\u001b[39m)\n\u001b[32m 19\u001b[39m X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=\u001b[32m0.2\u001b[39m, random_state=\u001b[32m42\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m20\u001b[39m X_train = \u001b[43mX_train\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 23\u001b[39m n_features = \u001b[32m10\u001b[39m\n\u001b[32m 24\u001b[39m fig=plt.figure( figsize=(\u001b[32m15\u001b[39m, \u001b[32m15\u001b[39m) )\n",
"\u001b[31mTypeError\u001b[39m: 'numpy.ndarray' object is not callable"
]
}
],
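The outputs hunk above swaps one failure for another: the earlier run fed the raw census frame, '?' placeholders and all, straight into DecisionTreeClassifier and hit the ValueError, while the newer run stops earlier with the TypeError (addressed after the next hunk). Below is a minimal sketch of how the '?' values and the string columns could be handled before fitting; the adult.csv file name, the na_values/ordinal-encoding choices, and the income target mapping are assumptions, not taken from the repository.

    import pandas as pd
    from sklearn.preprocessing import OrdinalEncoder
    from sklearn.tree import DecisionTreeClassifier

    # Hypothetical file name; the notebook's actual CSV path is not visible in this diff.
    df = pd.read_csv("adult.csv", na_values="?", skipinitialspace=True)

    # Separate the features from the income label seen in the old printed output.
    X = df.drop(columns=["income"])
    y = (df["income"] == ">50K").astype(int)

    # Ordinal-encode the string columns so the tree receives numbers instead of raw text.
    cat_cols = X.select_dtypes(include="object").columns
    X[cat_cols] = OrdinalEncoder().fit_transform(X[cat_cols].fillna("Missing"))

    clf = DecisionTreeClassifier(random_state=42)
    clf.fit(X, y)

OrdinalEncoder is simply the shortest route to a numeric matrix; one-hot encoding would be the more conventional choice for nominal columns such as workclass or occupation.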
@@ -101,32 +48,27 @@
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)\n",
"\n",
"print(X)\n",
"\n",
"# Create and train the decision tree classifier\n",
"clf = DecisionTreeClassifier(random_state=42)\n",
"clf.fit(X, y)\n",
"\n",
"# Visualize the tree\n",
"plt.figure(figsize=(20,10))\n",
"plot_tree(clf, feature_names=X.columns, class_names=['No', 'Yes'], \n",
" filled=True, rounded=True, fontsize=18)\n",
"plt.title(\"Rod Breaking Decision Tree\",fontsize=30)\n",
"plt.show()\n",
"n_features = 10\n",
"fig=plt.figure( figsize=(15, 15) )\n",
"plt_num = 1\n",
"for i in range(n_features):\n",
" for j in range(n_features):\n",
" ax = fig.add_subplot(n_features, n_features, plt_num)\n",
" if(i == j):\n",
" ax.hist(X_train[:, i], bins=25, color='gray')\n",
" else:\n",
" ax.scatter(X_train[:, j], X_train[:, i], c=np.array(colors)[y_train], s=30, alpha=0.3)\n",
" \n",
" if(i == n_features-1):\n",
" ax.set_xlabel(f'$x_{{{j}}}$', fontsize=22)\n",
" \n",
" if(j==0):\n",
" ax.set_ylabel(f'$x_{{{i}}}$', fontsize=22)\n",
"\n",
"# Print feature importances\n",
"importances = pd.DataFrame({\n",
" 'feature': X.columns,\n",
" 'importance': clf.feature_importances_\n",
"})\n",
"print(\"\\nFeature Importances:\")\n",
"print(importances.sort_values('importance', ascending=False))\n",
"\n",
"# Example prediction\n",
"print(\"\\nPrediction for a new sample:\")\n",
"new_sample = pd.DataFrame([[350, 0, 0, 0.09]], columns=X.columns) # Force=300, Coating=Yes, Defects=No, Moisture=0.05\n",
"prediction = clf.predict(new_sample)\n",
"print(f\"Will the rod break? {'Yes' if prediction[0] == 1 else 'No'}\")"
" ax.grid(True)\n",
" plt_num +=1\n"
]
}
],
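The source hunk replaces the decision-tree block with a pairwise histogram/scatter grid, and the new traceback points at X_train = X_train.values(): DataFrame.values is a property, so the trailing parentheses try to call the ndarray it returns. The sketch below shows the same grid with that conversion fixed, run on stand-in data so it is self-contained; the random features, the 0/1 labels, and the two-colour palette are assumptions.

    import numpy as np
    import matplotlib.pyplot as plt

    rng = np.random.default_rng(42)
    X_train = rng.normal(size=(200, 10))           # stand-in for the encoded training features
    y_train = rng.integers(0, 2, size=200)         # stand-in 0/1 class labels
    colors = np.array(["tab:blue", "tab:orange"])  # assumed palette, one colour per class

    # With a real DataFrame, use X_train.to_numpy() (or X_train.values with no parentheses).
    n_features = 10
    fig = plt.figure(figsize=(15, 15))
    plt_num = 1
    for i in range(n_features):
        for j in range(n_features):
            ax = fig.add_subplot(n_features, n_features, plt_num)
            if i == j:
                ax.hist(X_train[:, i], bins=25, color="gray")   # marginal distribution on the diagonal
            else:
                ax.scatter(X_train[:, j], X_train[:, i],
                           c=colors[y_train], s=30, alpha=0.3)  # pairwise scatter coloured by class
            if i == n_features - 1:
                ax.set_xlabel(f"$x_{{{j}}}$", fontsize=22)
            if j == 0:
                ax.set_ylabel(f"$x_{{{i}}}$", fontsize=22)
            ax.grid(True)
            plt_num += 1
    plt.show()

to_numpy() is the documented replacement for the older .values attribute and sidesteps exactly this attribute-versus-call confusion.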