2025-10-22 10:47:20 +02:00
@@ -10,6 +10,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
<<<<<<< HEAD
" age workclass education.num marital.status occupation \\\n", " age workclass education.num marital.status occupation \\\n",
"0 90 0 9 6 0 \n", "0 90 0 9 6 0 \n",
"1 82 4 9 6 4 \n", "1 82 4 9 6 4 \n",
@@ -67,6 +68,66 @@
"6 race 0.023650\n", "6 race 0.023650\n",
"3 marital.status 0.011255\n", "3 marital.status 0.011255\n",
"7 sex 0.006286\n" "7 sex 0.006286\n"
=======
" age workclass fnlwgt education.num marital.status \\\n",
"0 90 ? 77053 9 Widowed \n",
"1 82 Private 132870 9 Widowed \n",
"2 66 ? 186061 10 Widowed \n",
"3 54 Private 140359 4 Divorced \n",
"4 41 Private 264663 10 Separated \n",
"... ... ... ... ... ... \n",
"32556 22 Private 310152 10 Never-married \n",
"32557 27 Private 257302 12 Married-civ-spouse \n",
"32558 40 Private 154374 9 Married-civ-spouse \n",
"32559 58 Private 151910 9 Widowed \n",
"32560 22 Private 201490 9 Never-married \n",
"\n",
" occupation relationship race sex capital.gain \\\n",
"0 ? Not-in-family White Female 0 \n",
"1 Exec-managerial Not-in-family White Female 0 \n",
"2 ? Unmarried Black Female 0 \n",
"3 Machine-op-inspct Unmarried White Female 0 \n",
"4 Prof-specialty Own-child White Female 0 \n",
"... ... ... ... ... ... \n",
"32556 Protective-serv Not-in-family White Male 0 \n",
"32557 Tech-support Wife White Female 0 \n",
"32558 Machine-op-inspct Husband White Male 0 \n",
"32559 Adm-clerical Unmarried White Female 0 \n",
"32560 Adm-clerical Own-child White Male 0 \n",
"\n",
" capital.loss hours.per.week native.country income \n",
"0 4356 40 United-States <=50K \n",
"1 4356 18 United-States <=50K \n",
"2 4356 40 United-States <=50K \n",
"3 3900 40 United-States <=50K \n",
"4 3900 40 United-States <=50K \n",
"... ... ... ... ... \n",
"32556 0 40 United-States <=50K \n",
"32557 0 38 United-States <=50K \n",
"32558 0 40 United-States >50K \n",
"32559 0 40 United-States <=50K \n",
"32560 0 20 United-States <=50K \n",
"\n",
"[32561 rows x 14 columns]\n"
]
},
{
"ename": "ValueError",
"evalue": "could not convert string to float: '?'",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mValueError\u001b[39m Traceback (most recent call last)",
"\u001b[32m/tmp/ipykernel_3669/1374041598.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 37\u001b[39m print(X)\n\u001b[32m 38\u001b[39m \n\u001b[32m 39\u001b[39m \u001b[38;5;66;03m# Create and train the decision tree classifier\u001b[39;00m\n\u001b[32m 40\u001b[39m clf = DecisionTreeClassifier(random_state=\u001b[32m42\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m41\u001b[39m clf.fit(X, y)\n\u001b[32m 42\u001b[39m \n\u001b[32m 43\u001b[39m \u001b[38;5;66;03m# Visualize the tree\u001b[39;00m\n\u001b[32m 44\u001b[39m plt.figure(figsize=(\u001b[32m20\u001b[39m,\u001b[32m10\u001b[39m))\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/base.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(estimator, *args, **kwargs)\u001b[39m\n\u001b[32m 1361\u001b[39m skip_parameter_validation=(\n\u001b[32m 1362\u001b[39m prefer_skip_nested_validation \u001b[38;5;28;01mor\u001b[39;00m global_skip_validation\n\u001b[32m 1363\u001b[39m )\n\u001b[32m 1364\u001b[39m ):\n\u001b[32m-> \u001b[39m\u001b[32m1365\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m fit_method(estimator, *args, **kwargs)\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/tree/_classes.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(self, X, y, sample_weight, check_input)\u001b[39m\n\u001b[32m 1020\u001b[39m self : DecisionTreeClassifier\n\u001b[32m 1021\u001b[39m Fitted estimator.\n\u001b[32m 1022\u001b[39m \"\"\"\n\u001b[32m 1023\u001b[39m \n\u001b[32m-> \u001b[39m\u001b[32m1024\u001b[39m super()._fit(\n\u001b[32m 1025\u001b[39m X,\n\u001b[32m 1026\u001b[39m y,\n\u001b[32m 1027\u001b[39m sample_weight=sample_weight,\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/tree/_classes.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(self, X, y, sample_weight, check_input, missing_values_in_feature_mask)\u001b[39m\n\u001b[32m 248\u001b[39m check_X_params = dict(\n\u001b[32m 249\u001b[39m dtype=DTYPE, accept_sparse=\u001b[33m\"csc\"\u001b[39m, ensure_all_finite=\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m 250\u001b[39m )\n\u001b[32m 251\u001b[39m check_y_params = dict(ensure_2d=\u001b[38;5;28;01mFalse\u001b[39;00m, dtype=\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m--> \u001b[39m\u001b[32m252\u001b[39m X, y = validate_data(\n\u001b[32m 253\u001b[39m self, X, y, validate_separately=(check_X_params, check_y_params)\n\u001b[32m 254\u001b[39m )\n\u001b[32m 255\u001b[39m \n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/utils/validation.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(_estimator, X, y, reset, validate_separately, skip_check_array, **check_params)\u001b[39m\n\u001b[32m 2962\u001b[39m \u001b[38;5;66;03m# :(\u001b[39;00m\n\u001b[32m 2963\u001b[39m check_X_params, check_y_params = validate_separately\n\u001b[32m 2964\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"estimator\"\u001b[39m \u001b[38;5;28;01mnot\u001b[39;00m \u001b[38;5;28;01min\u001b[39;00m check_X_params:\n\u001b[32m 2965\u001b[39m check_X_params = {**default_check_params, **check_X_params}\n\u001b[32m-> \u001b[39m\u001b[32m2966\u001b[39m X = check_array(X, input_name=\u001b[33m\"X\"\u001b[39m, **check_X_params)\n\u001b[32m 2967\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"estimator\"\u001b[39m \u001b[38;5;28;01mnot\u001b[39;00m \u001b[38;5;28;01min\u001b[39;00m check_y_params:\n\u001b[32m 2968\u001b[39m check_y_params = {**default_check_params, **check_y_params}\n\u001b[32m 2969\u001b[39m y = check_array(y, input_name=\u001b[33m\"y\"\u001b[39m, **check_y_params)\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/utils/validation.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[39m\n\u001b[32m 1050\u001b[39m )\n\u001b[32m 1051\u001b[39m array = xp.astype(array, dtype, copy=\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[32m 1052\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1053\u001b[39m array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n\u001b[32m-> \u001b[39m\u001b[32m1054\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ComplexWarning \u001b[38;5;28;01mas\u001b[39;00m complex_warning:\n\u001b[32m 1055\u001b[39m raise ValueError(\n\u001b[32m 1056\u001b[39m \u001b[33m\"Complex data not supported\\n{}\\n\"\u001b[39m.format(array)\n\u001b[32m 1057\u001b[39m ) \u001b[38;5;28;01mfrom\u001b[39;00m complex_warning\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/sklearn/utils/_array_api.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(array, dtype, order, copy, xp, device)\u001b[39m\n\u001b[32m 753\u001b[39m \u001b[38;5;66;03m# Use NumPy API to support order\u001b[39;00m\n\u001b[32m 754\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;28;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m 755\u001b[39m array = numpy.array(array, order=order, dtype=dtype)\n\u001b[32m 756\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m757\u001b[39m array = numpy.asarray(array, order=order, dtype=dtype)\n\u001b[32m 758\u001b[39m \n\u001b[32m 759\u001b[39m \u001b[38;5;66;03m# At this point array is a NumPy ndarray. We convert it to an array\u001b[39;00m\n\u001b[32m 760\u001b[39m \u001b[38;5;66;03m# container that is consistent with the input's namespace.\u001b[39;00m\n",
"\u001b[32m~/Documents/MLPproject/.venv/lib/python3.12/site-packages/pandas/core/generic.py\u001b[39m in \u001b[36m?\u001b[39m\u001b[34m(self, dtype, copy)\u001b[39m\n\u001b[32m 2167\u001b[39m )\n\u001b[32m 2168\u001b[39m values = self._values\n\u001b[32m 2169\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;28;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 2170\u001b[39m \u001b[38;5;66;03m# Note: branch avoids `copy=None` for NumPy 1.x support\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m2171\u001b[39m arr = np.asarray(values, dtype=dtype)\n\u001b[32m 2172\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 2173\u001b[39m arr = np.array(values, dtype=dtype, copy=copy)\n\u001b[32m 2174\u001b[39m \n",
"\u001b[31mValueError\u001b[39m: could not convert string to float: '?'"
>>>>>>> 2ff73d660ba62540b0a320049b9314f62843e037
]
}
],
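
Note on the traceback in this hunk (the output below the ======= marker, from commit 2ff73d6): clf.fit(X, y) fails because the features are taken straight from the raw Adult census frame, whose categorical columns (workclass, occupation, native.country, ...) still contain the literal string '?' as a missing-value marker, and scikit-learn cannot coerce that to float. A minimal sketch of one way to clean and encode the frame before fitting, assuming the data is loaded into df as in the notebook (the file name and the dropna choice are assumptions, not taken from the commit):

    import pandas as pd
    from sklearn.preprocessing import LabelEncoder
    from sklearn.tree import DecisionTreeClassifier

    df = pd.read_csv("adult.csv")            # assumed file name
    df = df.replace("?", pd.NA).dropna()     # turn '?' markers into real NaNs, then drop (or impute) them

    df_encoded = df.copy()
    for column in df_encoded.select_dtypes(include="object").columns:
        df_encoded[column] = LabelEncoder().fit_transform(df_encoded[column])

    X = df_encoded.drop(columns=["income"])
    y = df_encoded["income"]
    clf = DecisionTreeClassifier(random_state=42).fit(X, y)  # no string values remain, so the fit succeeds
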
@@ -93,7 +154,11 @@
" df_encoded[column] = label_encoder.fit_transform(df_encoded[column])\n", " df_encoded[column] = label_encoder.fit_transform(df_encoded[column])\n",
"\n", "\n",
"# Now properly separate features and target\n", "# Now properly separate features and target\n",
<<<<<<< HEAD
"X = df_encoded.drop(columns=['income', 'native.country', 'education','fnlwgt'])\n", "X = df_encoded.drop(columns=['income', 'native.country', 'education','fnlwgt'])\n",
=======
"X = df_encoded.drop(columns=['income', 'native.country', 'education'])\n",
>>>>>>> 2ff73d660ba62540b0a320049b9314f62843e037
"y = df_encoded['income']\n", "y = df_encoded['income']\n",
"\n", "\n",
"# Split the data\n", "# Split the data\n",
@@ -120,7 +185,16 @@
"})\n", "})\n",
"print(\"\\nFeature Importances:\")\n", "print(\"\\nFeature Importances:\")\n",
"print(importances.sort_values('importance', ascending=False))\n", "print(importances.sort_values('importance', ascending=False))\n",
<<<<<<< HEAD
"\n" "\n"
=======
"\n",
"# Example prediction\n",
"print(\"\\nPrediction for a new sample:\")\n",
"new_sample = pd.DataFrame([[350, 0, 0, 0.09]], columns=X.columns) # Force=300, Coating=Yes, Defects=No, Moisture=0.05\n",
"prediction = clf.predict(new_sample)\n",
"print(f\"Will the rod break? {'Yes' if prediction[0] == 1 else 'No'}\")"
>>>>>>> 2ff73d660ba62540b0a320049b9314f62843e037
]
}
],
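
One caveat about the example prediction added in the last hunk: pd.DataFrame([[350, 0, 0, 0.09]], columns=X.columns) supplies four values while X for the income data has far more columns, so pandas raises a shape mismatch before predict is ever reached, and the inline comment (Force, Coating, Defects, Moisture) looks copied from a different dataset. A sketch of a prediction that matches the encoded income features, reusing an existing row so that no feature values have to be invented:

    # take one already-encoded row as the query; real use would build the row from label-encoded inputs
    new_sample = X.iloc[[0]]            # one-row DataFrame with the same columns and encodings as the training data
    prediction = clf.predict(new_sample)
    print(f"Predicted income class: {prediction[0]}")   # 0/1 after label encoding of '<=50K' / '>50K'
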