Small bug fixes

This commit is contained in:
2025-10-25 18:38:53 +02:00
parent c0ffaa45c9
commit 5c1b77358f
2 changed files with 81 additions and 60 deletions

View File

@@ -59,27 +59,22 @@
"from sklearn.preprocessing import LabelEncoder\n",
"import seaborn as sns\n",
"\n",
"\n",
"df = pd.read_csv('./Datasets/adult.csv', comment = '#')\n",
"\n",
"\n",
"# First, create a copy of the dataframe to avoid modifying the original\n",
"df_encoded = df.copy()\n",
"df_encoded.drop(['fnlwgt', 'education'], axis=1, inplace=True)\n",
"\n",
"\n",
"# Drop all rows that contain '?'\n",
"for column in df_encoded.columns:\n",
" df_encoded = df_encoded[df_encoded[column] != '?']\n",
"df_encoded = df_encoded[(df_encoded != '?').all(axis=1)]\n",
"\n",
"# Apply label encoding to categorical columns\n",
"label_encoder = LabelEncoder()\n",
"categorical_columns = ['workclass', 'marital.status', 'occupation', \n",
" 'relationship', 'race', 'sex', 'native.country', 'income']\n",
"\n",
"for column in categorical_columns:\n",
" df_encoded[column] = label_encoder.fit_transform(df_encoded[column])\n",
"\n",
" le = LabelEncoder()\n",
" df_encoded[column] = le.fit_transform(df_encoded[column])\n",
"\n",
"matrix = df_encoded.corr()\n",
"\n",

File diff suppressed because one or more lines are too long