Small bug fixes
This commit is contained in:
@@ -59,27 +59,22 @@
|
||||
"from sklearn.preprocessing import LabelEncoder\n",
|
||||
"import seaborn as sns\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"df = pd.read_csv('./Datasets/adult.csv', comment = '#')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# First, create a copy of the dataframe to avoid modifying the original\n",
|
||||
"df_encoded = df.copy()\n",
|
||||
"df_encoded.drop(['fnlwgt', 'education'], axis=1, inplace=True)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# drop all rows that contain '?'\n",
|
||||
"for column in df_encoded.columns:\n",
|
||||
" df_encoded = df_encoded[df_encoded[column] != '?']\n",
|
||||
"df_encoded = df_encoded[(df_encoded != '?').all(axis=1)]\n",
|
||||
"\n",
|
||||
"# Apply label encoding to categorical columns\n",
|
||||
"label_encoder = LabelEncoder()\n",
|
||||
"categorical_columns = ['workclass', 'marital.status', 'occupation', \n",
|
||||
" 'relationship', 'race', 'sex', 'native.country', 'income']\n",
|
||||
"\n",
|
||||
"for column in categorical_columns:\n",
|
||||
" df_encoded[column] = label_encoder.fit_transform(df_encoded[column])\n",
|
||||
"\n",
|
||||
" le = LabelEncoder()\n",
|
||||
" df_encoded[column] = le.fit_transform(df_encoded[column])\n",
|
||||
"\n",
|
||||
"matrix = df_encoded.corr()\n",
|
||||
"\n",
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user