Small bug fixes
This commit is contained in:
@@ -59,27 +59,22 @@
|
|||||||
"from sklearn.preprocessing import LabelEncoder\n",
|
"from sklearn.preprocessing import LabelEncoder\n",
|
||||||
"import seaborn as sns\n",
|
"import seaborn as sns\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
|
||||||
"df = pd.read_csv('./Datasets/adult.csv', comment = '#')\n",
|
"df = pd.read_csv('./Datasets/adult.csv', comment = '#')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
|
||||||
"# First, create a copy of the dataframe to avoid modifying the original\n",
|
"# First, create a copy of the dataframe to avoid modifying the original\n",
|
||||||
"df_encoded = df.copy()\n",
|
"df_encoded = df.copy()\n",
|
||||||
"df_encoded.drop(['fnlwgt', 'education'], axis=1, inplace=True)\n",
|
"df_encoded.drop(['fnlwgt', 'education'], axis=1, inplace=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
|
||||||
"# drop all rows that contain '?'\n",
|
"# drop all rows that contain '?'\n",
|
||||||
"for column in df_encoded.columns:\n",
|
"df_encoded = df_encoded[(df_encoded != '?').all(axis=1)]\n",
|
||||||
" df_encoded = df_encoded[df_encoded[column] != '?']\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# Apply label encoding to categorical columns\n",
|
"# Apply label encoding to categorical columns\n",
|
||||||
"label_encoder = LabelEncoder()\n",
|
|
||||||
"categorical_columns = ['workclass', 'marital.status', 'occupation', \n",
|
"categorical_columns = ['workclass', 'marital.status', 'occupation', \n",
|
||||||
" 'relationship', 'race', 'sex', 'native.country', 'income']\n",
|
" 'relationship', 'race', 'sex', 'native.country', 'income']\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for column in categorical_columns:\n",
|
"for column in categorical_columns:\n",
|
||||||
" df_encoded[column] = label_encoder.fit_transform(df_encoded[column])\n",
|
" le = LabelEncoder()\n",
|
||||||
"\n",
|
" df_encoded[column] = le.fit_transform(df_encoded[column])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"matrix = df_encoded.corr()\n",
|
"matrix = df_encoded.corr()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user