Compare commits

..

3 Commits

Author SHA1 Message Date
af4291fe1e Merge branch 'main' of https://gitea.jany.se/Jany/MLPproject 2025-10-22 12:34:23 +02:00
342dfe7e1f Continued work on the report 2025-10-22 12:34:00 +02:00
14d403bc1f Continued work on the report 2025-10-22 12:33:56 +02:00
7 changed files with 37 additions and 32 deletions

View File

@@ -219,7 +219,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.12.12" "version": "3.13.7"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -32,7 +32,7 @@
\@writefile{toc}{\contentsline {section}{\numberline {6}}{1}{section.6}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {6}}{1}{section.6}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{References}{1}{section.6}\protected@file@percent } \@writefile{toc}{\contentsline {section}{References}{1}{section.6}\protected@file@percent }
\ttl@finishall \ttl@finishall
\newlabel{LastPage}{{}{1}{}{page.1}{}} \newlabel{LastPage}{{}{2}{}{page.2}{}}
\xdef\lastpage@lastpage{1} \xdef\lastpage@lastpage{2}
\xdef\lastpage@lastpageHy{1} \xdef\lastpage@lastpageHy{2}
\gdef \@abspage@last{1} \gdef \@abspage@last{2}

View File

@@ -1,6 +1,6 @@
# Fdb version 4 # Fdb version 4
["pdflatex"] 1761125791.81013 "/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" "MLPproject.pdf" "MLPproject" 1761125793.25759 0 ["pdflatex"] 1761129238.9062 "/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" "MLPproject.pdf" "MLPproject" 1761129240.39559 0
"/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" 1761125791.50152 4318 6372d460a7a87caa250e0c6d0d25be18 "" "/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" 1761129236.61965 6049 c7e7910f66e6a5624dd2e411a4d86264 ""
"/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc" 1721433600 4850 80dc9bab7f31fb78a000ccfed0e27cab "" "/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc" 1721433600 4850 80dc9bab7f31fb78a000ccfed0e27cab ""
"/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1577235249 3524 cb3e574dea2d1052e39280babc910dc8 "" "/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1577235249 3524 cb3e574dea2d1052e39280babc910dc8 ""
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/helvetic/phvb7t.tfm" 1136768653 2240 eb56c13537f4d8a0bd3fafc25572b1bd "" "/usr/share/texlive/texmf-dist/fonts/tfm/adobe/helvetic/phvb7t.tfm" 1136768653 2240 eb56c13537f4d8a0bd3fafc25572b1bd ""
@@ -132,10 +132,10 @@
"/usr/share/texlive/texmf-dist/web2c/texmf.cnf" 1721433600 40900 887e0dc8cac988a9e9c574af364cf837 "" "/usr/share/texlive/texmf-dist/web2c/texmf.cnf" 1721433600 40900 887e0dc8cac988a9e9c574af364cf837 ""
"/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map" 1760290233.68077 4602002 62dba5fc29055c16380d7393a2adb07a "" "/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map" 1760290233.68077 4602002 62dba5fc29055c16380d7393a2adb07a ""
"/var/lib/texmf/web2c/pdftex/pdflatex.fmt" 1760289849 7753794 892d611f76aecccd13eb485815d0543e "" "/var/lib/texmf/web2c/pdftex/pdflatex.fmt" 1760289849 7753794 892d611f76aecccd13eb485815d0543e ""
"MLPproject.aux" 1761125793.07252 2066 17a2ed4a62d84d4f27ea168ff2ae527f "pdflatex" "MLPproject.aux" 1761129240.22236 2066 62a9c5cb48c4e643fe8b2fb1bba9b77b "pdflatex"
"MLPproject.out" 1761125793.07452 1374 b4a2caeadb43696bbe5350199c3331b3 "pdflatex" "MLPproject.out" 1761129240.22436 1374 b4a2caeadb43696bbe5350199c3331b3 "pdflatex"
"MLPproject.tex" 1761125791.50152 4318 6372d460a7a87caa250e0c6d0d25be18 "" "MLPproject.tex" 1761129236.61965 6049 c7e7910f66e6a5624dd2e411a4d86264 ""
"MLPproject.toc" 1761125793.07552 796 63fff7a313a297867c97aa2dfbafdb7f "pdflatex" "MLPproject.toc" 1761129240.22436 796 63fff7a313a297867c97aa2dfbafdb7f "pdflatex"
"SelfArx.cls" 1761123180.54708 7316 506603b27aab6da8087bc0f1ee693041 "" "SelfArx.cls" 1761123180.54708 7316 506603b27aab6da8087bc0f1ee693041 ""
(generated) (generated)
"MLPproject.aux" "MLPproject.aux"

View File

@@ -1,4 +1,4 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2025.10.12) 22 OCT 2025 11:36 This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2025.10.12) 22 OCT 2025 12:33
entering extended mode entering extended mode
restricted \write18 enabled. restricted \write18 enabled.
file:line:error style messages enabled. file:line:error style messages enabled.
@@ -651,40 +651,43 @@ LaTeX Font Info: Font shape `OT1/phv/m/it' in size <8> not available
\tf@toc=\write4 \tf@toc=\write4
\openout4 = `MLPproject.toc'. \openout4 = `MLPproject.toc'.
LaTeX Font Info: Trying to load font information for TS1+ptm on input line 77. LaTeX Font Info: Trying to load font information for TS1+ptm on input line 75.
(/usr/share/texlive/texmf-dist/tex/latex/psnfss/ts1ptm.fd (/usr/share/texlive/texmf-dist/tex/latex/psnfss/ts1ptm.fd
File: ts1ptm.fd 2001/06/04 font definitions for TS1/ptm. File: ts1ptm.fd 2001/06/04 font definitions for TS1/ptm.
) )
Underfull \hbox (badness 1448) in paragraph at lines 114--118 Underfull \hbox (badness 1448) in paragraph at lines 116--120
[]\OT1/ptm/m/n/10 (+20) UC Davis ChemWiki, Prop-a-ga-tion of Er-ror, Avail- []\OT1/ptm/m/n/10 (+20) UC Davis ChemWiki, Prop-a-ga-tion of Er-ror, Avail-
[] []
Underfull \hbox (badness 7649) in paragraph at lines 114--118 Underfull \hbox (badness 7649) in paragraph at lines 116--120
\OT1/ptm/m/n/10 (+20) able at: [][]$https : / / chem . libretexts . org / Textbook[]Maps / \OT1/ptm/m/n/10 (+20) able at: [][]$https : / / chem . libretexts . org / Textbook[]Maps /
[] []
Underfull \hbox (badness 10000) in paragraph at lines 114--118 Underfull \hbox (badness 10000) in paragraph at lines 116--120
\OT1/ptm/m/n/10 (+20) Analytical[]Chemistry / Supplemental[]Modules[] \OT1/ptm/m/n/10 (+20) Analytical[]Chemistry / Supplemental[]Modules[]
[] []
Underfull \hbox (badness 10000) in paragraph at lines 114--118 Underfull \hbox (badness 10000) in paragraph at lines 116--120
\OT1/ptm/m/n/10 (+20) (Analytical[]Chemistry ) /Quantifying[]Nature / \OT1/ptm/m/n/10 (+20) (Analytical[]Chemistry ) /Quantifying[]Nature /
[] []
Underfull \hbox (badness 10000) in paragraph at lines 114--118 Underfull \hbox (badness 10000) in paragraph at lines 116--120
\OT1/ptm/m/n/10 (+20) Signi^^Lcant[]Digits / Propagation[]of[]Error$[][], (Ac-cessed: \OT1/ptm/m/n/10 (+20) Signi^^Lcant[]Digits / Propagation[]of[]Error$[][], (Ac-cessed:
[] []
[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}{/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc}
]
Package caption Warning: Unused \captionsetup[subfigure] on input line 32. Package caption Warning: Unused \captionsetup[subfigure] on input line 32.
See the caption package documentation for explanation. See the caption package documentation for explanation.
[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}{/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc} [2
] ]
enddocument/afterlastpage: lastpage setting LastPage. enddocument/afterlastpage: lastpage setting LastPage.
@@ -693,18 +696,18 @@ Package rerunfilecheck Info: File `MLPproject.out' has not changed.
(rerunfilecheck) Checksum: B4A2CAEADB43696BBE5350199C3331B3;1374. (rerunfilecheck) Checksum: B4A2CAEADB43696BBE5350199C3331B3;1374.
) )
Here is how much of TeX's memory you used: Here is how much of TeX's memory you used:
18880 strings out of 476041 18917 strings out of 476041
320048 string characters out of 5793173 320357 string characters out of 5793173
1873388 words of memory out of 6000000 1878388 words of memory out of 6000000
38855 multiletter control sequences out of 15000+600000 38854 multiletter control sequences out of 15000+600000
566907 words of font info for 222 fonts, out of 8000000 for 9000 568376 words of font info for 247 fonts, out of 8000000 for 9000
1137 hyphenation exceptions out of 8191 1137 hyphenation exceptions out of 8191
75i,9n,77p,1049b,470s stack positions out of 10000i,1000n,20000p,200000b,200000s 75i,12n,77p,1049b,626s stack positions out of 10000i,1000n,20000p,200000b,200000s
</usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvro8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmr8a.pfb> </usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvro8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmr8a.pfb>
Output written on MLPproject.pdf (1 page, 56703 bytes). Output written on MLPproject.pdf (2 pages, 59091 bytes).
PDF statistics: PDF statistics:
110 PDF objects out of 1000 (max. 8388607) 115 PDF objects out of 1000 (max. 8388607)
86 compressed objects within 1 object stream 90 compressed objects within 1 object stream
15 named destinations out of 1000 (max. 500000) 16 named destinations out of 1000 (max. 500000)
98385 words of extra memory for PDF output out of 106986 (max. 10000000) 98385 words of extra memory for PDF output out of 106986 (max. 10000000)

Binary file not shown.

Binary file not shown.

View File

@@ -72,10 +72,12 @@
\subsection{Dataset} \subsection{Dataset}
%https://www.kaggle.com/datasets/mosapabdelghany/adult-income-prediction-dataset %https://www.kaggle.com/datasets/mosapabdelghany/adult-income-prediction-dataset
The dataset we decided to study is a labeled income prediction dataset. This dataset includes 14 features with The dataset we decided to study is a labeled income prediction dataset. This dataset includes 14 features with information about the people in the study and a label with the income as either more than \$50\,000 per year or less than or equal to \$50\,000 per year. This means that we are looking at a binary classification problem. Many of the features are discrete, with only a set number of options available. This includes features such as marital status, education and working class. The dataset contains around 32\,500 data points.
\subsection{Data cleaning and feature engineering} \subsection{Data cleaning and feature engineering}
There were a couple of things with our dataset that had to be modified in order for it to be usable in our ML application. We find that some of the features are redundant or not interesting in our project. We remove the redundant feature education since there is another, already numerically encoded, feature containing the same data. We also chose to remove the feature `fnlwgt' since it is an already calculated number that is used by the Census Bureau to estimate population statistics. Since we want to estimate the population statistics based on the other features and not the already calculated weight, we remove this feature. We have a mix of numerical and non-numerical features in our dataset. Since the machine learning models cannot use non-numerical data, we have to encode the non-numerical data into corresponding numbers. This is done with the label encoder built into scikit-learn, which is applied to all non-numerical data.
\subsection{Handling missing values} \subsection{Handling missing values}
With our numerical version of the dataset we found with the info function in pandas that around 2500 values were NaN values. We reasoned that filling these values with something such as the mean of the category does not make very much sense for our application. Since there are many discrete categories, a mean value means nothing, especially since we gave many categories arbitrary numbers.
\section{Model selection} \section{Model selection}