Compare commits

...

2 Commits

Author SHA1 Message Date
55eca37d53 Merge branch 'main' of https://gitea.jany.se/Jany/MLPproject 2025-10-27 11:37:08 +01:00
4bb645a352 Added new table 2025-10-27 11:37:06 +01:00
8 changed files with 62 additions and 43 deletions

View File

@@ -39,24 +39,27 @@
\newlabel{fig:@cref}{{[figure][1][]1}{[1][2][]2}}
\bibstyle{model1-num-names}
\bibcite{Steinhaus:Mathematical}{1}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces The performance metrics of the models on the validation data.\relax }}{3}{table.caption.2}\protected@file@percent }
\newlabel{perfmetric}{{1}{3}{The performance metrics of the models on the validation data.\relax }{table.caption.2}{}}
\newlabel{perfmetric@cref}{{[table][1][]1}{[1][2][]3}}
\@writefile{toc}{\contentsline {section}{References}{3}{figure.caption.3}\protected@file@percent }
\newlabel{fig:featureImportanceDT}{{2(a)}{3}{\relax }{figure.caption.3}{}}
\newlabel{fig:featureImportanceDT@cref}{{[subfigure][1][2]2(a)}{[1][3][]3}}
\newlabel{sub@fig:featureImportanceDT}{{(a)}{3}{\relax }{figure.caption.3}{}}
\newlabel{sub@fig:featureImportanceDT@cref}{{[subfigure][1][2]2(a)}{[1][3][]3}}
\newlabel{fig:featureImportanceRF}{{2(b)}{3}{\relax }{figure.caption.3}{}}
\newlabel{fig:featureImportanceRF@cref}{{[subfigure][2][2]2(b)}{[1][3][]3}}
\newlabel{sub@fig:featureImportanceRF}{{(b)}{3}{\relax }{figure.caption.3}{}}
\newlabel{sub@fig:featureImportanceRF@cref}{{[subfigure][2][2]2(b)}{[1][3][]3}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces The feature importance graphs for the Decision Tree model and the Random Forest model.\relax }}{3}{figure.caption.3}\protected@file@percent }
\newlabel{fig:}{{2}{3}{The feature importance graphs for the Decision Tree model and the Random Forest model.\relax }{figure.caption.3}{}}
\newlabel{fig:@cref}{{[figure][2][]2}{[1][3][]3}}
\bibcite{Greivenkamp:FieldGuide}{2}
\bibcite{Pedrotti:Introduction}{3}
\bibcite{Davis:ChemWiki}{4}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces The performance metrics of the models on the validation data.\relax }}{3}{table.caption.2}\protected@file@percent }
\newlabel{perfmetric}{{1}{3}{The performance metrics of the models on the validation data.\relax }{table.caption.2}{}}
\newlabel{perfmetric@cref}{{[table][1][]1}{[1][2][]3}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces The performance metrics of the models on the test data.\relax }}{3}{table.caption.3}\protected@file@percent }
\newlabel{perfmetric}{{2}{3}{The performance metrics of the models on the test data.\relax }{table.caption.3}{}}
\newlabel{perfmetric@cref}{{[table][2][]2}{[1][2][]3}}
\newlabel{fig:featureImportanceDT}{{2(a)}{3}{\relax }{figure.caption.4}{}}
\newlabel{fig:featureImportanceDT@cref}{{[subfigure][1][2]2(a)}{[1][3][]3}}
\newlabel{sub@fig:featureImportanceDT}{{(a)}{3}{\relax }{figure.caption.4}{}}
\newlabel{sub@fig:featureImportanceDT@cref}{{[subfigure][1][2]2(a)}{[1][3][]3}}
\newlabel{fig:featureImportanceRF}{{2(b)}{3}{\relax }{figure.caption.4}{}}
\newlabel{fig:featureImportanceRF@cref}{{[subfigure][2][2]2(b)}{[1][3][]3}}
\newlabel{sub@fig:featureImportanceRF}{{(b)}{3}{\relax }{figure.caption.4}{}}
\newlabel{sub@fig:featureImportanceRF@cref}{{[subfigure][2][2]2(b)}{[1][3][]3}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces The feature importance graphs for the Decision Tree model and the Random Forest model.\relax }}{3}{figure.caption.4}\protected@file@percent }
\newlabel{fig:}{{2}{3}{The feature importance graphs for the Decision Tree model and the Random Forest model.\relax }{figure.caption.4}{}}
\newlabel{fig:@cref}{{[figure][2][]2}{[1][3][]3}}
\@writefile{toc}{\contentsline {section}{References}{3}{figure.caption.4}\protected@file@percent }
\ttl@finishall
\newlabel{LastPage}{{}{4}{}{page.4}{}}
\xdef\lastpage@lastpage{4}

View File

@@ -1,6 +1,6 @@
# Fdb version 4
["pdflatex"] 1761408421.82051 "/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" "MLPproject.pdf" "MLPproject" 1761408422.38409 0
"/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" 1761408421.10156 15551 7c91f156b6c79cad294fb22ca0c64f64 ""
["pdflatex"] 1761561403.82895 "/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" "MLPproject.pdf" "MLPproject" 1761561404.678 0
"/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" 1761561402.77598 15966 7a773c81e3fe57345294b6a92b54398d ""
"/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc" 1721433600 4850 80dc9bab7f31fb78a000ccfed0e27cab ""
"/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1577235249 3524 cb3e574dea2d1052e39280babc910dc8 ""
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/helvetic/phvb7t.tfm" 1136768653 2240 eb56c13537f4d8a0bd3fafc25572b1bd ""
@@ -132,10 +132,10 @@
"/usr/share/texlive/texmf-dist/web2c/texmf.cnf" 1721433600 40900 887e0dc8cac988a9e9c574af364cf837 ""
"/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map" 1760290233.68077 4602002 62dba5fc29055c16380d7393a2adb07a ""
"/var/lib/texmf/web2c/pdftex/pdflatex.fmt" 1760289849 7753794 892d611f76aecccd13eb485815d0543e ""
"MLPproject.aux" 1761408422.32348 4598 98311111e5f17f774f4333b6899ad750 "pdflatex"
"MLPproject.out" 1761408422.3244 1585 96a5cbe86300c20e741bb675b1dad6de "pdflatex"
"MLPproject.tex" 1761408421.10156 15551 7c91f156b6c79cad294fb22ca0c64f64 ""
"MLPproject.toc" 1761408422.32537 847 58496dd675955cac8bd2874a312fe024 "pdflatex"
"MLPproject.aux" 1761561404.57999 4945 30825f194591ba7a2d8dc8b06523b8ac "pdflatex"
"MLPproject.out" 1761561404.5809 1585 799028b2b371d09d171b836f5fabb437 "pdflatex"
"MLPproject.tex" 1761561402.77598 15966 7a773c81e3fe57345294b6a92b54398d ""
"MLPproject.toc" 1761561404.58203 847 9491dde8a3fac8c42f1383615deb7f6b "pdflatex"
"SelfArx.cls" 1761123180.54708 7316 506603b27aab6da8087bc0f1ee693041 ""
"confusionMatrix.png" 1761329088.9659 21007 cfab042955bc386bac85d53bfe5a6793 ""
"featureImportanceDT.png" 1761328898.24566 60078 4a2e56e2a45ae2ae5e41b9830c1bbcea ""

View File

@@ -1,4 +1,4 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2025.10.12) 25 OCT 2025 18:07
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2025.10.12) 27 OCT 2025 11:36
entering extended mode
restricted \write18 enabled.
file:line:error style messages enabled.
@@ -496,6 +496,12 @@ LaTeX Font Info: Trying to load font information for OT1+ptm on input line 54
File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm.
) (./MLPproject.aux
LaTeX Warning: Label `perfmetric' multiply defined.
LaTeX Warning: Label `perfmetric@cref' multiply defined.
LaTeX Warning: Label `fig:featureImportanceDT' multiply defined.
@@ -702,42 +708,39 @@ Package pdftex.def Info: confusionMatrix.png used on input line 105.
<featureImportanceDT.png, id=87, 416.2752pt x 393.8715pt>
File: featureImportanceDT.png Graphic file (type png)
<use featureImportanceDT.png>
Package pdftex.def Info: featureImportanceDT.png used on input line 141.
Package pdftex.def Info: featureImportanceDT.png used on input line 154.
(pdftex.def) Requested size: 218.17422pt x 206.43103pt.
<featureImportanceRF.png, id=88, 422.0568pt x 393.8715pt>
File: featureImportanceRF.png Graphic file (type png)
<use featureImportanceRF.png>
Package pdftex.def Info: featureImportanceRF.png used on input line 148.
Package pdftex.def Info: featureImportanceRF.png used on input line 161.
(pdftex.def) Requested size: 218.17422pt x 203.60634pt.
Underfull \vbox (badness 10000) has occurred while \output is active []
[3 <./featureImportanceDT.png> <./featureImportanceRF.png>]
Underfull \hbox (badness 1448) in paragraph at lines 183--187
Underfull \hbox (badness 1448) in paragraph at lines 196--200
[]\OT1/ptm/m/n/10 (+20) UC Davis ChemWiki, Prop-a-ga-tion of Er-ror, Avail-
[]
Underfull \hbox (badness 7649) in paragraph at lines 183--187
Underfull \hbox (badness 7649) in paragraph at lines 196--200
\OT1/ptm/m/n/10 (+20) able at: [][]$https : / / chem . libretexts . org / Textbook[]Maps /
[]
Underfull \hbox (badness 10000) in paragraph at lines 183--187
Underfull \hbox (badness 10000) in paragraph at lines 196--200
\OT1/ptm/m/n/10 (+20) Analytical[]Chemistry / Supplemental[]Modules[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 183--187
Underfull \hbox (badness 10000) in paragraph at lines 196--200
\OT1/ptm/m/n/10 (+20) (Analytical[]Chemistry ) /Quantifying[]Nature /
[]
Underfull \hbox (badness 10000) in paragraph at lines 183--187
Underfull \hbox (badness 10000) in paragraph at lines 196--200
\OT1/ptm/m/n/10 (+20) Signi^^Lcant[]Digits / Propagation[]of[]Error$[][], (Ac-cessed:
[]
[4
[3 <./featureImportanceDT.png> <./featureImportanceRF.png>] [4
]
enddocument/afterlastpage: lastpage setting LastPage.
@@ -746,21 +749,21 @@ enddocument/afterlastpage: lastpage setting LastPage.
LaTeX Warning: There were multiply-defined labels.
Package rerunfilecheck Info: File `MLPproject.out' has not changed.
(rerunfilecheck) Checksum: 96A5CBE86300C20E741BB675B1DAD6DE;1585.
(rerunfilecheck) Checksum: 799028B2B371D09D171B836F5FABB437;1585.
)
Here is how much of TeX's memory you used:
19026 strings out of 476041
322014 string characters out of 5793173
19027 strings out of 476041
322029 string characters out of 5793173
1876388 words of memory out of 6000000
38900 multiletter control sequences out of 15000+600000
570518 words of font info for 283 fonts, out of 8000000 for 9000
1137 hyphenation exceptions out of 8191
75i,12n,77p,1611b,627s stack positions out of 10000i,1000n,20000p,200000b,200000s
75i,12n,77p,1611b,619s stack positions out of 10000i,1000n,20000p,200000b,200000s
</usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvro8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmr8a.pfb>
Output written on MLPproject.pdf (4 pages, 159792 bytes).
Output written on MLPproject.pdf (4 pages, 160208 bytes).
PDF statistics:
140 PDF objects out of 1000 (max. 8388607)
106 compressed objects within 2 object streams
21 named destinations out of 1000 (max. 500000)
142 PDF objects out of 1000 (max. 8388607)
108 compressed objects within 2 object streams
22 named destinations out of 1000 (max. 500000)
98400 words of extra memory for PDF output out of 106986 (max. 10000000)

View File

@@ -7,4 +7,4 @@
\BOOKMARK [1][-]{section.3}{\376\377\000M\000o\000d\000e\000l\000\040\000s\000e\000l\000e\000c\000t\000i\000o\000n}{}% 7
\BOOKMARK [1][-]{section.4}{\376\377\000M\000o\000d\000e\000l\000\040\000T\000r\000a\000i\000n\000i\000n\000g\000\040\000a\000n\000d\000\040\000H\000y\000p\000e\000r\000p\000a\000r\000a\000m\000e\000t\000e\000r\000\040\000T\000u\000n\000i\000n\000g}{}% 8
\BOOKMARK [1][-]{section.5}{\376\377\000M\000o\000d\000e\000l\000\040\000E\000v\000a\000l\000u\000a\000t\000i\000o\000n\000s}{}% 9
\BOOKMARK [1][-]{figure.caption.3}{\376\377\000R\000e\000f\000e\000r\000e\000n\000c\000e\000s}{}% 10
\BOOKMARK [1][-]{figure.caption.4}{\376\377\000R\000e\000f\000e\000r\000e\000n\000c\000e\000s}{}% 10

Binary file not shown.

Binary file not shown.

View File

@@ -125,6 +125,19 @@ This is a very interesting result and maybe not so weird as it first seems. Ther
DT&0.8483&0.8449&0.8483&0.8462&6.7357
\end{tabular}}
\end{table}
% Performance metrics of both models on the test data.
% This table needs its own label: reusing the validation table's `perfmetric`
% makes LaTeX warn "Label `perfmetric' multiply defined", and every
% \ref{perfmetric} then resolves to whichever definition comes last.
% NOTE(review): the DT row below is identical to the DT row of the validation
% table -- confirm these are the test-set figures and not a copy-paste placeholder.
\begin{table}[!htbp]
	\centering
	\caption{The performance metrics of the models on the test data.}
	\label{tab:perfmetricTest}
	% The trailing % keeps the line break from adding a space inside the scaled box.
	\resizebox{\columnwidth}{!}{%
	\begin{tabular}{c|c|c|c|c|c}
		Model&Accuracy&Precision&Recall&F1 Score&Total Time\\
		\hline
		RF&0.8589&0.8535&0.8589&0.8534&150.8154\\
		\hline
		DT&0.8483&0.8449&0.8483&0.8462&6.7357
	\end{tabular}}
\end{table}
Looking at the values we see that the difference between our models is not that large. The Random Forest model is on average about 1 percentage point better than the Decision Tree. We can also see that all metrics are at about 0.85. This means that our models are not very accurate and that the differences between them are not that large at all. Which model is better depends a lot on what the priority is. While it is clear that the Random Forest has the better performance, even if only by a little bit, it is also significantly slower. So for this dataset, was it really worth 30x the computational time to get a slightly better result? We are not really sure. The extra computational time is a definite negative, but at the size of this dataset we are only talking about a couple of minutes, which is not too bad. For another dataset the results may be different and it might be clearer which is really the preferred model.
At a first glance at both the confusion matrices and the performance metrics the models do not look to be that good. But what has to be considered is the data that we are analyzing. We are looking at what possible indicators there are for a person to earn more than a certain amount of money. This is real world data and in the real world there are a lot of unique ways of earning money. While there certainly are some indicators that will clearly tell that somebody is earning a lot of money, there are other factors that are not as telling. This means that some features are less important than others. This can be seen for our models in the feature importance graphs in Figures~\ref{fig:featureImportanceDT} and~\ref{fig:featureImportanceRF}. This also means that there will be plenty of outliers in the data. No matter how good the model is, it cannot possibly catch all of these outliers. If it did it would be overfitted. We simply cannot expect a model to have very good accuracy on this type of data set.

View File

@@ -8,5 +8,5 @@
\contentsline {section}{\numberline {3}Model selection}{1}{section.3}%
\contentsline {section}{\numberline {4}Model Training and Hyperparameter Tuning}{2}{section.4}%
\contentsline {section}{\numberline {5}Model Evaluations}{2}{section.5}%
\contentsline {section}{References}{3}{figure.caption.3}%
\contentsline {section}{References}{3}{figure.caption.4}%
\contentsfinish