Redid figs

This commit is contained in:
2025-10-30 13:22:49 +01:00
parent fa6d05f24a
commit d587a7064e
9 changed files with 64 additions and 129 deletions

View File

@@ -47,13 +47,12 @@
\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Analyzing Weighted Performance Metrics}{4}{subsection.6.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces The performance metrics of the models on the validation data.\relax }}{4}{table.caption.2}\protected@file@percent }
\newlabel{perfmetric}{{1}{4}{The performance metrics of the models on the validation data.\relax }{table.caption.2}{}}
\newlabel{perfmetric@cref}{{[table][1][]1}{[1][4][]4}}
\newlabel{perfmetric@cref}{{[table][1][]1}{[1][3][]4}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces The performance metrics of the models on the test data.\relax }}{4}{table.caption.3}\protected@file@percent }
\newlabel{perfmetrictest}{{2}{4}{The performance metrics of the models on the test data.\relax }{table.caption.3}{}}
\newlabel{perfmetrictest@cref}{{[table][2][]2}{[1][4][]4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Analyzing the Performance}{5}{subsection.6.3}\protected@file@percent }
\newlabel{perfmetrictest@cref}{{[table][2][]2}{[1][3][]4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Analyzing the Performance}{4}{subsection.6.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {6.4}Overfitting and Underfitting}{5}{subsection.6.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {6.5}Feature Importance}{5}{subsection.6.5}\protected@file@percent }
\newlabel{fig:featureImportanceDT}{{2(a)}{5}{\relax }{figure.caption.4}{}}
\newlabel{fig:featureImportanceDT@cref}{{[subfigure][1][2]2(a)}{[1][5][]5}}
\newlabel{sub@fig:featureImportanceDT}{{(a)}{5}{\relax }{figure.caption.4}{}}
@@ -65,15 +64,10 @@
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces The feature importance graphs for the Decision Tree model and the Random Forest model.\relax }}{5}{figure.caption.4}\protected@file@percent }
\newlabel{fig:}{{2}{5}{The feature importance graphs for the Decision Tree model and the Random Forest model.\relax }{figure.caption.4}{}}
\newlabel{fig:@cref}{{[figure][2][]2}{[1][5][]5}}
\bibstyle{model1-num-names}
\bibcite{Steinhaus:Mathematical}{1}
\bibcite{Greivenkamp:FieldGuide}{2}
\bibcite{Pedrotti:Introduction}{3}
\bibcite{Davis:ChemWiki}{4}
\@writefile{toc}{\contentsline {subsection}{\numberline {6.5}Feature Importance}{5}{subsection.6.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {7}Summary}{5}{section.7}\protected@file@percent }
\ttl@finishall
\@writefile{toc}{\contentsline {section}{\numberline {7}Summary}{6}{section.7}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{References}{6}{section.7}\protected@file@percent }
\newlabel{LastPage}{{}{6}{}{page.6}{}}
\xdef\lastpage@lastpage{6}
\xdef\lastpage@lastpageHy{6}
\gdef \@abspage@last{6}
\newlabel{LastPage}{{}{5}{}{page.5}{}}
\xdef\lastpage@lastpage{5}
\xdef\lastpage@lastpageHy{5}
\gdef \@abspage@last{5}

View File

@@ -1,6 +1,6 @@
# Fdb version 4
["pdflatex"] 1761825977.46345 "/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" "MLPproject.pdf" "MLPproject" 1761825979.44337 0
"/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" 1761825974.63055 26288 f774e507a7e6764abdedfc00057099d7 ""
["pdflatex"] 1761826831.99817 "/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" "MLPproject.pdf" "MLPproject" 1761826834.06411 0
"/home/jaknyst/Documents/MLPproject/Report/MLPproject.tex" 1761826831.67703 25484 b909380ef4186262cc37c215d1d67a9a ""
"/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc" 1721433600 4850 80dc9bab7f31fb78a000ccfed0e27cab ""
"/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1577235249 3524 cb3e574dea2d1052e39280babc910dc8 ""
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/helvetic/phvb7t.tfm" 1136768653 2240 eb56c13537f4d8a0bd3fafc25572b1bd ""
@@ -10,7 +10,6 @@
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/helvetic/phvro7t.tfm" 1136768653 2772 ab6561c8ff5ee69ff6d5961b9356db5a ""
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/helvetic/phvro8r.tfm" 1136768653 4964 f223217e5e1f85fa3742fb0480aba9e8 ""
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/times/ptmb7t.tfm" 1136768653 2172 fd0c924230362ff848a33632ed45dc23 ""
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/times/ptmb8r.tfm" 1136768653 4524 6bce29db5bc272ba5f332261583fee9c ""
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/times/ptmr7t.tfm" 1136768653 2124 2601a75482e9426d33db523edf23570a ""
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/times/ptmr8c.tfm" 1136768653 1352 fa28a7e6d323c65ce7d13d5342ff6be2 ""
"/usr/share/texlive/texmf-dist/fonts/tfm/adobe/times/ptmr8r.tfm" 1136768653 4408 25b74d011a4c66b7f212c0cc3c90061b ""
@@ -24,12 +23,10 @@
"/usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvb8a.pfb" 1136849748 35941 f27169cc74234d5bd5e4cca5abafaabb ""
"/usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb" 1136849748 44648 23115b2a545ebfe2c526c3ca99db8b95 ""
"/usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvro8a.pfb" 1136849748 48169 b4fd9d908b9ee8c65d4305ad39071c5e ""
"/usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmb8a.pfb" 1136849748 44729 811d6c62865936705a31c797a1d5dada ""
"/usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmr8a.pfb" 1136849748 46026 6dab18b61c907687b520c72847215a68 ""
"/usr/share/texlive/texmf-dist/fonts/vf/adobe/helvetic/phvb7t.vf" 1136768653 1372 1c26b449eb4a1a0bcf6ac7cfe376d450 ""
"/usr/share/texlive/texmf-dist/fonts/vf/adobe/helvetic/phvr7t.vf" 1136768653 1372 dc841a9f00a1a11b1443367ae6c5588e ""
"/usr/share/texlive/texmf-dist/fonts/vf/adobe/helvetic/phvro7t.vf" 1136768653 1372 9948cedecdb0445a3b5cf1b8a8082ab8 ""
"/usr/share/texlive/texmf-dist/fonts/vf/adobe/times/ptmb7t.vf" 1136768653 1372 788387fea833ef5963f4c5bffe33eb89 ""
"/usr/share/texlive/texmf-dist/fonts/vf/adobe/times/ptmr7t.vf" 1136768653 1380 0ea3a3370054be6da6acd929ec569f06 ""
"/usr/share/texlive/texmf-dist/fonts/vf/adobe/times/ptmr8c.vf" 1136768653 3556 8a9a6dcbcd146ef985683f677f4758a6 ""
"/usr/share/texlive/texmf-dist/tex/context/base/mkii/supp-pdf.mkii" 1721433600 71627 94eb9990bed73c364d7f53f960cc8c5b ""
@@ -134,10 +131,10 @@
"/var/lib/texmf/web2c/pdftex/pdflatex.fmt" 1760289849 7753794 892d611f76aecccd13eb485815d0543e ""
"CM_dt.png" 1761561428.73434 87433 ef7840e96e2e4e7d41f9d29d01517aa6 ""
"CM_rf.png" 1761561428.73495 88928 d3d0474bb68254ae0bba2e635ab99231 ""
"MLPproject.aux" 1761825979.2759 6515 6007d19cad9448bda42bad5839bdf8e5 "pdflatex"
"MLPproject.out" 1761825979.2779 3852 caf78b736930e985ead6848b8c813653 "pdflatex"
"MLPproject.tex" 1761825974.63055 26288 f774e507a7e6764abdedfc00057099d7 ""
"MLPproject.toc" 1761825979.27929 1917 6fbe84ece41f0003e5c72512bad2b6c9 "pdflatex"
"MLPproject.aux" 1761826833.91004 6260 0b50cad3e5dbdb87ae5a15918247acfb "pdflatex"
"MLPproject.out" 1761826833.91204 3758 71b69fe4b092934da11f6db2b9fc27bd "pdflatex"
"MLPproject.tex" 1761826831.67703 25484 b909380ef4186262cc37c215d1d67a9a ""
"MLPproject.toc" 1761826833.91325 1866 d9b68267ee2cdd579efd7907196d8c4b "pdflatex"
"SelfArx.cls" 1761123180.54708 7316 506603b27aab6da8087bc0f1ee693041 ""
"featureImportanceDT.png" 1761328898.24566 60078 4a2e56e2a45ae2ae5e41b9830c1bbcea ""
"featureImportanceRF.png" 1761328962.51602 61794 6b3eefc625dd3da8a3dbf302174c614c ""

View File

@@ -1043,15 +1043,10 @@ INPUT ./featureImportanceRF.png
INPUT featureImportanceRF.png
INPUT ./featureImportanceRF.png
INPUT ./featureImportanceRF.png
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/adobe/times/ptmr7t.tfm
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/adobe/times/ptmr7t.tfm
INPUT /usr/share/texlive/texmf-dist/fonts/vf/adobe/times/ptmb7t.vf
INPUT /usr/share/texlive/texmf-dist/fonts/tfm/adobe/times/ptmb8r.tfm
INPUT MLPproject.aux
INPUT ./MLPproject.out
INPUT ./MLPproject.out
INPUT /usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvb8a.pfb
INPUT /usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb
INPUT /usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvro8a.pfb
INPUT /usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmb8a.pfb
INPUT /usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmr8a.pfb

View File

@@ -1,4 +1,4 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2025.10.12) 30 OCT 2025 13:06
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2025.10.12) 30 OCT 2025 13:20
entering extended mode
restricted \write18 enabled.
file:line:error style messages enabled.
@@ -689,76 +689,53 @@ File: ts1ptm.fd 2001/06/04 font definitions for TS1/ptm.
] [2]
<CM_dt.png, id=150, 462.528pt x 346.896pt>
<CM_dt.png, id=145, 462.528pt x 346.896pt>
File: CM_dt.png Graphic file (type png)
<use CM_dt.png>
Package pdftex.def Info: CM_dt.png used on input line 118.
(pdftex.def) Requested size: 242.41745pt x 181.81612pt.
<CM_rf.png, id=152, 462.528pt x 346.896pt>
Package pdftex.def Info: CM_dt.png used on input line 112.
(pdftex.def) Requested size: 230.29584pt x 172.7224pt.
<CM_rf.png, id=147, 462.528pt x 346.896pt>
File: CM_rf.png Graphic file (type png)
<use CM_rf.png>
Package pdftex.def Info: CM_rf.png used on input line 125.
(pdftex.def) Requested size: 242.41745pt x 181.81612pt.
[3] [4 <./CM_dt.png> <./CM_rf.png>]
<featureImportanceDT.png, id=173, 416.2752pt x 393.8715pt>
Package pdftex.def Info: CM_rf.png used on input line 119.
(pdftex.def) Requested size: 230.29584pt x 172.7224pt.
LaTeX Warning: `!h' float specifier changed to `!ht'.
[3] [4 <./CM_dt.png> <./CM_rf.png>]
<featureImportanceDT.png, id=169, 416.2752pt x 393.8715pt>
File: featureImportanceDT.png Graphic file (type png)
<use featureImportanceDT.png>
Package pdftex.def Info: featureImportanceDT.png used on input line 182.
(pdftex.def) Requested size: 218.17422pt x 206.43103pt.
<featureImportanceRF.png, id=174, 422.0568pt x 393.8715pt>
Package pdftex.def Info: featureImportanceDT.png used on input line 179.
(pdftex.def) Requested size: 206.0563pt x 194.96999pt.
<featureImportanceRF.png, id=170, 422.0568pt x 393.8715pt>
File: featureImportanceRF.png Graphic file (type png)
<use featureImportanceRF.png>
Package pdftex.def Info: featureImportanceRF.png used on input line 189.
(pdftex.def) Requested size: 218.17422pt x 203.60634pt.
Package pdftex.def Info: featureImportanceRF.png used on input line 186.
(pdftex.def) Requested size: 206.0563pt x 192.29555pt.
[5 <./featureImportanceDT.png> <./featureImportanceRF.png>]
Underfull \hbox (badness 1448) in paragraph at lines 225--229
[]\OT1/ptm/m/n/10 (+20) UC Davis ChemWiki, Prop-a-ga-tion of Er-ror, Avail-
[]
Underfull \hbox (badness 7649) in paragraph at lines 225--229
\OT1/ptm/m/n/10 (+20) able at: [][]$https : / / chem . libretexts . org / Textbook[]Maps /
[]
Underfull \hbox (badness 10000) in paragraph at lines 225--229
\OT1/ptm/m/n/10 (+20) Analytical[]Chemistry / Supplemental[]Modules[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 225--229
\OT1/ptm/m/n/10 (+20) (Analytical[]Chemistry ) /Quantifying[]Nature /
[]
Underfull \hbox (badness 10000) in paragraph at lines 225--229
\OT1/ptm/m/n/10 (+20) Signi^^Lcant[]Digits / Propagation[]of[]Error$[][], (Ac-cessed:
[]
[6
]
enddocument/afterlastpage: lastpage setting LastPage.
(./MLPproject.aux)
LaTeX Warning: There were multiply-defined labels.
Package rerunfilecheck Info: File `MLPproject.out' has not changed.
(rerunfilecheck) Checksum: CAF78B736930E985EAD6848B8C813653;3852.
(rerunfilecheck) Checksum: 71B69FE4B092934DA11F6DB2B9FC27BD;3758.
)
Here is how much of TeX's memory you used:
19092 strings out of 476041
322688 string characters out of 5793173
19063 strings out of 476041
322261 string characters out of 5793173
1878388 words of memory out of 6000000
38919 multiletter control sequences out of 15000+600000
571741 words of font info for 304 fonts, out of 8000000 for 9000
38908 multiletter control sequences out of 15000+600000
569282 words of font info for 295 fonts, out of 8000000 for 9000
1137 hyphenation exceptions out of 8191
75i,13n,77p,1644b,605s stack positions out of 10000i,1000n,20000p,200000b,200000s
</usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvro8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmr8a.pfb>
Output written on MLPproject.pdf (6 pages, 301321 bytes).
75i,12n,77p,1644b,605s stack positions out of 10000i,1000n,20000p,200000b,200000s
</usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvb8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/helvetic/uhvro8a.pfb></usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmr8a.pfb>
Output written on MLPproject.pdf (5 pages, 290735 bytes).
PDF statistics:
228 PDF objects out of 1000 (max. 8388607)
178 compressed objects within 2 object streams
36 named destinations out of 1000 (max. 500000)
98501 words of extra memory for PDF output out of 106986 (max. 10000000)
205 PDF objects out of 1000 (max. 8388607)
159 compressed objects within 2 object streams
31 named destinations out of 1000 (max. 500000)
92349 words of extra memory for PDF output out of 106986 (max. 10000000)

View File

@@ -19,4 +19,3 @@
\BOOKMARK [2][-]{subsection.6.4}{\376\377\000O\000v\000e\000r\000f\000i\000t\000t\000i\000n\000g\000\040\000a\000n\000d\000\040\000U\000n\000d\000e\000r\000f\000i\000t\000t\000i\000n\000g}{section.6}% 19
\BOOKMARK [2][-]{subsection.6.5}{\376\377\000F\000e\000a\000t\000u\000r\000e\000\040\000I\000m\000p\000o\000r\000t\000a\000n\000c\000e}{section.6}% 20
\BOOKMARK [1][-]{section.7}{\376\377\000S\000u\000m\000m\000a\000r\000y}{}% 21
\BOOKMARK [1][-]{section.7}{\376\377\000R\000e\000f\000e\000r\000e\000n\000c\000e\000s}{}% 22

Binary file not shown.

Binary file not shown.

View File

@@ -105,24 +105,18 @@ When performing the hyperparameter tuning, we started out with a rough grid to g
\subsection{Caveats and Restrictions}
Although the validation results produced from the script are quite promising there are a couple of important notes to make, not only to better understand the final models but also to avoid pitfalls in potential future projects. Firstly, in our script we decided to not use any standardization as this is a sort of unique case where the models used do not require it. However, it's extremely important to understand that if we were to introduce another model, we would need to standardize the data to ensure that the features contribute equally. Secondly, there are more hyperparameters that one might want to consider as we only used a few of them. The problem with expanding the number of hyperparameters in the grid is that it will exponentially increase the computational load. Therefore we picked a few that we thought were most important. Continuing, the scoring metric used is not always the best choice. We used accuracy, meaning the model tries to correctly label as many datapoints as possible and does not care about keeping a similar precision for both labels. Our goal in this project is somewhat arbitrary; we mainly want to train and compare models. However, if such a model were to be used in a real-world application, one might want to change the scoring to better adapt the model to the problem at hand. % Elaborate... Secondly, there are more hyperparameters that one might want to consider... Continuing, the scoring metric used is not always the best choice. In fact, the scoring metric one should use is highly dependent on what one's goal is...
\section{Model Evaluations}
There are two interesting parts to look at after our analysis. One part is to analyze how well the actual models performed and compare the difference between the two models we have chosen to study. We fine tuned our models using the validation part of the data. After running it on the test data we can see how well it actually performs. A great way to get a quick overview of how well a model classifies is to look at the confusion matrix.
\subsection{Analyzing the Confusion Matrices}
\begin{figure}[!hptb]
\begin{figure}[!h]
\centering
\begin{subfigure}[b]{\columnwidth}
\centering
\includegraphics[width=\textwidth]{CM_dt.png}
\includegraphics[width=0.95\textwidth]{CM_dt.png}
\caption{}
\label{fig:featureImportanceDT}
\end{subfigure}
\hfill
\begin{subfigure}[b]{\columnwidth}
\centering
\includegraphics[width=\textwidth]{CM_rf.png}
\includegraphics[width=0.95\textwidth]{CM_rf.png}
\caption{}
\label{fig:featureImportanceRF}
\end{subfigure}
@@ -130,6 +124,13 @@ There are two interesting parts to look at after our analysis. One part is to an
\label{fig:}
\end{figure}
\section{Model Evaluations}
There are two interesting parts to look at after our analysis. One part is to analyze how well the actual models performed and compare the difference between the two models we have chosen to study. We fine tuned our models using the validation part of the data. After running it on the test data we can see how well it actually performs. A great way to get a quick overview of how well a model classifies is to look at the confusion matrix.
\subsection{Analyzing the Confusion Matrices}
As we can see in the confusion matrices there is not that big of a difference between the models. Both did an overall good job at identifying the two classes. There is, however, a difference in how well the models identified each of the two classes: overall they performed a lot better at classifying the poor people than the rich. We can see that both models are pretty good at classifying the poor class and worse at the rich class. The Random Forest model is slightly better than the Decision Tree. This is a very interesting result, and perhaps less strange than it first seems: there were a lot more poor people in our training data set than rich people, which would of course train our models to be better at classifying the poor. As well as looking at the confusion matrices, it is interesting to look at the actual performance metrics that can be calculated from them.
\subsection{Analyzing Weighted Performance Metrics}
We want to analyze two sets of metrics. First we have the validation metrics, which can be seen in Table~\ref{perfmetric}. Then we have the actual test metrics, which are the results from our models; these can be seen in Table~\ref{perfmetrictest}. Of note is that all of these metrics are calculated as weighted metrics, which means that they account for the class imbalances seen in the confusion matrices.
@@ -171,20 +172,16 @@ An important thing to touch on is the poor fit on rich people by our model. We s
\subsection{Overfitting and Underfitting}
We spent some time tuning the hyperparameters to ensure that we did not overfit. If we compare the validation results with the test results we see that the performance metrics do not change much at all. This is what we want to see, as it means that we have avoided overfitting the model. This means that our model could be used on other similar datasets and hopefully give similar performances. We also do not want our model to be underfit. This is a bit harder to validate, as we want the errors to be as small as possible for both training and testing, and as we stated before we believe that this is a difficult dataset to get a great fit to. Therefore we believe that we have found a model that has a decent enough balance between bias and variance.
\subsection{Feature Importance}
Taking a closer look at the feature importance graphs of the two models we notice an interesting difference. The Decision Tree, which is only a single tree, focuses on only a few main features, of which one is the most important; the rest are used very little or almost not at all. The Random Forest uses a far wider range of features. The models also rank the features a bit differently, and the best feature for one model is not the best for the other. We considered removing the worst-performing features to see if it would make a difference in the performances, but since the models have different worst-performing features we reasoned that, to keep the comparison as fair as possible, it would be more interesting to leave the features as is.
\begin{figure}[!hptb]
\centering
\begin{subfigure}[b]{0.9\columnwidth}
\begin{subfigure}[b]{0.85\columnwidth}
\centering
\includegraphics[width=\textwidth]{featureImportanceDT.png}
\caption{}
\label{fig:featureImportanceDT}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.9\columnwidth}
\begin{subfigure}[b]{0.85\columnwidth}
\centering
\includegraphics[width=\textwidth]{featureImportanceRF.png}
\caption{}
@@ -194,39 +191,16 @@ Taking a closer look at the feature importance graphs of the two models we notic
\label{fig:}
\end{figure}
\subsection{Feature Importance}
Taking a closer look at the feature importance graphs of the two models we notice an interesting difference. The Decision Tree, which is only a single tree, focuses on only a few main features, of which one is the most important; the rest are used very little or almost not at all. The Random Forest uses a far wider range of features. The models also rank the features a bit differently, and the best feature for one model is not the best for the other. We considered removing the worst-performing features to see if it would make a difference in the performances, but since the models have different worst-performing features we reasoned that, to keep the comparison as fair as possible, it would be more interesting to leave the features as is.
\section{Summary}
We have successfully trained two different but similar machine learning models on classifying the monetary status of people based on a number of different features. While some trade-offs were made in regard to which features were kept and what we optimized the models for, we still managed to get a respectable result, especially considering the difficult type of data that we had to work with.
%---------
% REFERENCE LIST
%----------------------------
\bibliographystyle{model1-num-names}
\begin{thebibliography}{4}
\bibitem{Steinhaus:Mathematical}
Steinhaus, H.,
Mathematical Snapshots,
3rd Edition. New York: Dover, pp. 93-94,
(1999)
\bibitem{Greivenkamp:FieldGuide}
Greivenkamp,
J. E., Field Guide to Geometrical Optics,
SPIE Press,
Bellingham, WA,
(2004)
\bibitem{Pedrotti:Introduction}
Pedrotti, F.L. and Pedrotti, L.S.,
Introduction to Optics,
3rd Edition,
Addison-Wesley,
(2006)
\bibitem{Davis:ChemWiki}
UC Davis ChemWiki,
Propagation of Error,
Available at: \url{https://chem.libretexts.org/Textbook_Maps/Analytical_Chemistry/Supplemental_Modules_(Analytical_Chemistry)/Quantifying_Nature/Significant_Digits/Propagation_of_Error},
(Accessed: 10th March 2016).
\end{thebibliography}

View File

@@ -16,9 +16,8 @@
\contentsline {section}{\numberline {6}Model Evaluations}{3}{section.6}%
\contentsline {subsection}{\numberline {6.1}Analyzing the Confusion Matricies}{3}{subsection.6.1}%
\contentsline {subsection}{\numberline {6.2}Analyzing Weighted Performance Metrics}{4}{subsection.6.2}%
\contentsline {subsection}{\numberline {6.3}Analyzing the Performance}{5}{subsection.6.3}%
\contentsline {subsection}{\numberline {6.3}Analyzing the Performance}{4}{subsection.6.3}%
\contentsline {subsection}{\numberline {6.4}Overfitting and Underfitting}{5}{subsection.6.4}%
\contentsline {subsection}{\numberline {6.5}Feature Importance}{5}{subsection.6.5}%
\contentsline {section}{\numberline {7}Summary}{5}{section.7}%
\contentsfinish
\contentsline {section}{\numberline {7}Summary}{6}{section.7}%
\contentsline {section}{References}{6}{section.7}%