{"id":430,"date":"2024-02-29T14:40:25","date_gmt":"2024-02-29T19:40:25","guid":{"rendered":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/chapter\/preprocessing-scaling-and-dummies\/"},"modified":"2024-02-29T14:40:53","modified_gmt":"2024-02-29T19:40:53","slug":"preprocessing-scaling-and-dummies","status":"publish","type":"chapter","link":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/chapter\/preprocessing-scaling-and-dummies\/","title":{"raw":"Preprocessing Scaling and Dummies","rendered":"Preprocessing Scaling and Dummies"},"content":{"raw":"<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\r\n<h2 id=\"Preprocessing:-Scaling-and-Dummies\">Preprocessing: Scaling and Dummies<a class=\"anchor-link\" href=\"#Preprocessing:-Scaling-and-Dummies\">\u00b6<\/a><\/h2>\r\nWe will do a few things:\r\n<ul>\r\n \t<li>Open the dataset<\/li>\r\n \t<li>Scale what needs scaling<\/li>\r\n \t<li>Dummy Variables<\/li>\r\n \t<li>Save as a new file<\/li>\r\n<\/ul>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[1]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">## Import some libraries<\/span>\r\n<span class=\"c1\">#from sklearn import datasets<\/span>\r\n\r\n<span class=\"c1\">#from
sklearn.metrics import accuracy_score<\/span>\r\n<span class=\"c1\">#from sklearn.metrics import silhouette_score<\/span>\r\n<span class=\"c1\">#from sklearn.cluster import KMeans<\/span>\r\n<span class=\"c1\">#from sklearn.cluster import AgglomerativeClustering <\/span>\r\n<span class=\"c1\">#import numpy as np<\/span>\r\n<span class=\"c1\">#import matplotlib.pyplot as plt<\/span>\r\n<span class=\"c1\">#from scipy import stats<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">pandas<\/span> \r\n\r\n<span class=\"c1\">#%matplotlib inline<\/span>\r\n\r\n\r\n<span class=\"c1\">## Start with packages<\/span>\r\n\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">numpy<\/span> <span class=\"k\">as<\/span> <span class=\"nn\">np<\/span>\r\n<span class=\"c1\">#import pandas<\/span>\r\n<span class=\"c1\">## this lets me datestamp files: a thing that is really nice<\/span>\r\n<span class=\"kn\">from<\/span> <span class=\"nn\">datetime<\/span> <span class=\"kn\">import<\/span> <span class=\"n\">date<\/span>\r\n\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">os<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[2]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">## We are also going to read in the student survey we looked at earlier.<\/span>\r\n<span class=\"c1\">## I'm using a more current version, because you guys filled it out.<\/span>\r\n\r\n<span class=\"n\">survey<\/span> <span class=\"o\">=<\/span> <span class=\"n\">pandas<\/span><span class=\"o\">.<\/span><span
class=\"n\">read_csv<\/span><span class=\"p\">(<\/span><span class=\"s2\">\"data\\Clean_Survey_2023-01-26.csv\"<\/span><span class=\"p\">)<\/span> \r\n<span class=\"n\">survey<\/span><span class=\"o\">.<\/span><span class=\"n\">describe<\/span><span class=\"p\">()<\/span><span class=\"o\">.<\/span><span class=\"n\">round<\/span><span class=\"p\">(<\/span><span class=\"mi\">2<\/span><span class=\"p\">)<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[2]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\"
border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Age<\/th>\r\n<th>Height<\/th>\r\n<th>Weight.<\/th>\r\n<th>Old_Salary<\/th>\r\n<th>Expected_Salary<\/th>\r\n<th>Expenses<\/th>\r\n<th>Transportation_Cost<\/th>\r\n<th>Entertainment<\/th>\r\n<th>Cellphone_Cost<\/th>\r\n<th>Footsize<\/th>\r\n<th>Alcohol<\/th>\r\n<th>Sleep<\/th>\r\n<th>Social_Network<\/th>\r\n<th>Homework<\/th>\r\n<th>Work<\/th>\r\n<th>Coffee<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>count<\/th>\r\n<td>164.00<\/td>\r\n<td>154.00<\/td>\r\n<td>161.00<\/td>\r\n<td>156.00<\/td>\r\n<td>146.00<\/td>\r\n<td>154.00<\/td>\r\n<td>144.00<\/td>\r\n<td>157.00<\/td>\r\n<td>160.00<\/td>\r\n<td>145.00<\/td>\r\n<td>139.00<\/td>\r\n<td>163.00<\/td>\r\n<td>150.00<\/td>\r\n<td>155.00<\/td>\r\n<td>159.00<\/td>\r\n<td>158.00<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>mean<\/th>\r\n<td>27.90<\/td>\r\n<td>171.12<\/td>\r\n<td>77.88<\/td>\r\n<td>2659.50<\/td>\r\n<td>4566.69<\/td>\r\n<td>1246.88<\/td>\r\n<td>80.65<\/td>\r\n<td>181.85<\/td>\r\n<td>64.60<\/td>\r\n<td>91.72<\/td>\r\n<td>1.16<\/td>\r\n<td>8.49<\/td>\r\n<td>7.77<\/td>\r\n<td>14.34<\/td>\r\n<td>21.64<\/td>\r\n<td>1.18<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>std<\/th>\r\n<td>8.16<\/td>\r\n<td>10.94<\/td>\r\n<td>42.60<\/td>\r\n<td>1806.06<\/td>\r\n<td>1518.18<\/td>\r\n<td>983.81<\/td>\r\n<td>74.80<\/td>\r\n<td>160.52<\/td>\r\n<td>25.46<\/td>\r\n<td>859.31<\/td>\r\n<td>1.47<\/td>\r\n<td>9.91<\/td>\r\n<td>6.09<\/td>\r\n<td>11.79<\/td>\r\n<td>17.66<\/td>\r\n<td>1.00<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>min<\/th>\r\n<td>10.00<\/td>\r\n<td>147.00<\/td>\r\n<td>43.00<\/td>\r\n<td>0.00<\/td>\r\n<td>0.00<\/td>\r\n<td>0.00<\/td>\r\n<td>0.00<\/td>\r\n<td>0.00<\/td>\r\n<td>0.00<\/td>\r\n<td>4.00<\/td>\r\n<td>0.00<\/td>\r\n<td>4.00<\/td>\r\n<td>0.00<\/td>\r\n<td>0.00<\/td>\r\n<td>0.00<\/td>\r\n<td>0.00<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>25%<\/th>\r\n<td>24.00<\/td>\r\n<td>164.00<\/td>\r\n<td>60.00<\/td>\r\n<td>1400.00<\/td>\r\n<td>3850.00<\/td>\r\n<td>400.00<\/td>\r\n<td>28.00<\/td>\r\n<td>50.00<\/
td>\r\n<td>50.00<\/td>\r\n<td>9.00<\/td>\r\n<td>0.00<\/td>\r\n<td>6.00<\/td>\r\n<td>2.00<\/td>\r\n<td>5.00<\/td>\r\n<td>3.00<\/td>\r\n<td>0.31<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>50%<\/th>\r\n<td>26.00<\/td>\r\n<td>170.00<\/td>\r\n<td>70.00<\/td>\r\n<td>2700.00<\/td>\r\n<td>4500.00<\/td>\r\n<td>1000.00<\/td>\r\n<td>50.00<\/td>\r\n<td>100.00<\/td>\r\n<td>60.00<\/td>\r\n<td>9.88<\/td>\r\n<td>0.00<\/td>\r\n<td>7.00<\/td>\r\n<td>7.00<\/td>\r\n<td>10.00<\/td>\r\n<td>20.00<\/td>\r\n<td>1.00<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>75%<\/th>\r\n<td>30.00<\/td>\r\n<td>179.75<\/td>\r\n<td>82.00<\/td>\r\n<td>4000.00<\/td>\r\n<td>5400.00<\/td>\r\n<td>2000.00<\/td>\r\n<td>100.00<\/td>\r\n<td>250.00<\/td>\r\n<td>80.00<\/td>\r\n<td>10.43<\/td>\r\n<td>2.00<\/td>\r\n<td>8.00<\/td>\r\n<td>10.00<\/td>\r\n<td>22.00<\/td>\r\n<td>40.00<\/td>\r\n<td>2.00<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>max<\/th>\r\n<td>90.00<\/td>\r\n<td>220.00<\/td>\r\n<td>506.00<\/td>\r\n<td>8000.00<\/td>\r\n<td>8600.00<\/td>\r\n<td>4000.00<\/td>\r\n<td>300.00<\/td>\r\n<td>600.00<\/td>\r\n<td>140.00<\/td>\r\n<td>10316.00<\/td>\r\n<td>5.00<\/td>\r\n<td>78.00<\/td>\r\n<td>25.00<\/td>\r\n<td>46.00<\/td>\r\n<td>60.00<\/td>\r\n<td>4.00<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[3]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"n\">survey<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div 
class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[3]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\" border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Timestamp<\/th>\r\n<th>program<\/th>\r\n<th>Gender<\/th>\r\n<th>Age<\/th>\r\n<th>Height<\/th>\r\n<th>Weight.<\/th>\r\n<th>Old_Salary<\/th>\r\n<th>Expected_Salary<\/th>\r\n<th>Organization<\/th>\r\n<th>Live_parents<\/th>\r\n<th>...<\/th>\r\n<th>Footsize<\/th>\r\n<th>Alcohol<\/th>\r\n<th>Tattoo<\/th>\r\n<th>Sleep<\/th>\r\n<th>Social_Network<\/th>\r\n<th>Homework<\/th>\r\n<th>Work<\/th>\r\n<th>Travel<\/th>\r\n<th>Tuition<\/th>\r\n<th>Coffee<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>0<\/th>\r\n<td>2019\/02\/04 11:26:35 am GMT-8<\/td>\r\n<td>HRMG<\/td>\r\n<td>Female<\/td>\r\n<td>22.0<\/td>\r\n<td>177.80<\/td>\r\n<td>110.0<\/td>\r\n<td>1400.0<\/td>\r\n<td>4000.0<\/td>\r\n<td>3. Non-profit or Government Agency<\/td>\r\n<td>No<\/td>\r\n<td>...<\/td>\r\n<td>11.12<\/td>\r\n<td>0.0<\/td>\r\n<td>Yes<\/td>\r\n<td>8.0<\/td>\r\n<td>10.0<\/td>\r\n<td>10.0<\/td>\r\n<td>0.0<\/td>\r\n<td>Yes<\/td>\r\n<td>No<\/td>\r\n<td>0.0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>1<\/th>\r\n<td>2019\/02\/04 11:27:45 am GMT-8<\/td>\r\n<td>HRMG<\/td>\r\n<td>Female<\/td>\r\n<td>24.0<\/td>\r\n<td>175.00<\/td>\r\n<td>63.0<\/td>\r\n<td>2000.0<\/td>\r\n<td>3750.0<\/td>\r\n<td>2. 
Small Company<\/td>\r\n<td>Yes<\/td>\r\n<td>...<\/td>\r\n<td>9.50<\/td>\r\n<td>0.0<\/td>\r\n<td>Yes<\/td>\r\n<td>8.0<\/td>\r\n<td>7.0<\/td>\r\n<td>20.0<\/td>\r\n<td>0.0<\/td>\r\n<td>Yes<\/td>\r\n<td>Yes<\/td>\r\n<td>2.0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>2<\/th>\r\n<td>2019\/02\/04 11:27:46 am GMT-8<\/td>\r\n<td>HRMG<\/td>\r\n<td>Male<\/td>\r\n<td>24.0<\/td>\r\n<td>182.88<\/td>\r\n<td>100.0<\/td>\r\n<td>2000.0<\/td>\r\n<td>3500.0<\/td>\r\n<td>2. Small Company<\/td>\r\n<td>Yes<\/td>\r\n<td>...<\/td>\r\n<td>10.50<\/td>\r\n<td>1.0<\/td>\r\n<td>No<\/td>\r\n<td>6.0<\/td>\r\n<td>20.0<\/td>\r\n<td>20.0<\/td>\r\n<td>8.0<\/td>\r\n<td>Yes<\/td>\r\n<td>No<\/td>\r\n<td>0.0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>3<\/th>\r\n<td>2019\/02\/04 11:29:06 am GMT-8<\/td>\r\n<td>HRMG<\/td>\r\n<td>Female<\/td>\r\n<td>34.0<\/td>\r\n<td>NaN<\/td>\r\n<td>61.3<\/td>\r\n<td>2500.0<\/td>\r\n<td>3500.0<\/td>\r\n<td>1. Large Corporation<\/td>\r\n<td>Yes<\/td>\r\n<td>...<\/td>\r\n<td>7.00<\/td>\r\n<td>0.0<\/td>\r\n<td>Yes<\/td>\r\n<td>78.0<\/td>\r\n<td>14.0<\/td>\r\n<td>20.0<\/td>\r\n<td>0.0<\/td>\r\n<td>Yes<\/td>\r\n<td>No<\/td>\r\n<td>1.0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>4<\/th>\r\n<td>2019\/02\/04 11:29:12 am GMT-8<\/td>\r\n<td>HRMG<\/td>\r\n<td>Female<\/td>\r\n<td>24.0<\/td>\r\n<td>160.00<\/td>\r\n<td>54.0<\/td>\r\n<td>0.0<\/td>\r\n<td>3800.0<\/td>\r\n<td>2. 
Small Company<\/td>\r\n<td>Yes<\/td>\r\n<td>...<\/td>\r\n<td>8.00<\/td>\r\n<td>3.0<\/td>\r\n<td>No<\/td>\r\n<td>67.0<\/td>\r\n<td>6.0<\/td>\r\n<td>11.0<\/td>\r\n<td>0.0<\/td>\r\n<td>Yes<\/td>\r\n<td>Yes<\/td>\r\n<td>1.0<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n5 rows \u00d7 27 columns\r\n\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[4]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">### We will deal with a smaller version of this dataset:<\/span>\r\n\r\n<span class=\"c1\">##df stands for data.frame -&gt; the pandas structure<\/span>\r\n<span class=\"c1\">##.dropna() will get rid of blanks, for today.<\/span>\r\n\r\n<span class=\"n\">df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">survey<\/span><span class=\"p\">[[<\/span><span class=\"s2\">\"Age\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Height\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Coffee\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Tattoo\"<\/span><span class=\"p\">]]<\/span><span class=\"o\">.<\/span><span class=\"n\">dropna<\/span><span class=\"p\">()<\/span>\r\n\r\n<span class=\"n\">df<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt 
jp-OutputArea-prompt\">Out[4]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\" border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Age<\/th>\r\n<th>Height<\/th>\r\n<th>Coffee<\/th>\r\n<th>Tattoo<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>0<\/th>\r\n<td>22.0<\/td>\r\n<td>177.80<\/td>\r\n<td>0.0<\/td>\r\n<td>Yes<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>1<\/th>\r\n<td>24.0<\/td>\r\n<td>175.00<\/td>\r\n<td>2.0<\/td>\r\n<td>Yes<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>2<\/th>\r\n<td>24.0<\/td>\r\n<td>182.88<\/td>\r\n<td>0.0<\/td>\r\n<td>No<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>4<\/th>\r\n<td>24.0<\/td>\r\n<td>160.00<\/td>\r\n<td>1.0<\/td>\r\n<td>No<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>5<\/th>\r\n<td>24.0<\/td>\r\n<td>164.00<\/td>\r\n<td>0.0<\/td>\r\n<td>No<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\r\n<h2 id=\"Look-at-some-Summary-stats\">Look at some Summary stats<a class=\"anchor-link\" href=\"#Look-at-some-Summary-stats\">\u00b6<\/a><\/h2>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea 
jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[5]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">##Let's look at our summary statistics!<\/span>\r\n\r\n<span class=\"n\">df<\/span><span class=\"o\">.<\/span><span class=\"n\">describe<\/span><span class=\"p\">(<\/span><span class=\"n\">include<\/span> <span class=\"o\">=<\/span> <span class=\"s1\">'all'<\/span><span class=\"p\">)<\/span><span class=\"o\">.<\/span><span class=\"n\">round<\/span><span class=\"p\">(<\/span><span class=\"mi\">2<\/span><span class=\"p\">)<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[5]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\" 
border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Age<\/th>\r\n<th>Height<\/th>\r\n<th>Coffee<\/th>\r\n<th>Tattoo<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>count<\/th>\r\n<td>148.00<\/td>\r\n<td>148.00<\/td>\r\n<td>148.00<\/td>\r\n<td>148<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>unique<\/th>\r\n<td>NaN<\/td>\r\n<td>NaN<\/td>\r\n<td>NaN<\/td>\r\n<td>2<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>top<\/th>\r\n<td>NaN<\/td>\r\n<td>NaN<\/td>\r\n<td>NaN<\/td>\r\n<td>No<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>freq<\/th>\r\n<td>NaN<\/td>\r\n<td>NaN<\/td>\r\n<td>NaN<\/td>\r\n<td>109<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>mean<\/th>\r\n<td>27.89<\/td>\r\n<td>170.99<\/td>\r\n<td>1.18<\/td>\r\n<td>NaN<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>std<\/th>\r\n<td>8.06<\/td>\r\n<td>11.13<\/td>\r\n<td>1.00<\/td>\r\n<td>NaN<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>min<\/th>\r\n<td>19.00<\/td>\r\n<td>147.00<\/td>\r\n<td>0.00<\/td>\r\n<td>NaN<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>25%<\/th>\r\n<td>24.00<\/td>\r\n<td>163.00<\/td>\r\n<td>0.44<\/td>\r\n<td>NaN<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>50%<\/th>\r\n<td>26.00<\/td>\r\n<td>170.00<\/td>\r\n<td>1.00<\/td>\r\n<td>NaN<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>75%<\/th>\r\n<td>30.00<\/td>\r\n<td>180.00<\/td>\r\n<td>2.00<\/td>\r\n<td>NaN<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>max<\/th>\r\n<td>90.00<\/td>\r\n<td>220.00<\/td>\r\n<td>4.00<\/td>\r\n<td>NaN<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\r\n<h2 id=\"Scaling\">Scaling<a class=\"anchor-link\" href=\"#Scaling\">\u00b6<\/a><\/h2>\r\nWe will deal with the Quantitative columns 
separately.\r\n\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[6]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">## make a smaller dataframe with just quant columns:<\/span>\r\n<span class=\"n\">dfq<\/span> <span class=\"o\">=<\/span> <span class=\"n\">df<\/span><span class=\"p\">[[<\/span><span class=\"s2\">\"Age\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Height\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Coffee\"<\/span><span class=\"p\">]]<\/span>\r\n\r\n<span class=\"n\">dfq<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[6]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\"
border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Age<\/th>\r\n<th>Height<\/th>\r\n<th>Coffee<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>0<\/th>\r\n<td>22.0<\/td>\r\n<td>177.80<\/td>\r\n<td>0.0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>1<\/th>\r\n<td>24.0<\/td>\r\n<td>175.00<\/td>\r\n<td>2.0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>2<\/th>\r\n<td>24.0<\/td>\r\n<td>182.88<\/td>\r\n<td>0.0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>4<\/th>\r\n<td>24.0<\/td>\r\n<td>160.00<\/td>\r\n<td>1.0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>5<\/th>\r\n<td>24.0<\/td>\r\n<td>164.00<\/td>\r\n<td>0.0<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[7]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">##we can quickly fix things with a lambda function<\/span>\r\n<span class=\"c1\">## If you're adventurous, explore the preprocessing functions of sklearn!<\/span>\r\n\r\n<span class=\"n\">df2<\/span><span class=\"o\">=<\/span><span class=\"n\">dfq<\/span><span class=\"o\">.<\/span><span class=\"n\">apply<\/span><span class=\"p\">(<\/span><span class=\"k\">lambda<\/span> <span class=\"n\">x<\/span><span class=\"p\">:<\/span> <span class=\"p\">(<\/span><span class=\"n\">x<\/span> <span class=\"o\">-<\/span> <span class=\"n\">np<\/span><span class=\"o\">.<\/span><span class=\"n\">mean<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">))<\/span> <span class=\"o\">\/<\/span> <span class=\"p\">(<\/span><span class=\"n\">np<\/span><span class=\"o\">.<\/span><span class=\"n\">std<\/span><span 
class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">)))<\/span>\r\n\r\n<span class=\"n\">df2<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[7]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\" border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Age<\/th>\r\n<th>Height<\/th>\r\n<th>Coffee<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>0<\/th>\r\n<td>-0.732898<\/td>\r\n<td>0.613901<\/td>\r\n<td>-1.184843<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>1<\/th>\r\n<td>-0.483831<\/td>\r\n<td>0.361372<\/td>\r\n<td>0.822101<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>2<\/th>\r\n<td>-0.483831<\/td>\r\n<td>1.072062<\/td>\r\n<td>-1.184843<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>4<\/th>\r\n<td>-0.483831<\/td>\r\n<td>-0.991465<\/td>\r\n<td>-0.181371<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>5<\/th>\r\n<td>-0.483831<\/td>\r\n<td>-0.630708<\/td>\r\n<td>-1.184843<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[13]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor 
jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">##Let's summarize again!<\/span>\r\n\r\n\r\n<span class=\"n\">df2<\/span><span class=\"o\">.<\/span><span class=\"n\">describe<\/span><span class=\"p\">()<\/span><span class=\"o\">.<\/span><span class=\"n\">round<\/span><span class=\"p\">(<\/span><span class=\"mi\">2<\/span><span class=\"p\">)<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[13]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\" 
border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Age<\/th>\r\n<th>Height<\/th>\r\n<th>Coffee<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>count<\/th>\r\n<td>1.480000e+02<\/td>\r\n<td>1.480000e+02<\/td>\r\n<td>1.480000e+02<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>mean<\/th>\r\n<td>-2.160434e-16<\/td>\r\n<td>2.790561e-16<\/td>\r\n<td>4.800964e-17<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>std<\/th>\r\n<td>1.003396e+00<\/td>\r\n<td>1.003396e+00<\/td>\r\n<td>1.003396e+00<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>min<\/th>\r\n<td>-1.106500e+00<\/td>\r\n<td>-2.163923e+00<\/td>\r\n<td>-1.184843e+00<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>25%<\/th>\r\n<td>-4.838307e-01<\/td>\r\n<td>-7.208974e-01<\/td>\r\n<td>-7.458238e-01<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>50%<\/th>\r\n<td>-2.347631e-01<\/td>\r\n<td>-8.957364e-02<\/td>\r\n<td>-1.813708e-01<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>75%<\/th>\r\n<td>2.633722e-01<\/td>\r\n<td>8.123175e-01<\/td>\r\n<td>8.221012e-01<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>max<\/th>\r\n<td>7.735402e+00<\/td>\r\n<td>4.419882e+00<\/td>\r\n<td>2.829045e+00<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\r\n<h2 id=\"Dummy-Variables\">Dummy Variables<a class=\"anchor-link\" href=\"#Dummy-Variables\">\u00b6<\/a><\/h2>\r\nWe will now deal with the Tattoo column!\r\n\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div
class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[9]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">## this created just the dummy:<\/span>\r\n<span class=\"n\">tattoo_d<\/span> <span class=\"o\">=<\/span> <span class=\"n\">pandas<\/span><span class=\"o\">.<\/span><span class=\"n\">get_dummies<\/span><span class=\"p\">(<\/span><span class=\"n\">df<\/span><span class=\"p\">[<\/span><span class=\"s1\">'Tattoo'<\/span><span class=\"p\">],<\/span> <span class=\"n\">prefix<\/span> <span class=\"o\">=<\/span> <span class=\"s1\">'Tattoo'<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"c1\">##look:<\/span>\r\n\r\n<span class=\"n\">tattoo_d<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[9]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\" 
border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Tattoo_No<\/th>\r\n<th>Tattoo_Yes<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>0<\/th>\r\n<td>0<\/td>\r\n<td>1<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>1<\/th>\r\n<td>0<\/td>\r\n<td>1<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>2<\/th>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>4<\/th>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>5<\/th>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[10]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">## Join things back up<\/span>\r\n<span class=\"c1\">## you can use any join method you like:  Concat also works.<\/span>\r\n\r\n<span class=\"n\">df3<\/span><span class=\"o\">=<\/span> <span class=\"n\">df2<\/span><span class=\"o\">.<\/span><span class=\"n\">join<\/span><span class=\"p\">(<\/span><span class=\"n\">tattoo_d<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"n\">df3<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[10]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: 
middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\" border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Age<\/th>\r\n<th>Height<\/th>\r\n<th>Coffee<\/th>\r\n<th>Tattoo_No<\/th>\r\n<th>Tattoo_Yes<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>0<\/th>\r\n<td>-0.732898<\/td>\r\n<td>0.613901<\/td>\r\n<td>-1.184843<\/td>\r\n<td>0<\/td>\r\n<td>1<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>1<\/th>\r\n<td>-0.483831<\/td>\r\n<td>0.361372<\/td>\r\n<td>0.822101<\/td>\r\n<td>0<\/td>\r\n<td>1<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>2<\/th>\r\n<td>-0.483831<\/td>\r\n<td>1.072062<\/td>\r\n<td>-1.184843<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>4<\/th>\r\n<td>-0.483831<\/td>\r\n<td>-0.991465<\/td>\r\n<td>-0.181371<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>5<\/th>\r\n<td>-0.483831<\/td>\r\n<td>-0.630708<\/td>\r\n<td>-1.184843<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>...<\/th>\r\n<td>...<\/td>\r\n<td>...<\/td>\r\n<td>...<\/td>\r\n<td>...<\/td>\r\n<td>...<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>159<\/th>\r\n<td>-0.234763<\/td>\r\n<td>-0.901276<\/td>\r\n<td>0.822101<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>160<\/th>\r\n<td>-0.608365<\/td>\r\n<td>0.271183<\/td>\r\n<td>-0.181371<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>162<\/th>\r\n<td>-0.359297<\/td>\r\n<td>-0.089574<\/td>\r\n<td>-0.181371<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>163<\/th>\r\n<td>0.014305<\/td>\r\n<td>0.631939<\/td>\r\n<td>-1.184843<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>164<\/th>\r\n<td>-0.234763<\/td>\r\n<td>0.902507<\/td>\r\n<td>-0.181371<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n148 rows \u00d7 5 columns\r\n\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\r\n<div 
class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\r\n<h2 id=\"Saving\">Saving<a class=\"anchor-link\" href=\"#Saving\">\u00b6<\/a><\/h2>\r\nWe will save our clean dataset as a new file - now we don't need to use the old one.\r\n\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\r\n\r\nStrangely enough, we did LESS well with scaling! This is something we would want to explore in the future.\r\n\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[11]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">## write!  
It's nice to datestamp files, if you make multiple ones:<\/span>\r\n<span class=\"c1\">## you will need to create the folder \"clean_data\", if you don't have one already<\/span>\r\n\r\n<span class=\"n\">today<\/span> <span class=\"o\">=<\/span> <span class=\"nb\">str<\/span><span class=\"p\">(<\/span><span class=\"n\">date<\/span><span class=\"o\">.<\/span><span class=\"n\">today<\/span><span class=\"p\">())<\/span>\r\n<span class=\"n\">filename<\/span> <span class=\"o\">=<\/span> <span class=\"s2\">\"clean_data\/small_survey_\"<\/span><span class=\"o\">+<\/span> <span class=\"n\">today<\/span><span class=\"o\">+<\/span><span class=\"s2\">\".csv\"<\/span>\r\n\r\n<span class=\"n\">df3<\/span><span class=\"o\">.<\/span><span class=\"n\">to_csv<\/span><span class=\"p\">(<\/span><span class=\"n\">filename<\/span><span class=\"p\">)<\/span>\r\n<span class=\"nb\">print<\/span><span class=\"p\">(<\/span><span class=\"s2\">\"saved to: \"<\/span><span class=\"p\">,<\/span> <span class=\"n\">filename<\/span><span class=\"p\">)<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\"><\/div>\r\n<div class=\"jp-RenderedText jp-OutputArea-output\" data-mime-type=\"text\/plain\">\r\n<pre>saved to:  clean_data\/small_survey2024-02-13.csv\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[14]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div 
class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre><span class=\"c1\">## In my next notebook, I can just pull up the clean data:<\/span>\r\n\r\n<span class=\"n\">dat<\/span> <span class=\"o\">=<\/span> <span class=\"n\">pandas<\/span><span class=\"o\">.<\/span><span class=\"n\">read_csv<\/span><span class=\"p\">(<\/span><span class=\"s2\">\"clean_data\/small_survey2024-02-13.csv\"<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"c1\">## If you are running this, you may need to change the date above.<\/span>\r\n\r\n<span class=\"nb\">print<\/span><span class=\"p\">(<\/span><span class=\"s2\">\"New Shape: \"<\/span><span class=\"p\">,<\/span> <span class=\"n\">dat<\/span><span class=\"o\">.<\/span><span class=\"n\">shape<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"n\">dat<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell-outputWrapper\">\r\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\r\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\"><\/div>\r\n<div class=\"jp-RenderedText jp-OutputArea-output\" data-mime-type=\"text\/plain\">\r\n<pre>New Shape:  (148, 6)\r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-OutputArea-child\">\r\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[14]:<\/div>\r\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\r\n<div>\r\n\r\n.dataframe tbody tr th:only-of-type {\r\nvertical-align: middle;\r\n}\r\n\r\n.dataframe tbody tr th {\r\nvertical-align: top;\r\n}\r\n\r\n.dataframe thead th {\r\ntext-align: right;\r\n}\r\n<table class=\"dataframe\" border=\"1\">\r\n<thead>\r\n<tr>\r\n<th><\/th>\r\n<th>Unnamed: 
0<\/th>\r\n<th>Age<\/th>\r\n<th>Height<\/th>\r\n<th>Coffee<\/th>\r\n<th>Tattoo_No<\/th>\r\n<th>Tattoo_Yes<\/th>\r\n<\/tr>\r\n<\/thead>\r\n<tbody>\r\n<tr>\r\n<th>0<\/th>\r\n<td>0<\/td>\r\n<td>-0.732898<\/td>\r\n<td>0.613901<\/td>\r\n<td>-1.184843<\/td>\r\n<td>0<\/td>\r\n<td>1<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>1<\/th>\r\n<td>1<\/td>\r\n<td>-0.483831<\/td>\r\n<td>0.361372<\/td>\r\n<td>0.822101<\/td>\r\n<td>0<\/td>\r\n<td>1<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>2<\/th>\r\n<td>2<\/td>\r\n<td>-0.483831<\/td>\r\n<td>1.072062<\/td>\r\n<td>-1.184843<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>3<\/th>\r\n<td>4<\/td>\r\n<td>-0.483831<\/td>\r\n<td>-0.991465<\/td>\r\n<td>-0.181371<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<tr>\r\n<th>4<\/th>\r\n<td>5<\/td>\r\n<td>-0.483831<\/td>\r\n<td>-0.630708<\/td>\r\n<td>-1.184843<\/td>\r\n<td>1<\/td>\r\n<td>0<\/td>\r\n<\/tr>\r\n<\/tbody>\r\n<\/table>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs\">\r\n<div class=\"jp-Cell-inputWrapper\">\r\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\r\n<div class=\"jp-InputArea jp-Cell-inputArea\">\r\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[\u00a0]:<\/div>\r\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\r\n<div class=\"CodeMirror cm-s-jupyter\">\r\n<div class=\"highlight hl-ipython3\">\r\n<pre> \r\n<\/pre>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>\r\n<\/div>","rendered":"<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\n<h2 
id=\"Preprocessing:-Scaling-and-Dummies\">Preprocessing: Scaling and Dummies<a class=\"anchor-link\" href=\"-Scaling-and-Dummies\">\u00b6<\/a><\/h2>\n<p>We will do a few things:<\/p>\n<ul>\n<li>Open the dataset<\/li>\n<li>Scale what needs scaling<\/li>\n<li>Dummy Variables<\/li>\n<li>Save as a new file<\/li>\n<\/ul>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[1]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">## Import some libraries<\/span>\r\n<span class=\"c1\">#from sklearn import datasets<\/span>\r\n\r\n<span class=\"c1\">#from sklearn.metrics import accuracy_score<\/span>\r\n<span class=\"c1\">#from sklearn.metrics import silhouette_score<\/span>\r\n<span class=\"c1\">#from sklearn.cluster import KMeans<\/span>\r\n<span class=\"c1\">#from sklearn.cluster import AgglomerativeClustering <\/span>\r\n<span class=\"c1\">#import numpy as np<\/span>\r\n<span class=\"c1\">#import matplotlib.pyplot as plt<\/span>\r\n<span class=\"c1\">#from scipy import stats<\/span>\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">pandas<\/span> \r\n\r\n<span class=\"c1\">#%matplotlib inline<\/span>\r\n\r\n\r\n<span class=\"c1\">## Start with packages<\/span>\r\n\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">numpy<\/span> <span class=\"k\">as<\/span> <span class=\"nn\">np<\/span>\r\n<span class=\"c1\">#import pandas<\/span>\r\n<span class=\"c1\">## this lets me datestamp files: a thing that is really nice<\/span>\r\n<span class=\"kn\">from<\/span> <span class=\"nn\">datetime<\/span> <span class=\"kn\">import<\/span> <span 
class=\"n\">date<\/span>\r\n\r\n<span class=\"kn\">import<\/span> <span class=\"nn\">os<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[2]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">## We are also going to read in the student survey we looked at earlier.<\/span>\r\n<span class=\"c1\">## I'm using a more current version, because you guys filled it out.<\/span>\r\n\r\n<span class=\"n\">survey<\/span> <span class=\"o\">=<\/span> <span class=\"n\">pandas<\/span><span class=\"o\">.<\/span><span class=\"n\">read_csv<\/span><span class=\"p\">(<\/span><span class=\"s2\">\"data\\Clean_Survey_2023-01-26.csv\"<\/span><span class=\"p\">)<\/span> \r\n<span class=\"n\">survey<\/span><span class=\"o\">.<\/span><span class=\"n\">describe<\/span><span class=\"p\">()<\/span><span class=\"o\">.<\/span><span class=\"n\">round<\/span><span class=\"p\">(<\/span><span class=\"mi\">2<\/span><span class=\"p\">)<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[2]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br 
\/>\n}<\/p>\n<p>.dataframe thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Age<\/th>\n<th>Height<\/th>\n<th>Weight.<\/th>\n<th>Old_Salary<\/th>\n<th>Expected_Salary<\/th>\n<th>Expenses<\/th>\n<th>Transportation_Cost<\/th>\n<th>Entertainment<\/th>\n<th>Cellphone_Cost<\/th>\n<th>Footsize<\/th>\n<th>Alcohol<\/th>\n<th>Sleep<\/th>\n<th>Social_Network<\/th>\n<th>Homework<\/th>\n<th>Work<\/th>\n<th>Coffee<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>count<\/th>\n<td>164.00<\/td>\n<td>154.00<\/td>\n<td>161.00<\/td>\n<td>156.00<\/td>\n<td>146.00<\/td>\n<td>154.00<\/td>\n<td>144.00<\/td>\n<td>157.00<\/td>\n<td>160.00<\/td>\n<td>145.00<\/td>\n<td>139.00<\/td>\n<td>163.00<\/td>\n<td>150.00<\/td>\n<td>155.00<\/td>\n<td>159.00<\/td>\n<td>158.00<\/td>\n<\/tr>\n<tr>\n<th>mean<\/th>\n<td>27.90<\/td>\n<td>171.12<\/td>\n<td>77.88<\/td>\n<td>2659.50<\/td>\n<td>4566.69<\/td>\n<td>1246.88<\/td>\n<td>80.65<\/td>\n<td>181.85<\/td>\n<td>64.60<\/td>\n<td>91.72<\/td>\n<td>1.16<\/td>\n<td>8.49<\/td>\n<td>7.77<\/td>\n<td>14.34<\/td>\n<td>21.64<\/td>\n<td>1.18<\/td>\n<\/tr>\n<tr>\n<th>std<\/th>\n<td>8.16<\/td>\n<td>10.94<\/td>\n<td>42.60<\/td>\n<td>1806.06<\/td>\n<td>1518.18<\/td>\n<td>983.81<\/td>\n<td>74.80<\/td>\n<td>160.52<\/td>\n<td>25.46<\/td>\n<td>859.31<\/td>\n<td>1.47<\/td>\n<td>9.91<\/td>\n<td>6.09<\/td>\n<td>11.79<\/td>\n<td>17.66<\/td>\n<td>1.00<\/td>\n<\/tr>\n<tr>\n<th>min<\/th>\n<td>10.00<\/td>\n<td>147.00<\/td>\n<td>43.00<\/td>\n<td>0.00<\/td>\n<td>0.00<\/td>\n<td>0.00<\/td>\n<td>0.00<\/td>\n<td>0.00<\/td>\n<td>0.00<\/td>\n<td>4.00<\/td>\n<td>0.00<\/td>\n<td>4.00<\/td>\n<td>0.00<\/td>\n<td>0.00<\/td>\n<td>0.00<\/td>\n<td>0.00<\/td>\n<\/tr>\n<tr>\n<th>25%<\/th>\n<td>24.00<\/td>\n<td>164.00<\/td>\n<td>60.00<\/td>\n<td>1400.00<\/td>\n<td>3850.00<\/td>\n<td>400.00<\/td>\n<td>28.00<\/td>\n<td>50.00<\/td>\n<td>50.00<\/td>\n<td>9.00<\/td>\n<td>0.00<\/td>\n<td>6.00<\/td>\n<td>2.00<\/td>\n<td>5.00<\/td>\n<td>3.00<\/td>\n<td
>0.31<\/td>\n<\/tr>\n<tr>\n<th>50%<\/th>\n<td>26.00<\/td>\n<td>170.00<\/td>\n<td>70.00<\/td>\n<td>2700.00<\/td>\n<td>4500.00<\/td>\n<td>1000.00<\/td>\n<td>50.00<\/td>\n<td>100.00<\/td>\n<td>60.00<\/td>\n<td>9.88<\/td>\n<td>0.00<\/td>\n<td>7.00<\/td>\n<td>7.00<\/td>\n<td>10.00<\/td>\n<td>20.00<\/td>\n<td>1.00<\/td>\n<\/tr>\n<tr>\n<th>75%<\/th>\n<td>30.00<\/td>\n<td>179.75<\/td>\n<td>82.00<\/td>\n<td>4000.00<\/td>\n<td>5400.00<\/td>\n<td>2000.00<\/td>\n<td>100.00<\/td>\n<td>250.00<\/td>\n<td>80.00<\/td>\n<td>10.43<\/td>\n<td>2.00<\/td>\n<td>8.00<\/td>\n<td>10.00<\/td>\n<td>22.00<\/td>\n<td>40.00<\/td>\n<td>2.00<\/td>\n<\/tr>\n<tr>\n<th>max<\/th>\n<td>90.00<\/td>\n<td>220.00<\/td>\n<td>506.00<\/td>\n<td>8000.00<\/td>\n<td>8600.00<\/td>\n<td>4000.00<\/td>\n<td>300.00<\/td>\n<td>600.00<\/td>\n<td>140.00<\/td>\n<td>10316.00<\/td>\n<td>5.00<\/td>\n<td>78.00<\/td>\n<td>25.00<\/td>\n<td>46.00<\/td>\n<td>60.00<\/td>\n<td>4.00<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[3]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"n\">survey<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[3]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML 
jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br \/>\n}<\/p>\n<p>.dataframe thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Timestamp<\/th>\n<th>program<\/th>\n<th>Gender<\/th>\n<th>Age<\/th>\n<th>Height<\/th>\n<th>Weight.<\/th>\n<th>Old_Salary<\/th>\n<th>Expected_Salary<\/th>\n<th>Organization<\/th>\n<th>Live_parents<\/th>\n<th>&#8230;<\/th>\n<th>Footsize<\/th>\n<th>Alcohol<\/th>\n<th>Tattoo<\/th>\n<th>Sleep<\/th>\n<th>Social_Network<\/th>\n<th>Homework<\/th>\n<th>Work<\/th>\n<th>Travel<\/th>\n<th>Tuition<\/th>\n<th>Coffee<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>2019\/02\/04 11:26:35 am GMT-8<\/td>\n<td>HRMG<\/td>\n<td>Female<\/td>\n<td>22.0<\/td>\n<td>177.80<\/td>\n<td>110.0<\/td>\n<td>1400.0<\/td>\n<td>4000.0<\/td>\n<td>3. Non-profit or Government Agency<\/td>\n<td>No<\/td>\n<td>&#8230;<\/td>\n<td>11.12<\/td>\n<td>0.0<\/td>\n<td>Yes<\/td>\n<td>8.0<\/td>\n<td>10.0<\/td>\n<td>10.0<\/td>\n<td>0.0<\/td>\n<td>Yes<\/td>\n<td>No<\/td>\n<td>0.0<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>2019\/02\/04 11:27:45 am GMT-8<\/td>\n<td>HRMG<\/td>\n<td>Female<\/td>\n<td>24.0<\/td>\n<td>175.00<\/td>\n<td>63.0<\/td>\n<td>2000.0<\/td>\n<td>3750.0<\/td>\n<td>2. Small Company<\/td>\n<td>Yes<\/td>\n<td>&#8230;<\/td>\n<td>9.50<\/td>\n<td>0.0<\/td>\n<td>Yes<\/td>\n<td>8.0<\/td>\n<td>7.0<\/td>\n<td>20.0<\/td>\n<td>0.0<\/td>\n<td>Yes<\/td>\n<td>Yes<\/td>\n<td>2.0<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>2019\/02\/04 11:27:46 am GMT-8<\/td>\n<td>HRMG<\/td>\n<td>Male<\/td>\n<td>24.0<\/td>\n<td>182.88<\/td>\n<td>100.0<\/td>\n<td>2000.0<\/td>\n<td>3500.0<\/td>\n<td>2. 
Small Company<\/td>\n<td>Yes<\/td>\n<td>&#8230;<\/td>\n<td>10.50<\/td>\n<td>1.0<\/td>\n<td>No<\/td>\n<td>6.0<\/td>\n<td>20.0<\/td>\n<td>20.0<\/td>\n<td>8.0<\/td>\n<td>Yes<\/td>\n<td>No<\/td>\n<td>0.0<\/td>\n<\/tr>\n<tr>\n<th>3<\/th>\n<td>2019\/02\/04 11:29:06 am GMT-8<\/td>\n<td>HRMG<\/td>\n<td>Female<\/td>\n<td>34.0<\/td>\n<td>NaN<\/td>\n<td>61.3<\/td>\n<td>2500.0<\/td>\n<td>3500.0<\/td>\n<td>1. Large Corporation<\/td>\n<td>Yes<\/td>\n<td>&#8230;<\/td>\n<td>7.00<\/td>\n<td>0.0<\/td>\n<td>Yes<\/td>\n<td>78.0<\/td>\n<td>14.0<\/td>\n<td>20.0<\/td>\n<td>0.0<\/td>\n<td>Yes<\/td>\n<td>No<\/td>\n<td>1.0<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>2019\/02\/04 11:29:12 am GMT-8<\/td>\n<td>HRMG<\/td>\n<td>Female<\/td>\n<td>24.0<\/td>\n<td>160.00<\/td>\n<td>54.0<\/td>\n<td>0.0<\/td>\n<td>3800.0<\/td>\n<td>2. Small Company<\/td>\n<td>Yes<\/td>\n<td>&#8230;<\/td>\n<td>8.00<\/td>\n<td>3.0<\/td>\n<td>No<\/td>\n<td>67.0<\/td>\n<td>6.0<\/td>\n<td>11.0<\/td>\n<td>0.0<\/td>\n<td>Yes<\/td>\n<td>Yes<\/td>\n<td>1.0<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>5 rows \u00d7 27 columns<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[4]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">### We will deal with a smaller version of this dataset:<\/span>\r\n\r\n<span class=\"c1\">##df stands for data.frame -&gt; the pandas structure<\/span>\r\n<span class=\"c1\">##.dropna() will get rid of blanks, for today.<\/span>\r\n\r\n<span class=\"n\">df<\/span> <span class=\"o\">=<\/span> <span class=\"n\">survey<\/span><span class=\"p\">[[<\/span><span 
class=\"s2\">\"Age\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Height\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Coffee\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Tattoo\"<\/span><span class=\"p\">]]<\/span><span class=\"o\">.<\/span><span class=\"n\">dropna<\/span><span class=\"p\">()<\/span>\r\n\r\n<span class=\"n\">df<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[4]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br \/>\n}<\/p>\n<p>.dataframe thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Age<\/th>\n<th>Height<\/th>\n<th>Coffee<\/th>\n<th>Tattoo<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>22.0<\/td>\n<td>177.80<\/td>\n<td>0.0<\/td>\n<td>Yes<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>24.0<\/td>\n<td>175.00<\/td>\n<td>2.0<\/td>\n<td>Yes<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>24.0<\/td>\n<td>182.88<\/td>\n<td>0.0<\/td>\n<td>No<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>24.0<\/td>\n<td>160.00<\/td>\n<td>1.0<\/td>\n<td>No<\/td>\n<\/tr>\n<tr>\n<th>5<\/th>\n<td>24.0<\/td>\n<td>164.00<\/td>\n<td>0.0<\/td>\n<td>No<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser 
jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\n<h2 id=\"Look-at-some-Summary-stats\">Look at some Summary stats<a class=\"anchor-link\" href=\"#Look-at-some-Summary-stats\">\u00b6<\/a><\/h2>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[5]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">##Let's look at our summary statistics!<\/span>\r\n\r\n<span class=\"n\">df<\/span><span class=\"o\">.<\/span><span class=\"n\">describe<\/span><span class=\"p\">(<\/span><span class=\"n\">include<\/span> <span class=\"o\">=<\/span> <span class=\"s1\">'all'<\/span><span class=\"p\">)<\/span><span class=\"o\">.<\/span><span class=\"n\">round<\/span><span class=\"p\">(<\/span><span class=\"mi\">2<\/span><span class=\"p\">)<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[5]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br \/>\n}<\/p>\n<p>.dataframe 
thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Age<\/th>\n<th>Height<\/th>\n<th>Coffee<\/th>\n<th>Tattoo<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>count<\/th>\n<td>148.00<\/td>\n<td>148.00<\/td>\n<td>148.00<\/td>\n<td>148<\/td>\n<\/tr>\n<tr>\n<th>unique<\/th>\n<td>NaN<\/td>\n<td>NaN<\/td>\n<td>NaN<\/td>\n<td>2<\/td>\n<\/tr>\n<tr>\n<th>top<\/th>\n<td>NaN<\/td>\n<td>NaN<\/td>\n<td>NaN<\/td>\n<td>No<\/td>\n<\/tr>\n<tr>\n<th>freq<\/th>\n<td>NaN<\/td>\n<td>NaN<\/td>\n<td>NaN<\/td>\n<td>109<\/td>\n<\/tr>\n<tr>\n<th>mean<\/th>\n<td>27.89<\/td>\n<td>170.99<\/td>\n<td>1.18<\/td>\n<td>NaN<\/td>\n<\/tr>\n<tr>\n<th>std<\/th>\n<td>8.06<\/td>\n<td>11.13<\/td>\n<td>1.00<\/td>\n<td>NaN<\/td>\n<\/tr>\n<tr>\n<th>min<\/th>\n<td>19.00<\/td>\n<td>147.00<\/td>\n<td>0.00<\/td>\n<td>NaN<\/td>\n<\/tr>\n<tr>\n<th>25%<\/th>\n<td>24.00<\/td>\n<td>163.00<\/td>\n<td>0.44<\/td>\n<td>NaN<\/td>\n<\/tr>\n<tr>\n<th>50%<\/th>\n<td>26.00<\/td>\n<td>170.00<\/td>\n<td>1.00<\/td>\n<td>NaN<\/td>\n<\/tr>\n<tr>\n<th>75%<\/th>\n<td>30.00<\/td>\n<td>180.00<\/td>\n<td>2.00<\/td>\n<td>NaN<\/td>\n<\/tr>\n<tr>\n<th>max<\/th>\n<td>90.00<\/td>\n<td>220.00<\/td>\n<td>4.00<\/td>\n<td>NaN<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\n<h2 id=\"Scaling\">Scaling<a class=\"anchor-link\" href=\"#Scaling\">\u00b6<\/a><\/h2>\n<p>We will deal with the Quantitative columns separately.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div 
class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[6]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">## make a smaller dataframe with just quant columns:<\/span>\r\n<span class=\"n\">dfq<\/span> <span class=\"o\">=<\/span> <span class=\"n\">df<\/span><span class=\"p\">[[<\/span><span class=\"s2\">\"Age\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Height\"<\/span><span class=\"p\">,<\/span> <span class=\"s2\">\"Coffee\"<\/span><span class=\"p\">]]<\/span>\r\n\r\n<span class=\"n\">dfq<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[6]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br \/>\n}<\/p>\n<p>.dataframe thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table 
class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Age<\/th>\n<th>Height<\/th>\n<th>Coffee<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>22.0<\/td>\n<td>177.80<\/td>\n<td>0.0<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>24.0<\/td>\n<td>175.00<\/td>\n<td>2.0<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>24.0<\/td>\n<td>182.88<\/td>\n<td>0.0<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>24.0<\/td>\n<td>160.00<\/td>\n<td>1.0<\/td>\n<\/tr>\n<tr>\n<th>5<\/th>\n<td>24.0<\/td>\n<td>164.00<\/td>\n<td>0.0<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[7]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">##we can quickly fix things with a lambda function<\/span>\r\n<span class=\"c1\">## If you're adventurous, explore the preprocessing functions of sklearn!<\/span>\r\n\r\n<span class=\"n\">df2<\/span><span class=\"o\">=<\/span><span class=\"n\">dfq<\/span><span class=\"o\">.<\/span><span class=\"n\">apply<\/span><span class=\"p\">(<\/span><span class=\"k\">lambda<\/span> <span class=\"n\">x<\/span><span class=\"p\">:<\/span> <span class=\"p\">(<\/span><span class=\"n\">x<\/span> <span class=\"o\">-<\/span> <span class=\"n\">np<\/span><span class=\"o\">.<\/span><span class=\"n\">mean<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">))<\/span> <span class=\"o\">\/<\/span> <span class=\"p\">(<\/span><span class=\"n\">np<\/span><span class=\"o\">.<\/span><span class=\"n\">std<\/span><span class=\"p\">(<\/span><span class=\"n\">x<\/span><span class=\"p\">)))<\/span>\r\n\r\n<span 
class=\"n\">df2<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[7]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br \/>\n}<\/p>\n<p>.dataframe thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Age<\/th>\n<th>Height<\/th>\n<th>Coffee<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>-0.732898<\/td>\n<td>0.613901<\/td>\n<td>-1.184843<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>-0.483831<\/td>\n<td>0.361372<\/td>\n<td>0.822101<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>-0.483831<\/td>\n<td>1.072062<\/td>\n<td>-1.184843<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>-0.483831<\/td>\n<td>-0.991465<\/td>\n<td>-0.181371<\/td>\n<\/tr>\n<tr>\n<th>5<\/th>\n<td>-0.483831<\/td>\n<td>-0.630708<\/td>\n<td>-1.184843<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[13]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">##Let's summarize again!<\/span>\r\n\r\n\r\n<span 
class=\"n\">df2<\/span><span class=\"o\">.<\/span><span class=\"n\">describe<\/span><span class=\"p\">()<\/span><span class=\"o\">.<\/span><span class=\"n\">round<\/span><span class=\"p\">(<\/span><span class=\"mi\">2<\/span><span class=\"p\">)<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[13]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br \/>\n}<\/p>\n<p>.dataframe thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Age<\/th>\n<th>Height<\/th>\n<th>Coffee<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>count<\/th>\n<td>1.480000e+02<\/td>\n<td>1.480000e+02<\/td>\n<td>1.480000e+02<\/td>\n<\/tr>\n<tr>\n<th>mean<\/th>\n<td>-2.160434e-16<\/td>\n<td>2.790561e-16<\/td>\n<td>4.800964e-17<\/td>\n<\/tr>\n<tr>\n<th>std<\/th>\n<td>1.003396e+00<\/td>\n<td>1.003396e+00<\/td>\n<td>1.003396e+00<\/td>\n<\/tr>\n<tr>\n<th>min<\/th>\n<td>-1.106500e+00<\/td>\n<td>-2.163923e+00<\/td>\n<td>-1.184843e+00<\/td>\n<\/tr>\n<tr>\n<th>25%<\/th>\n<td>-4.838307e-01<\/td>\n<td>-7.208974e-01<\/td>\n<td>-7.458238e-01<\/td>\n<\/tr>\n<tr>\n<th>50%<\/th>\n<td>-2.347631e-01<\/td>\n<td>-8.957364e-02<\/td>\n<td>-1.813708e-01<\/td>\n<\/tr>\n<tr>\n<th>75%<\/th>\n<td>2.633722e-01<\/td>\n<td>8.123175e-01<\/td>\n<td>8.221012e-01<\/td>\n<\/tr>\n<tr>\n<th>max<\/th>\n<td>7.735402e+00<\/td>\n<td>4.419882e+00<\/td>\n<td>2.829045e+00<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div 
class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\n<h2 id=\"Dummy-Variables\">Dummy Variables<a class=\"anchor-link\" href=\"#Dummy-Variables\">\u00b6<\/a><\/h2>\n<p>We will now deal with tattoos! Tattoo is a Yes\/No category, so we turn it into dummy (0\/1) columns.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[9]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">## this created just the dummy:<\/span>\r\n<span class=\"n\">tattoo_d<\/span> <span class=\"o\">=<\/span> <span class=\"n\">pandas<\/span><span class=\"o\">.<\/span><span class=\"n\">get_dummies<\/span><span class=\"p\">(<\/span><span class=\"n\">df<\/span><span class=\"p\">[<\/span><span class=\"s1\">'Tattoo'<\/span><span class=\"p\">],<\/span> <span class=\"n\">prefix<\/span> <span class=\"o\">=<\/span> <span class=\"s1\">'Tattoo'<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"c1\">##look:<\/span>\r\n\r\n<span class=\"n\">tattoo_d<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt 
jp-OutputArea-prompt\">Out[9]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br \/>\n}<\/p>\n<p>.dataframe thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Tattoo_No<\/th>\n<th>Tattoo_Yes<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>0<\/td>\n<td>1<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>0<\/td>\n<td>1<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>5<\/th>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[10]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">## Join things back up<\/span>\r\n<span class=\"c1\">## you can use any join method you like:  Concat also works.<\/span>\r\n\r\n<span class=\"n\">df3<\/span><span class=\"o\">=<\/span> <span class=\"n\">df2<\/span><span class=\"o\">.<\/span><span class=\"n\">join<\/span><span class=\"p\">(<\/span><span class=\"n\">tattoo_d<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"n\">df3<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div 
class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[10]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br \/>\n}<\/p>\n<p>.dataframe thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Age<\/th>\n<th>Height<\/th>\n<th>Coffee<\/th>\n<th>Tattoo_No<\/th>\n<th>Tattoo_Yes<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>-0.732898<\/td>\n<td>0.613901<\/td>\n<td>-1.184843<\/td>\n<td>0<\/td>\n<td>1<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>-0.483831<\/td>\n<td>0.361372<\/td>\n<td>0.822101<\/td>\n<td>0<\/td>\n<td>1<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>-0.483831<\/td>\n<td>1.072062<\/td>\n<td>-1.184843<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>-0.483831<\/td>\n<td>-0.991465<\/td>\n<td>-0.181371<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>5<\/th>\n<td>-0.483831<\/td>\n<td>-0.630708<\/td>\n<td>-1.184843<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>&#8230;<\/th>\n<td>&#8230;<\/td>\n<td>&#8230;<\/td>\n<td>&#8230;<\/td>\n<td>&#8230;<\/td>\n<td>&#8230;<\/td>\n<\/tr>\n<tr>\n<th>159<\/th>\n<td>-0.234763<\/td>\n<td>-0.901276<\/td>\n<td>0.822101<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>160<\/th>\n<td>-0.608365<\/td>\n<td>0.271183<\/td>\n<td>-0.181371<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>162<\/th>\n<td>-0.359297<\/td>\n<td>-0.089574<\/td>\n<td>-0.181371<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>163<\/th>\n<td>0.014305<\/td>\n<td>0.631939<\/td>\n<td>-1.184843<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>164<\/th>\n<td>-0.234763<\/td>\n<td>0.902507<\/td>\n<td>-0.181371<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>148 rows \u00d7 5 
columns<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\n<h2 id=\"Saving\">Saving<a class=\"anchor-link\" href=\"#Saving\">\u00b6<\/a><\/h2>\n<p>We will save our clean dataset as a new file &#8211; now we don&#8217;t need to use the old one.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-MarkdownCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\"><\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput\" data-mime-type=\"text\/markdown\">\n<p>Strangely enough, we did LESS well with scaling! This is something we would want to explore in the future.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[11]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">## write!  
It's nice to datestamp files, if you make multiple ones:<\/span>\r\n<span class=\"c1\">## you will need to create the folder \"clean_data\", if you don't have one already<\/span>\r\n\r\n<span class=\"n\">today<\/span> <span class=\"o\">=<\/span> <span class=\"nb\">str<\/span><span class=\"p\">(<\/span><span class=\"n\">date<\/span><span class=\"o\">.<\/span><span class=\"n\">today<\/span><span class=\"p\">())<\/span>\r\n<span class=\"n\">filename<\/span> <span class=\"o\">=<\/span> <span class=\"s2\">\"clean_data\/small_survey\"<\/span><span class=\"o\">+<\/span> <span class=\"n\">today<\/span><span class=\"o\">+<\/span><span class=\"s2\">\".csv\"<\/span>\r\n\r\n<span class=\"n\">df3<\/span><span class=\"o\">.<\/span><span class=\"n\">to_csv<\/span><span class=\"p\">(<\/span><span class=\"n\">filename<\/span><span class=\"p\">)<\/span>\r\n<span class=\"nb\">print<\/span><span class=\"p\">(<\/span><span class=\"s2\">\"saved to: \"<\/span><span class=\"p\">,<\/span> <span class=\"n\">filename<\/span><span class=\"p\">)<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\"><\/div>\n<div class=\"jp-RenderedText jp-OutputArea-output\" data-mime-type=\"text\/plain\">\n<pre>saved to:  clean_data\/small_survey2024-02-13.csv\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[14]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div 
class=\"highlight hl-ipython3\">\n<pre><span class=\"c1\">## In my next notebook, I can just pull up the clean data:<\/span>\r\n\r\n<span class=\"n\">dat<\/span> <span class=\"o\">=<\/span> <span class=\"n\">pandas<\/span><span class=\"o\">.<\/span><span class=\"n\">read_csv<\/span><span class=\"p\">(<\/span><span class=\"s2\">\"clean_data\/small_survey2024-02-13.csv\"<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"c1\">## If you are running this, you may need to change the date above.<\/span>\r\n\r\n<span class=\"nb\">print<\/span><span class=\"p\">(<\/span><span class=\"s2\">\"New Shape: \"<\/span><span class=\"p\">,<\/span> <span class=\"n\">dat<\/span><span class=\"o\">.<\/span><span class=\"n\">shape<\/span><span class=\"p\">)<\/span>\r\n\r\n<span class=\"n\">dat<\/span><span class=\"o\">.<\/span><span class=\"n\">head<\/span><span class=\"p\">()<\/span>\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell-outputWrapper\">\n<div class=\"jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser\"><\/div>\n<div class=\"jp-OutputArea jp-Cell-outputArea\">\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\"><\/div>\n<div class=\"jp-RenderedText jp-OutputArea-output\" data-mime-type=\"text\/plain\">\n<pre>New Shape:  (148, 6)\r\n<\/pre>\n<\/div>\n<\/div>\n<div class=\"jp-OutputArea-child\">\n<div class=\"jp-OutputPrompt jp-OutputArea-prompt\">Out[14]:<\/div>\n<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult\" data-mime-type=\"text\/html\">\n<div>\n<p>.dataframe tbody tr th:only-of-type {<br \/>\nvertical-align: middle;<br \/>\n}<\/p>\n<p>.dataframe tbody tr th {<br \/>\nvertical-align: top;<br \/>\n}<\/p>\n<p>.dataframe thead th {<br \/>\ntext-align: right;<br \/>\n}<\/p>\n<table class=\"dataframe\">\n<thead>\n<tr>\n<th><\/th>\n<th>Unnamed: 
0<\/th>\n<th>Age<\/th>\n<th>Height<\/th>\n<th>Coffee<\/th>\n<th>Tattoo_No<\/th>\n<th>Tattoo_Yes<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<th>0<\/th>\n<td>0<\/td>\n<td>-0.732898<\/td>\n<td>0.613901<\/td>\n<td>-1.184843<\/td>\n<td>0<\/td>\n<td>1<\/td>\n<\/tr>\n<tr>\n<th>1<\/th>\n<td>1<\/td>\n<td>-0.483831<\/td>\n<td>0.361372<\/td>\n<td>0.822101<\/td>\n<td>0<\/td>\n<td>1<\/td>\n<\/tr>\n<tr>\n<th>2<\/th>\n<td>2<\/td>\n<td>-0.483831<\/td>\n<td>1.072062<\/td>\n<td>-1.184843<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>3<\/th>\n<td>4<\/td>\n<td>-0.483831<\/td>\n<td>-0.991465<\/td>\n<td>-0.181371<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<tr>\n<th>4<\/th>\n<td>5<\/td>\n<td>-0.483831<\/td>\n<td>-0.630708<\/td>\n<td>-1.184843<\/td>\n<td>1<\/td>\n<td>0<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<div class=\"jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs\">\n<div class=\"jp-Cell-inputWrapper\">\n<div class=\"jp-Collapser jp-InputCollapser jp-Cell-inputCollapser\"><\/div>\n<div class=\"jp-InputArea jp-Cell-inputArea\">\n<div class=\"jp-InputPrompt jp-InputArea-prompt\">In\u00a0[\u00a0]:<\/div>\n<div class=\"jp-CodeMirrorEditor jp-Editor jp-InputArea-editor\" data-type=\"inline\">\n<div class=\"CodeMirror cm-s-jupyter\">\n<div class=\"highlight hl-ipython3\">\n<pre> 
\r\n<\/pre>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n","protected":false},"author":883,"menu_order":11,"template":"","meta":{"pb_show_title":"on","pb_short_title":"","pb_subtitle":"","pb_authors":[],"pb_section_license":""},"chapter-type":[],"contributor":[],"license":[],"class_list":["post-430","chapter","type-chapter","status-publish","hentry"],"part":98,"_links":{"self":[{"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/pressbooks\/v2\/chapters\/430","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/pressbooks\/v2\/chapters"}],"about":[{"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/wp\/v2\/types\/chapter"}],"author":[{"embeddable":true,"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/wp\/v2\/users\/883"}],"version-history":[{"count":1,"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/pressbooks\/v2\/chapters\/430\/revisions"}],"predecessor-version":[{"id":431,"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/pressbooks\/v2\/chapters\/430\/revisions\/431"}],"part":[{"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/pressbooks\/v2\/parts\/98"}],"metadata":[{"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/pressbooks\/v2\/chapters\/430\/metadata\/"}],"wp:attachment":[{"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/wp\/v2\/media?parent=430"}],"wp:term":[{"taxonomy":"chapter-type","embeddable":true,"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/pressbooks\/v2\/chapter-type?post=430"},{"taxonomy":"contributor","embeddable":true,"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/wp\/v2\/contributor?post=430"},{"taxonomy":"license","embeddable":true,"href":"https:\/\/pressbooks.bccampus.ca\/businessanalytics\/wp-json\/wp\/v2\/license?post=430"}],"curies":[{"name":"wp","href":"https
:\/\/api.w.org\/{rel}","templated":true}]}}