diff --git a/DAKD2020_ex3_Elias_Ervela.ipynb b/DAKD2020_ex3_Elias_Ervela.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..c7884966ec90219bd4477c56a406634b1d054f53
--- /dev/null
+++ b/DAKD2020_ex3_Elias_Ervela.ipynb
@@ -0,0 +1 @@
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"DAKD2020_ex3_Elias_Ervela.ipynb","provenance":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.3"},"toc":{"base_numbering":1,"nav_menu":{"height":"180px","width":"160px"},"number_sections":true,"sideBar":true,"skip_h1_title":true,"title_cell":"Table of Contents","title_sidebar":"Contents","toc_cell":false,"toc_position":{},"toc_section_display":true,"toc_window_display":false}},"cells":[{"cell_type":"markdown","metadata":{"id":"o8Yz6wcFl0SZ"},"source":["Elias Ervelä <br>\n","student number 518434 <br>\n","emerve@utu.fi  <br>\n","December, 4, 2020  <br>"]},{"cell_type":"markdown","metadata":{"id":"rMMbxwZ7l0SZ"},"source":["# Data Analysis and Knowledge Discovery: Exercise 3, Supervised learning"]},{"cell_type":"markdown","metadata":{"id":"ND3O3s6Wl0SZ"},"source":["This is the template for the third exercise. The idea of this exercise is to apply supervised learning to predict the ship type using certain attributes (speed, destination harbour...) and K nearest neighbors (kNN) classifier. The data is available in Moodle course page: shipdata_2020.xlsx. <br> \n","\n","General guidance for exercises is given in Moodle course page. 
<br>\n","\n"," - answer all the questions below\n"," - write easily readable code, include explanations what your code does\n"," - make informative illustrations: include labels for x- and y-axes, legends and captions for your plots\n"," - do not change anything manually or outside the script in the data file\n"," - before saving the ipynb file (and possible printing) run: \"Restart & Run all\", to make sure you return a file that works as expected\n"," - name your file as DAKD2020_ex3_firstname_lastname.ipynb\n"," - +1 bonus point requires a correct solution and also thorough analysis. Discuss also how the results could be improved\n"," - if you encounter problems, Google first. If you can't find an answer to the problem, don't hesitate to ask in the Moodle discussion or directly: pekavir@utu.fi\n"," - Note! Don't leave it to the last moment! No feedback service during the weekend\n"," - The deadline is **Friday 4th of December 23:59**"]},{"cell_type":"markdown","metadata":{"id":"tjQo2BLJl0SZ"},"source":["## Data import"]},{"cell_type":"markdown","metadata":{"id":"Hkfm5UhCl0SZ"},"source":["Gather *all* packages needed for this notebook here:"]},{"cell_type":"code","metadata":{"id":"_HNy2B6Hl0SZ"},"source":["import numpy as np\n","import pandas as pd\n","import matplotlib.pyplot as plt"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"FFHDDGrIl0Sa"},"source":["Import the data."]},{"cell_type":"markdown","metadata":{"id":"E0T5KP-ACagm"},"source":["Lets import the data from my google drive.\n","I used this as a help: https://buomsoo-kim.github.io/colab/2018/04/16/Importing-files-from-Google-Drive-in-Google-Colab.md/\n"]},{"cell_type":"code","metadata":{"id":"iaEpSNSw_IoA"},"source":["from pydrive.auth import GoogleAuth\n","from pydrive.drive import GoogleDrive\n","from google.colab import auth\n","from oauth2client.client import 
GoogleCredentials"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"74B_BhOPAb2a"},"source":["auth.authenticate_user()\n","gauth = GoogleAuth()\n","gauth.credentials = GoogleCredentials.get_application_default()\n","drive = GoogleDrive(gauth)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"50oqP3boAm6b"},"source":["downloaded = drive.CreateFile({'id':\"1Pkdj3ZSe_ipq31Z7EY2Lpgwdb7oebEjl\"})   # replace the id with id of file you want to access\n","downloaded.GetContentFile('shipdata_2020.xlsx')        # replace the file name with your file"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Zo4R468vA0cs"},"source":["data = pd.read_excel('shipdata_2020.xlsx')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"XP3Pv_M_C2rx","executionInfo":{"status":"ok","timestamp":1607101253685,"user_tz":-120,"elapsed":2176,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"b2b11437-4802-4ae8-9707-dc6bc358a7a7"},"source":["data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>MMSI</th>\n","      <th>Speed</th>\n","      <th>COG</th>\n","      <th>Destination</th>\n","      <th>Ship_type</th>\n","      <th>Gross_tonnage</th>\n","      <th>Length</th>\n","      <th>Breadth</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    
<tr>\n","      <th>0</th>\n","      <td>212209000</td>\n","      <td>10.1377</td>\n","      <td>64.3074</td>\n","      <td>Hamina</td>\n","      <td>Cargo</td>\n","      <td>3416</td>\n","      <td>94.91</td>\n","      <td>15.34</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>212436000</td>\n","      <td>13.5256</td>\n","      <td>77.0755</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>6280</td>\n","      <td>116.90</td>\n","      <td>18.00</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>219082000</td>\n","      <td>9.9416</td>\n","      <td>74.6762</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>9980</td>\n","      <td>141.20</td>\n","      <td>21.90</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>219083000</td>\n","      <td>11.6038</td>\n","      <td>74.7529</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>9980</td>\n","      <td>141.20</td>\n","      <td>21.60</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>219426000</td>\n","      <td>11.9203</td>\n","      <td>56.3253</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>3219</td>\n","      <td>99.90</td>\n","      <td>15.00</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>129</th>\n","      <td>273374820</td>\n","      <td>10.0396</td>\n","      <td>74.6253</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>4979</td>\n","      <td>139.90</td>\n","      <td>16.70</td>\n","    </tr>\n","    <tr>\n","      <th>130</th>\n","      <td>273385070</td>\n","      <td>9.3507</td>\n","      <td>74.5454</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>4979</td>\n","      <td>139.90</td>\n","  
    <td>16.94</td>\n","    </tr>\n","    <tr>\n","      <th>131</th>\n","      <td>273388150</td>\n","      <td>9.7668</td>\n","      <td>68.7159</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>5075</td>\n","      <td>140.85</td>\n","      <td>16.86</td>\n","    </tr>\n","    <tr>\n","      <th>132</th>\n","      <td>636092755</td>\n","      <td>11.1554</td>\n","      <td>73.7013</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>23240</td>\n","      <td>183.00</td>\n","      <td>27.37</td>\n","    </tr>\n","    <tr>\n","      <th>133</th>\n","      <td>357100000</td>\n","      <td>11.2703</td>\n","      <td>59.3888</td>\n","      <td>Vysotsk</td>\n","      <td>Cargo</td>\n","      <td>43717</td>\n","      <td>229.04</td>\n","      <td>32.31</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>134 rows × 8 columns</p>\n","</div>"],"text/plain":["          MMSI    Speed      COG  ... Gross_tonnage  Length  Breadth\n","0    212209000  10.1377  64.3074  ...          3416   94.91    15.34\n","1    212436000  13.5256  77.0755  ...          6280  116.90    18.00\n","2    219082000   9.9416  74.6762  ...          9980  141.20    21.90\n","3    219083000  11.6038  74.7529  ...          9980  141.20    21.60\n","4    219426000  11.9203  56.3253  ...          3219   99.90    15.00\n","..         ...      ...      ...  ...           ...     ...      ...\n","129  273374820  10.0396  74.6253  ...          4979  139.90    16.70\n","130  273385070   9.3507  74.5454  ...          4979  139.90    16.94\n","131  273388150   9.7668  68.7159  ...          5075  140.85    16.86\n","132  636092755  11.1554  73.7013  ...         23240  183.00    27.37\n","133  357100000  11.2703  59.3888  ...         
43717  229.04    32.31\n","\n","[134 rows x 8 columns]"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"markdown","metadata":{"id":"4RT-aufBl0Sa"},"source":["## Data preprocessing"]},{"cell_type":"markdown","metadata":{"id":"xYkpfGTul0Sa"},"source":[" - First, find out how many different destinations there are in the data. Do you need to make any preprocessing? **1p**\n"," - Destination harbor is a categorical variable. It needs to be converted into numerical. Explain, why do you need to make this step? You can use get_dummies from pandas to implement onehot coding for categorical features **1p**\n"," - Plot Gross tonnage versus the ship Length. Use different colors for different ship types. According to the plot, there is one clear outlier. Find the correct value from marinetraffic.com, and make the correction **1p**\n"," - It is good to exploit domain knowledge and make some reasonable transformation to the feature values to improve the expected results and/or to avoid redundancy. Find out what gross tonnage means. Make some transformation to Length values to acquire a linear relationship between the transformed length and Gross tonnage values **1p**\n"," - The numerical variables have quite different ranges. To ensure that all variables can have the same importance on the model, perform Z-score standardization. Perform it for speed, transformed length, and breadth **1p**"]},{"cell_type":"markdown","metadata":{"id":"KF1p6H4uHvoS"},"source":["**First, find out how many different destinations there are in the data. Do you need to make any preprocessing? 
1p**"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"G1OaInNuDRDs","executionInfo":{"status":"ok","timestamp":1607101253686,"user_tz":-120,"elapsed":2170,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"5c7794c0-d083-417e-fb9e-d918bf7d2d26"},"source":["data['Destination'].unique()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Hamina', 'Helsinki', 'Kotka', 'Kronshtadt', 'Kunda', 'Muuga',\n","       'Paldiski', 'Porvoo', 'Primorsk', 'Sillamäe', 'Sillamae',\n","       'Tallinn', 'Ust-Luga', 'Valko-Loviisa', 'Viipuri', 'Vuosaari',\n","       'Vysotsk'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UmxxxrSrDvxF","executionInfo":{"status":"ok","timestamp":1607101253686,"user_tz":-120,"elapsed":2162,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"b896b3f7-db13-4622-8e31-4b1fc0ffc662"},"source":["# There is Sillamäe and Sillamae that most likely mean the same place. 
Let's change them all to Sillamae\n","\n","data[data['Destination'] == 'Sillamäe']['Destination']"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["76    Sillamäe\n","77    Sillamäe\n","Name: Destination, dtype: object"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"id":"YqK6qBgdFK5e"},"source":["# Select the 'Sillamäe' rows by value rather than by hard-coded row numbers,\n","# so the fix keeps working if the spreadsheet is reordered or extended.\n","data.loc[data['Destination'] == 'Sillamäe', 'Destination'] = \"Sillamae\""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"C-s1rX5uG0p9","executionInfo":{"status":"ok","timestamp":1607101401738,"user_tz":-120,"elapsed":668,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"453740b7-958e-4401-ebee-dfd4ce7bcc19"},"source":["# Let's check that it worked\n","data['Destination'].unique()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Hamina', 'Helsinki', 'Kotka', 'Kronshtadt', 'Kunda', 'Muuga',\n","       'Paldiski', 'Porvoo', 'Primorsk', 'Sillamae', 'Tallinn',\n","       'Ust-Luga', 'Valko-Loviisa', 'Viipuri', 'Vuosaari', 'Vysotsk'],\n","      dtype=object)"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"markdown","metadata":{"id":"YLr0Tsu5H22J"},"source":["**Destination harbor is a categorical variable. It needs to be converted into numerical. Explain, why do you need to make this step? 
You can use get_dummies from pandas to implement onehot coding for categorical features 1p**\n"]},{"cell_type":"markdown","metadata":{"id":"6cMX_a4Xtbyk"},"source":["Because the kNN classifier computes distances between feature vectors, every feature must be numeric before we can do numerical operations on destinations. One-hot coding gives each destination its own 0/1 column, so no artificial order or distance between harbours is introduced (as would happen with integer labels)."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"ld0zFjIaH4mC","executionInfo":{"status":"ok","timestamp":1607101254203,"user_tz":-120,"elapsed":2657,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"6a368371-0688-48f4-f273-432ff9a0055d"},"source":["dest = pd.get_dummies(data['Destination'])\n","dest"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Hamina</th>\n","      <th>Helsinki</th>\n","      <th>Kotka</th>\n","      <th>Kronshtadt</th>\n","      <th>Kunda</th>\n","      <th>Muuga</th>\n","      <th>Paldiski</th>\n","      <th>Porvoo</th>\n","      <th>Primorsk</th>\n","      <th>Sillamae</th>\n","      <th>Tallinn</th>\n","      <th>Ust-Luga</th>\n","      <th>Valko-Loviisa</th>\n","      <th>Viipuri</th>\n","      <th>Vuosaari</th>\n","      <th>Vysotsk</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      
<td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      
<td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>129</th>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>130</th>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>131</th>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>132</th>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>133</th>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","  
</tbody>\n","</table>\n","<p>134 rows × 16 columns</p>\n","</div>"],"text/plain":["     Hamina  Helsinki  Kotka  ...  Viipuri  Vuosaari  Vysotsk\n","0         1         0      0  ...        0         0        0\n","1         1         0      0  ...        0         0        0\n","2         1         0      0  ...        0         0        0\n","3         1         0      0  ...        0         0        0\n","4         1         0      0  ...        0         0        0\n","..      ...       ...    ...  ...      ...       ...      ...\n","129       0         0      0  ...        0         0        1\n","130       0         0      0  ...        0         0        1\n","131       0         0      0  ...        0         0        1\n","132       0         0      0  ...        0         0        1\n","133       0         0      0  ...        0         0        1\n","\n","[134 rows x 16 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"markdown","metadata":{"id":"Cbx6CGM0Mh1l"},"source":["**Plot Gross tonnage versus the ship Length. Use different colors for different ship types. According to the plot, there is one clear outlier. 
Find the correct value from marinetraffic.com, and make the correction 1p**\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ps4UpUjLOw6B","executionInfo":{"status":"ok","timestamp":1607101254205,"user_tz":-120,"elapsed":2645,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"69ca3ebf-e09a-410e-9441-8bd0fdd75de5"},"source":["# Lets find out the different types of ships\n","\n","data['Ship_type'].unique()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Cargo', 'Tanker', 'Tug'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":265},"id":"kpIheTPJMjgM","executionInfo":{"status":"ok","timestamp":1607101254205,"user_tz":-120,"elapsed":2637,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"880f1d6c-1709-4c8d-a219-51e09da9b094"},"source":["# Plot\n","\n","plt.plot(data[data['Ship_type']=='Cargo']['Gross_tonnage'], data[data['Ship_type']=='Cargo']['Length'], 'o', color = 'r', label = 'Cargo')\n","plt.plot(data[data['Ship_type']=='Tanker']['Gross_tonnage'], data[data['Ship_type']=='Tanker']['Length'], 'o', color = 'b', label = 'Tanker')\n","plt.plot(data[data['Ship_type']=='Tug']['Gross_tonnage'], data[data['Ship_type']=='Tug']['Length'], 'o', color = 'g', label = 
'Tug')\n","plt.legend()\n","plt.show()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de3RU9d3v8fc3F0wDLtRAEcUk2MeiUAQxWlpcVQs+Vo7Xumx1jQra0xyEp14eu07V9Dm1y5X22NPnYo+FNh4v1E5tPV4eldpWZWHr7VQTRW7WW00CLARMhYpZSEh+54+9k8wkM5PJZC579nxea82aPb/Ze+YLmfnml9/vu3/bnHOIiEi4lBU6ABERyT4ldxGREFJyFxEJISV3EZEQUnIXEQmhikIHADBp0iRXX19f6DBERIpKW1vbB865yYmeC0Ryr6+vp7W1tdBhiIgUFTPrSPachmVEREJIyV1EJISU3EVEQigQY+6J9PT0sG3bNvbv31/oUAKhqqqKadOmUVlZWehQRKQIBDa5b9u2jUMPPZT6+nrMrNDhFJRzjq6uLrZt28b06dMLHY6IFIHADsvs37+fmpqakk/sAGZGTU2N/ooRCZFoFOrroazMu49Gs/v6ge25A0rsMfR/IRIe0Sg0NkJ3t/e4o8N7DBCJZOc9AttzFxEJq6amwcTer7vba88WJfcRvP/++1x66aV85jOf4eSTT2bx4sW89dZbhQ5LRIpYZ+fo2jMRnuSegwEs5xwXXXQRZ5xxBu+++y5tbW388Ic/ZOfOnWkd29fXN+YYRCR8amtH156JcCT3/gGsjg5wbnAAa4wJft26dVRWVrJs2bKBtjlz5nDSSSexcOFC5s2bx+zZs3nssccAaG9vZ8aMGVx55ZV87nOfY+vWrdx2223MmDGD0047jcsuu4wf//jHAKxfv5758+dz4oknctFFF/Hhhx+OKVYRKR7NzVBdHd9WXe21Z0s4knuOBrA2bdrEySefPKy9qqqKRx99lFdffZV169Zx44030n+5wrfffpvly5ezefNmdu3axcMPP8zrr7/O7373u7j1c6688kpuv/12NmzYwOzZs/n+978/plhFpHhEItDSAnV1YObdt7RkbzIVAl4tk7Z8DGDFcM5xyy238Kc//YmysjK2b98+MFRTV1fH/PnzAXjhhRe44IILqKqqoqqqivPOOw+AvXv3smfPHk4//XQAlixZwiWXXJKTWEUkmCKR7CbzocLRc8/RANasWbNoa2sb1h6NRtm9ezdtbW2sX7+eKVOmDNSgjx8/fkzvKSKSDeFI7jkawPryl7/MJ598QktLy0Dbhg0b6Ojo4NOf/jSVlZWsW7eOjo7Eq24uWLCAJ554gv3797Nv3z7WrFkDwMSJEzn88MN57rnnALj//vsHevEiItkQjmGZ/r9tmpq8oZjaWi+xj/FvHjPj0Ucf5frrr+f222+nqqqK+vp6br31Vq699lpmz55NQ0MDxx9/fMLjTznlFM4//3xOPPFEpkyZwuzZs5k4cSIAq1evZtmyZXR3d3Psscdy7733jilWEZFY1j8RWEgNDQ1u6MU63njjDU444YQCRZQ9+/btY8KECXR3d/OlL32JlpYW5s2bl9FrheX/RESyw8zanHMNiZ4LR889wBobG9myZQv79+9nyZIlGSd2EZHRUHLPsV/96leFDkFESlA4JlRFpGByvbqhZEY9dxHJWD5WN5TMqOcuIhnLx+qGkhkldxHJWJ5PDpdRGDG5m9kxZrbOzLaY2WYzu85vv9XMtpvZev+2OOaYm83sHTN708zOzuU/IFe6urqYO3cuc+fO5cgjj+Too48eeHzgwIG0XuPZZ5/l3H
PPzXGkIoWTj9UNJTPp9NwPAjc652YC84EVZjbTf+7fnXNz/duTAP5zlwKzgK8AK82sPAexx8n2pE5NTQ3r169n/fr1LFu2jBtuuGHg8bhx47IRckoHDx7M+XuIjFWik8MBurq87+CiRd7CWP23qqrE381k319N1mZuxOTunNvhnHvV3/4IeAM4OsUhFwC/ds594px7D3gHODUbwSaToxV/h7nrrrs45ZRTmDNnDhdffDHd/mDj0qVLufbaa/niF7/Isccey0MPPTTs2FdeeYWTTjppYF34008/nZNPPpmzzz6bHTt2AHDGGWdw/fXX09DQwB133JHd4EVyIBKBJUuGt+/bB5dfDmvXxrd/8glccUX8dzPZ93f58vx8r8NqVGPuZlYPnAT82W/6JzPbYGb3mNnhftvRwNaYw7aR+pfBmOVrUuerX/0qr7zyCq+//jonnHACd99998BzO3bs4Pnnn2fNmjXcdNNNcce9+OKLLFu2jMcee4za2lq+9a1v8dBDD9HW1sbVV19NU0ygBw4coLW1lRtvvDG7wYvkyJNPjm5/5+K/m8m+vy0tmqwdi7RLIc1sAvAwcL1z7u9mtgq4DXD+/b8CV4/i9RqBRoDaMQ7Q5WtSZ9OmTXz3u99lz5497Nu3j7PPHpxOuPDCCykrK2PmzJlxV2p64403aGxs5KmnnuKoo45i06ZNbNq0ibPOOguA3t5epk6dOrD/17/+9ewGLZJjmXzPYo9Jdnxvb/berxSl1XM3s0q8xB51zj0C4Jzb6Zzrdc71AXcxOPSyHTgm5vBpflsc51yLc67BOdcwefLksfwb8japs3TpUu688042btzI9773vYFlfgEOOeSQge3Y9XqmTp1KVVUVr7322sBzs2bNGhi/37hxI0899dTA/loyWAopkzHuTL5nscckO748yUydJmvTk061jAF3A2845/4tpn1qzG4XAZv87ceBS83sEDObDhwHvJy9kIfLxyWrAD766COmTp1KT08P0TQH/g477DB++9vfcvPNN/Pss88yY8YMdu/ezUsvvQRAT08Pmzdvzm6gIhnIdO6quRkqK9N/H7P472ay729jY36+12GVTs99AXAF8OUhZY8/MrONZrYBOBO4AcA5txl4ENgC/B5Y4ZxL8gdWduTjklUAt912G5///OdZsGBB0mV+E5kyZQpr1qxhxYoVvPbaazz00EN85zvfYc6cOcydO5cXX3wxu4GKZCDTuatIBO69F2pqRn6PQw6B+++P/24m+/6uXJmf73VYacnfIqL/E8mlsjKvxz6UGfT1pf86Q5ckAK/HrcScfamW/NUZqiICZG/uKl9/SUtqSu4iAmR37ioSgfZ2r8ff3q7EXghK7iICqMcdNlryV0QGRCJK5mGhnruISAgpuYuIhJCGZZLo6upi4cKFALz//vuUl5fTfybtyy+/nJeVIUVEMhWa5B7dGKVpbROdezupnVhL88JmIrMzHzzsX/IX4NZbb2XChAl8+9vfzla4IiI5FYphmejGKI1PNNKxtwOHo2NvB41PNBLdmN21QZcuXRq3nO+ECRMA6OvrY/ny5Rx//PGcddZZLF68OOGyvxJiiRZl0WLkUkCh6Lk3rW2iuyf+vOnunm6a1jaNqfeerkceeYT29na2bNnCrl27OOGEE7j66rQXyJRil+gq0Vdf7Z3u2dMz2KYrR0sehaLn3rk38Rqgydqz7fnnn+eSSy6hrKyMI488kjPPPDMv7ysBkWhRlgMHBhN7Py1GLnkUiuReOzHx+dHJ2jNVUVFBn7/IRl9fX9rXUpWQG80C41qMXPIkFMm9eWEz1ZXx501XV1bTvDC7a4PW19fT1tYGwOOPP06P3zNbsGABDz/8MH19fezcuZNnn302q+8rATeaxVe0GLnkSSiSe2R2hJbzWqibWIdh1E2so+W8lqyPt3/zm9/kj3/8I3PmzOGll14auLDGxRdfzLRp05g5cyaXX3458+bNY+LEiVl9bwmwRIuyjB
s3fJFzLUYueaQlf7Nk3759TJgwga6uLk499VReeOEFjjzyyKy+R7H9n5SUaNQbT+/s9Hrn/Ul8aJsmUyWLUi35G4pqmSA499xz2bNnDwcOHOBf/uVfsp7YJeCSLcqiZC4FouSeJRpnF5EgCfSYexCGjIJC/xciMhqBTe5VVVV0dXUpqeEl9q6uLqqqqgodiogUicAOy0ybNo1t27axe/fuQocSCFVVVUybNq3QYYhIkQhscq+srGT69OmFDkNEpCgFdlhGREQyp+QuIhJCSu4iIiGk5C4iEkJK7iIiIaTkLiISQkruIiIhpOQuIhJCSu4iIiE0YnI3s2PMbJ2ZbTGzzWZ2nd9+hJk9bWZv+/eH++1mZj8xs3fMbIOZzcv1P0JkmGgU6uuhrMy7j0YLHZFIXqXTcz8I3OicmwnMB1aY2UzgJmCtc+44YK3/GOAc4Dj/1gisynrUIqlEo9DYCB0d4Jx339ioBC8lZcTk7pzb4Zx71d/+CHgDOBq4AFjt77YauNDfvgD4hfP8P+AwM5ua9chFkmlqgu7u+Lbubq9dpESMaszdzOqBk4A/A1Occzv8p94HpvjbRwNbYw7b5rcNfa1GM2s1s1at/Fjasj6C0tk5unaREEo7uZvZBOBh4Hrn3N9jn3PeouujWnjdOdfinGtwzjVMnjx5NIdKiESjsHRp/AjK0qVjTPC1taNrFwmhtJK7mVXiJfaoc+4Rv3ln/3CLf7/Lb98OHBNz+DS/TWSYZcvg4MH4toMHvfaMNTdDdXV8W3X14EWrRUpAOtUyBtwNvOGc+7eYpx4HlvjbS4DHYtqv9Ktm5gN7Y4ZvpNjkuOpk377RtaclEoGWFqirAzPvvqVFF6uWkmIjXcbOzE4DngM2An1+8y144+4PArVAB/A159zf/F8GdwJfAbqBq5xzraneo6GhwbW2ptxFCqG/6iR2crK6OquJ0iz5c7rCokhqZtbmnGtI+FwQrlGq5B5Q9fXeIPhQdXXQ3p6VtygrS5zEzaCvb3i7iAxKldx1hqokl4eqk2Rj62MacxcRJXdJIQ9VJytXwjXXQHm597i83Hu8cmXW3kKkJCm5S3J5qjpZudKrkHHOu1diFxk7JXcZZqBA5ooIk6yLSWVdlNFLfflWokv+oKoTkSKg5C5xolG46qrBk4q6Pq6iq+8IHGV09E6jcfVpI1ZDpqqejEZh0iRvwtTM29aSLyLZp2oZiTNpEnR1pd4nVbFMqupJ8H5x9PTEHzNuHNxzj/4gEBktlUJK2lLVncfuk6xMMVn1ZE0N7NkDvb2Jj8tidaVIyVApZFgVaM3yVMUyyaoku7qSJ/ZUx4lIZpTci1WO1iyvqUn9/EjFMplWSWpNL5HsUnIvVjlas/yOO7wx8ETSWaIlUfXkSMaN05peItmm5F6scnT2aCTiTW7Grrn1y196fxy0t4886Zloza5Ufw3U1GgyVSQXNKFarPKw7ku25GH9MZGSpAnVMCqiNcu1Aq9I/lUUOgDJUH9mbGryhmJqa73EHtCMGYkENjSRUFLPvUgkrHqMRIg2t1Nf20dZZzv1TRGd7SkigJJ7UUhW9bh80V9ovKI729WQ2VegenyRUqYJ1SKQbO60nIP0JhhZC9ScqmZTRXJGE6rFyu/xdnYkPte/l/KE7YE62zNH9fgikpqSe1DFjMXUkjhbl5P4fP5Ane2Zh6s5ichwSu5BFdPjbeYWqvk47ulq66aRnyVsD1Q1ZB6u5iQiwym5B1VMzzbCA7TwTepox+jz6sSXvcrK6u/Et1snLcteDdZQdhHV44uEiZJ7UA3p2UZ4gHam02cVtDdHiaw8DVpaiNS9SLsdS1/dsTQv66TpydOCVZSiM5hECkLVMkEVjcIVV3g1jkMlKIdRUYpI6VG1TKENrfNevnzkuu9IJHFih4STkSpKEZFYWn4g14Z2qTs6YNWqwef7zzyC4V3surrEBe4JJiNVlCIisdRzz7VEXeqhknWxRzEZqaIUEYml5J5r6X
adE+03islIFaWISCwl91xLt+ucbL9IxJs87etLebUMFaWISCwl91zpn0Tt6PCybSpZ6mKn+XtARErAiMndzO4xs11mtimm7VYz225m6/3b4pjnbjazd8zsTTM7O1eBF0LaixvGLuMIXtVLf4Kvq4NrrlEXW0RyKp1qmfuAO4FfDGn/d+fcj2MbzGwmcCkwCzgKeMbMPuucS7wIShFJVPSSrMgl4SSqcwFbrlFEwmzEnrtz7k/A39J8vQuAXzvnPnHOvQe8A5w6hvgCY1R15KpLFJECG8uY+z+Z2QZ/2OZwv+1oYGvMPtv8tmHMrNHMWs2sdffu3WMIIz9Gla9VlygiBZZpcl8FfAaYC+wA/nW0L+Cca3HONTjnGiZPnpxhGPkzqnydpC5x0fgXMGPgtmhR1sMUEQEyTO7OuZ3OuV7nXB9wF4NDL9uBY2J2nea3Fb1R1ZEnqEtcVP8Wa7fE/xGzdq0SvIjkRkbJ3cymxjy8COivpHkcuNTMDjGz6cBxwMtjCzEYRl1HPqQucWhi77d2beEXbhOR8EmnFPIB4CVghpltM7NvAD8ys41mtgE4E7gBwDm3GXgQ2AL8HlhRDJUy6ZY4Jqojjz320EO9ezOoqPDWB0s7ABGRLCr5JX+jUVi6FA4eHGyrqID77hu59DzRMrtDXXMNrFyZ6jwmh6ubrhJJERm1VEv+lnxyP/RQ2LdvePuECfDRR6mP7T8BNZXycu8Xx6JF/UMwsVnesZCneMbO8f4cEBEZBa3nnkKixJ6qfUA0SmfHyAm51x+UeuYZWDjuOcAN3BbyFM/wFZVIikjWlXxyz0g0ClddRS0jn5RUXj64/cw9W3HVE3CU4SjzEruWbhSRHCj55J5sLNzoSz7Let110NPDYtZ4+6XQv0QBoKUbRSRvSj65L1uWqNWxrOwub0DducGFZPoTfFcXUS5jNVfh4v4LB+cvyssHJ1PjaOlGEcmDkp9QBa9ksaXFGx8vL4fGqtWs/Hjp8B37F/4yo5736KA+6S4iIrmmCdURrFzpVbQ4592v7L4q4X7RjgXeSA29dFCXcB+tDSYiQVDSyT3pyUsJqleiXEaj3eWN1FBGfEnjIBW+iEgQlGxyj72exrBh9QQLyTTZ/6TbVSd+MZ8KX0QkKEo2uadcnz1BVUunOybh64AKX0QkeEo2uY+4PvuQqpbausTDMHV1aRS+pH19PhGR7CjZ5J5qffbly731ZWIXABvVkr+xUo7/iIjkRskm98WLh5/AVF0N/zB+O6tWuYFlA3p7YdUqeOGFDM8/GtX1+UREsiOdC2SHTjQKq1d7Hel+ZrDkC3+hZe0/kKgSZtUqePJJr6c+qnF1XU9VRAqgJHvuiTrTzsGTz06gl/LEB5HhiIqupyoiBVCSyT1Zp7mj92jKRlgrprsbLr98FPOiGQ/Wi4hkriSTe/JOswG9xK4Rk0zavXgtFiYiBVCSa8uMdAWlcXRzgE+R7CzUWFpLRkQKpXTXlklSX97fmU4m3cQOmhcVkWAKb3Ifob48EoG6mpEutzQyzYuKSBCFN7mnqi/3e/TNXY1U83HcLtV8TA0fpPUWmhcVkaAKb5170pKYjoEB9wje1a2b+AGd1FJLJ83cAsBV3EMPVSnfQvOiIhJU4U3utbVeIh+qvDyuRx/hASI8MHyfMxZx3drz6aKGROPvdXVK7CISXOEdlklWX96/rsAQUS6jnvcoo5f6w/bAZz/LB+Pr+SWR4UM34w5qOEZEAi28yT1ZfXnd8CsoRbmMRu6ig3ocZXR0TaBx1UlEPz6fCA/Qwjepox2jjzraaTn0n9VrF5FAK70692gUrrgibmGZpNdDpZ12pg9/DTNvnV8RkQIq3Tr3RCKR+BXDgE4S1zMma1f9o4gEXWkl9/6TmoaoJXFlTbJ2DbiLSNCVTnJfvtwbjklQQdPMLQnr3fvLIuOYqUxGRAJvxORuZveY2S4z2xTTdoSZPW1mb/v3h/vtZmY/MbN3zGyDmc3LZfBpi0
ZZvmoWFe4ARt/ArYIelvO/iYx7mCULt1Lur/ZbXg5Ljnp6eIkkwLJl+Y1dRCQD6fTc7wO+MqTtJmCtc+44YK3/GOAc4Dj/1gisyk6YY7P8vx1kFcvppQKvZt279VLBKlaw6MDjrP7j9LirL63ecyHRhXcTl/GvuQZWrizQv0JEJH1pVcuYWT2wxjn3Of/xm8AZzrkdZjYVeNY5N8PMfu5vPzB0v1Svn+tqmQo76Cf2ZBzJTlTSio8iElS5qJaZEpOw3wem+NtHA1tj9tvmtyUKqtHMWs2sdffu3RmGkZ5UV1dKRSs+ikixGvOEqvO6/qMulnfOtTjnGpxzDZMnTx5rGCmVj3B1pWRU8SgixSrT5L7TH47Bv9/lt28HjonZb5rfVjDR5c9TyQGS//5xLOQpqis+iWvVio8iUswyTe6PA0v87SXAYzHtV/pVM/OBvSONt+dSNAqNP5vH/hQX36jgIFct3E7LfYfoSngiEhojTqia2QPAGcAkYCfwPeA/gQeBWqAD+Jpz7m9mZsCdeNU13cBVzrkRZ0pzNaFaX594YcihNHEqIsUo1YTqiEv+OucuS/LUwgT7OmDF6MLLnc6OxFUww/bTxKmIhEz4zlCNRmHSJDCjljS67WjiVETCJ1zJPRqFq6+Gri4g8bICQydWNXEqImEUruTe1AQHDgw8TLQW+zX8dPCxJk5FJKTCtZ57Wdmw5XxTCsC/XUQkU6WznvtoBs9nzsxdHCIiBRau5L54cXr7HXYYbN6c21hERAooXMn9/vvT2+/DD3Mbh4hIgYUrue/bB3gXvK7nPcropZ73iJKsVF9EJJxGPImp2ES5jEbuopvxAHRQTyN3ASS++IaISAiFq+cONPGDgcTer5vxNPED74EmUkWkBIQuuXeSuGKmk1qoqNBEqoiUhPAk92jUX3Ig8UIxtWyF++7Lb0wiIgUSnuTe1ATOJVxyoJpumq/ZqlNRRaRkhCe5+0s7JlpyoOWX1URWnlbgAEVE8ic8yT3m7NQID9DOdPoop73uDHXYRaTkhCe5Nzd7SzzG0pKPIlKiwpPcIxFviUddK09EJGQnMUUiSuYiIoSl5x6NehdMLSvz7qPRQkckIlJQxd9zj0ahsRG6u73HHR3eY1AvXkRKVvH33JuaBhN7v+5ur11EpEQVf3LvTHxGatJ2EZESUPzJPdnVl0ZzVSYRkZAp/uSu+nYRkWGKP7mrvl1EZJjir5YB1beLiAxR/D13EREZRsldRCSElNxFREJoTGPuZtYOfAT0Agedcw1mdgTwG6AeaAe+5pz7cGxhiojIaGSj536mc26uc67Bf3wTsNY5dxyw1n8sIiJ5lIthmQuA1f72auDCHLyHiIikMNbk7oCnzKzNzPzVupjinNvhb78PTEl0oJk1mlmrmbXu3r17jGFIsdFCniK5NdY699Occ9vN7NPA02b2l9gnnXPOzFyiA51zLUALQENDQ8J9JJy0kKdI7o2p5+6c2+7f7wIeBU4FdprZVAD/ftdYg5Rw0UKeIrmXcXI3s/Fmdmj/NvCPwCbgcWCJv9sS4LGxBinhooU8RXJvLMMyU4BHzaz/dX7lnPu9mb0CPGhm3wA6gK+NPUwJk9pabygmUbuIZEfGyd0591dgToL2LmDhWIKScGtujh9zBy3kKZJtOkNV8k4LeYrkXjhWhZSio4U8RXJLPXcRkRBSchcRCaGiHpaZ9dNZbPlgy8DjmZNmsnnF5gJGJCISDEXbcx+a2AG2fLCFWT+dVaCIRESCo2iT+9DEPlK7iEgpKdrkLiIiySm5i4iEkJK7iEgIFW1yL7PEoSdrFxEpJUWbCftc36jaRURKSVEm9+hGXbZHRCSVokzuTWuTX9Wh5lM1eYxERCSYijK5d+5NflWHO865I4+RiIgEU1Em99qJia/qUPOpGiKztdSgiEhRJvfmhc1UV1bHtVVXVqvXLiLiK8rkHpkdoeW8Fuom1mEYdRPraDmvRb12ERFfUSZ3ERFJrSiX/I
1ujNL4RCPdPd5FODv2dtD4RCOAeu8iIhRpz71pbdNAYu/X3dOdskRSRKSUFGVyT1YKmapEUkSklBRlck9WCpmsXUSk1BRlck9WCtm8sLlAEYmIBEtRJneVQoqIpGbOuULHQENDg2ttbS10GCIiRcXM2pxzDYmeK8qeu4iIpKbkLiISQkruIiIhpOQuIhJCSu4iIiEUiGoZM9sNdGR4+CTggyyGkw1BjAmCGVcQY4JgxqWY0hfEuHIRU51zbnKiJwKR3MfCzFqTlQIVShBjgmDGFcSYIJhxKab0BTGufMekYRkRkRBSchcRCaEwJPeWQgeQQBBjgmDGFcSYIJhxKab0BTGuvMZU9GPuIiIyXBh67iIiMoSSu4hIGDnnivYGfAV4E3gHuCkHr38PsAvYFNN2BPA08LZ/f7jfbsBP/Fg2APNijlni7/82sCSm/WRgo3/MT/CHyUaI6RhgHbAF2AxcV+i4gCrgZeB1P6bv++3TgT/7r/MbYJzffoj/+B3/+fqY17rZb38TOHusP2ugHHgNWBOgmNr9/9/1QGuhf37+MYcBDwF/Ad4AvhCAmGb4/0f9t78D1wcgrhvwPuebgAfwPv8F/1wNizPTAwt9w/vSvgscC4zDSywzs/weXwLmEZ/cf9T/Hw7cBNzuby8Gfud/wOYDf3aDX9q/+veH+9v9H8aX/X3NP/acNGKa2v+hBQ4F3gJmFjIuf78J/nal/yGeDzwIXOq3/wy4xt9eDvzM374U+I2/PdP/OR7if1ne9X/OGf+sgX8GfsVgcg9CTO3ApCFthf5crQb+q789Di/ZFzSmBN/394G6QsYFHA28B3wq5vO0NAifq2GxZpr4Cn3D61n8IebxzcDNOXifeuKT+5vAVH97KvCmv/1z4LKh+wGXAT+Paf+53zYV+EtMe9x+o4jvMeCsoMQFVAOvAp/HOxuvYujPC/gD8AV/u8Lfz4b+DPv3y/RnDUwD1gJfBtb471HQmPx92xme3Av28wMm4iUsC0pMCWL8R+CFQseFl9y34v2iqPA/V2cH4XM19FbMY+79/8n9tvltuTbFObfD334fmDJCPKnatyVoT5uZ1QMn4fWUCxqXmZWb2Xq8Yayn8Xofe5xzBxO8zsB7+8/vBWoyiHUk/wH8d6DPf1wTgJgAHPCUmbWZWaPfVsif33RgN3Cvmb1mZv/HzMYXOKahLsUbAqGQcTnntgM/BjqBHXifkzaC8bmKU8zJveCc96vVFeK9zTcHvBIAAAKBSURBVGwC8DBwvXPu74WOyznX65ybi9dbPhU4Pp/vP5SZnQvscs61FTKOJE5zzs0DzgFWmNmXYp8swM+vAm/4cZVz7iTgY7zhjkLGNMDMxgHnA/936HP5jsvMDgcuwPuFeBQwHm+MPHCKOblvx5tc7DfNb8u1nWY2FcC/3zVCPKnapyVoH5GZVeIl9qhz7pGgxAXgnNuDN+H7BeAwM6tI8DoD7+0/PxHoyiDWVBYA55tZO/BrvKGZOwocEzDQ+8M5twt4FO+XYSF/ftuAbc65P/uPH8JL9oH4TOH9EnzVObfTf1zIuBYB7znndjvneoBH8D5rBf9cDZPJWE4Qbni9jb/i/Qbtn3iYlYP3qSd+zP1/ET+Z8yN/+78QP5nzst9+BN545uH+7T3gCP+5oZM5i9OIx4BfAP8xpL1gcQGTgcP87U8BzwHn4vW0YieZlvvbK4ifZHrQ355F/CTTX/EmmMb0swbOYHBCtaAx4fX0Do3ZfhGv51foz9VzwAx/+1Y/noLGFBPbr4GrAvJZ/zxepUy1f8xq4FuF/lwljDXTpBeEG97s+Ft447tNOXj9B/DG1XrwejffwBsvW4tXUvVMzIfEgJ/6sWwEGmJe52q8sqZ3hnxIG/DKqd4F7iS9MqzT8P4M3cBgidjiQsYFnIhXbrjBP+5/+O3H+l+ed/wP/yF+e5X/+B3/+WNjXqvJf983ialcGMvPmvjkXtCY/Pd/ncGy0Sa/vdCfq7lAq/8z/E
+8JFjQmPzjxuP1dCfGtBX6/+r7eCWjm4D78RJ0ID7rsTctPyAiEkLFPOYuIiJJKLmLiISQkruISAgpuYuIhJCSu4hICCm5i4iEkJK7iEgI/X/Q2izJGaGojwAAAABJRU5ErkJggg==\n","text/plain":["<Figure size 432x288 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":80},"id":"Yw2AtWwuNtwI","executionInfo":{"status":"ok","timestamp":1607101254206,"user_tz":-120,"elapsed":2630,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"d328592a-8f32-4ff6-a006-47f5e5f8d9c5"},"source":["# I can see a clear outlier: a tanker with length < 100 and gross tonnage > 20000\n","\n","data.loc[(data['Length']<100) & (data['Ship_type'] == 'Tanker') & (data['Gross_tonnage']>20000),['Gross_tonnage']]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Gross_tonnage</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>83</th>\n","      <td>30026</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["    Gross_tonnage\n","83          30026"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"id":"h39L-IaISHZM"},"source":["# Let's change 30026 to the real value, 326\n","\n","data.loc[(data['Length']<100) & (data['Ship_type'] == 'Tanker') & (data['Gross_tonnage']>20000),['Gross_tonnage']] = 
326"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":265},"id":"GdUIfF6ZTNYc","executionInfo":{"status":"ok","timestamp":1607101254833,"user_tz":-120,"elapsed":3244,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"4d725b64-582c-4b58-ce9d-25ca08863e01"},"source":["# Lets plot to check what it looks like now\n","\n","plt.plot(data[data['Ship_type']=='Cargo']['Gross_tonnage'], data[data['Ship_type']=='Cargo']['Length'], 'o', color = 'r', label = 'Cargo')\n","plt.plot(data[data['Ship_type']=='Tanker']['Gross_tonnage'], data[data['Ship_type']=='Tanker']['Length'], 'o', color = 'b', label = 'Tanker')\n","plt.plot(data[data['Ship_type']=='Tug']['Gross_tonnage'], data[data['Ship_type']=='Tug']['Length'], 'o', color = 'g', label = 'Tug')\n","plt.legend()\n","plt.show()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de3RU9d3v8fc3CZgGXKiBIopJsI9FoQhitLS4qhZ8rByvddnqGhW0pzkIT708dp2q6XNqlyvtsafPxR4LbTxeqJ3aerw8KrWtysLW26kSRW7WW00CLARMhYpZSEh+54+9k8wkeyaTyUxmz57Pa61Zs+c3e898ITPf/PL7ffdvm3MOERGJlrJCByAiIrmn5C4iEkFK7iIiEaTkLiISQUruIiIRVFHoAAAmTpzo6urqCh2GiEhRaWlp+cA5NynouVAk97q6OtatW1foMEREioqZtaV6TsMyIiIRpOQuIhJBSu4iIhEUijH3IF1dXWzbto39+/cXOpRQqKysZOrUqYwZM6bQoYhIEQhtct+2bRuHHnoodXV1mFmhwyko5xwdHR1s27aNadOmFTocESkCoR2W2b9/P9XV1SWf2AHMjOrqav0VIxIh8TjU1UFZmXcfj+f29UPbcweU2BPo/0IkOuJxaGiAzk7vcVub9xggFsvNe4S25y4iElWNjf2JvVdnp9eeK0ruQ3j//fe59NJL+cxnPsPJJ5/MokWLeOuttwodlogUsfb24bVnIzrJPQ8DWM45LrroIs444wzeffddWlpa+OEPf8jOnTszOranp2fEMYhI9NTUDK89G9FI7r0DWG1t4Fz/ANYIE/zatWsZM2YMS5cu7WubPXs2J510EgsWLGDu3LnMmjWLxx57DIDW1lamT5/OlVdeyec+9zm2bt3Kbbfdxv
Tp0znttNO47LLL+PGPfwzA+vXrmTdvHieeeCIXXXQRH3744YhiFZHi0dQEVVXJbVVVXnuuRCO552kAa9OmTZx88smD2isrK3n00Ud59dVXWbt2LTfeeCO9lyt8++23WbZsGZs3b2bXrl08/PDDvP766/zud79LWj/nyiuv5Pbbb2fDhg3MmjWL73//+yOKVUSKRywGzc1QWwtm3n1zc+4mUyHk1TIZG40BrATOOW655Rb+9Kc/UVZWxvbt2/uGampra5k3bx4AL7zwAhdccAGVlZVUVlZy3nnnAbB371727NnD6aefDsDixYu55JJL8hKriIRTLJbbZD5QNHrueRrAmjlzJi0tLYPa4/E4u3fvpqWlhfXr1zN58uS+GvRx48aN6D1FRHIhGsk9TwNYX/7yl/nkk09obm7ua9uwYQNtbW18+tOfZsyYMaxdu5a2tuBVN+fPn88TTzzB/v372bdvH6tXrwZgwoQJHH744Tz33HMA3H///X29eBGRXIjGsEzv3zaNjd5QTE2Nl9hH+DePmfHoo49y/fXXc/vtt1NZWUldXR233nor1157LbNmzaK+vp7jjz8+8PhTTjmF888/nxNPPJHJkycza9YsJkyYAMCqVatYunQpnZ2dHHvssdx7770jilVEJJH1TgQWUn19vRt4sY433niDE044oUAR5c6+ffsYP348nZ2dfOlLX6K5uZm5c+dm9VpR+T8RkdwwsxbnXH3Qc9HouYdYQ0MDW7ZsYf/+/SxevDjrxC4iMhxK7nn2q1/9qtAhiEgJisaEqogUTL5XN5TsqOcuIlkbjdUNJTvquYtI1kZjdUPJjpK7iGRtlE8Ol2EYMrmb2TFmttbMtpjZZjO7zm+/1cy2m9l6/7Yo4ZibzewdM3vTzM7O5z8gXzo6OpgzZw5z5szhyCOP5Oijj+57fODAgYxe49lnn+Xcc8/Nc6QihTMaqxtKdjLpuR8EbnTOzQDmAcvNbIb/3L875+b4tycB/OcuBWYCXwFWmFl5HmJPkutJnerqatavX8/69etZunQpN9xwQ9/jsWPH5iLktA4ePJj39xAZqaCTwwE6Orzv4MKF3sJYvbfKyuDvZqrvryZrszdkcnfO7XDOvepvfwS8ARyd5pALgF875z5xzr0HvAOcmotgU8nTir+D3HXXXZxyyinMnj2biy++mE5/sHHJkiVce+21fPGLX+TYY4/loYceGnTsK6+8wkknndS3Lvzpp5/OySefzNlnn82OHTsAOOOMM7j++uupr6/njjvuyG3wInkQi8HixYPb9+2Dyy+HNWuS2z/5BK64Ivm7mer7u2zZ6Hyvo2pYY+5mVgecBPzZb/onM9tgZveY2eF+29HA1oTDtpH+l8GIjdakzle/+lVeeeUVXn/9dU444QTuvvvuvud27NjB888/z+rVq7npppuSjnvxxRdZunQpjz32GDU1NXzrW9/ioYceoqWlhauvvprGhEAPHDjAunXruPHGG3MbvEiePPnk8PZ3Lvm7mer729ysydqRyLgU0szGAw8D1zvn/m5mK4HbAOff/ytw9TBerwFoAKgZ4QDdaE3qbNq0ie9+97vs2bOHffv2cfbZ/dMJF154IWVlZcyYMSPpSk1vvPEGDQ0NPPXUUxx11FFs2rSJTZs2cdZZZwHQ3d3NlClT+vb/+te/ntugRfIsm+9Z4jGpju/uzt37laKMeu5mNgYvscedc48AOOd2Oue6nXM9wF30D71sB45JOHyq35bEOdfsnKt3ztVPmjRpJP+GUZvUWbJkCXfeeScbN27ke9/7Xt8yvwCHHHJI33biej1TpkyhsrKS1157re+5mTNn9o3fb9y4kaeeeqpvfy0ZLIWUzRh3Nt+zxGNSHV+eYqZOk7WZyaRaxoC7gTecc/+W0D4lYbeLgE3+9uPApWZ2iJlNA44DXs5dyIONxiWrAD766COmTJlCV1cX8QwH/g477DB++9vfcvPNN/Pss88yffp0du/ezUsvvQRAV1
cXmzdvzm2gIlnIdu6qqQnGjMn8fcySv5upvr8NDaPzvY6qTHru84ErgC8PKHv8kZltNLMNwJnADQDOuc3Ag8AW4PfAcudcij+wcmM0LlkFcNttt/H5z3+e+fPnp1zmN8jkyZNZvXo1y5cv57XXXuOhhx7iO9/5DrNnz2bOnDm8+OKLuQ1UJAvZzl3FYnDvvVBdPfR7HHII3H9/8ncz1fd3xYrR+V5HlZb8LSL6P5F8KivzeuwDmUFPT+avM3BJAvB63ErMuZduyV+doSoiQO7mrkbrL2lJT8ldRIDczl3FYtDa6vX4W1uV2AtByV1EAPW4o0ZL/opIn1hMyTwq1HMXEYkgJXcRkQjSsEwKHR0dLFiwAID333+f8vJyes+kffnll0dlZUgRkWxFJrnHN8ZpXNNI+952aibU0LSgidis7AcPe5f8Bbj11lsZP3483/72t3MVrohIXkViWCa+MU7DEw207W3D4Wjb20bDEw3EN+Z2bdAlS5YkLec7fvx4AHp6eli2bBnHH388Z511FosWLQpc9lciLGhRFi1GLgUUiZ5745pGOruSz5vu7OqkcU3jiHrvmXrkkUdobW1ly5Yt7Nq1ixNOOIGrr854gUwpdkFXib76au90z66u/jZdOVpGUSR67u17g9cATdWea88//zyXXHIJZWVlHHnkkZx55pmj8r4SEkGLshw40J/Ye2kxchlFkUjuNROCz49O1Z6tiooKevxFNnp6ejK+lqpE3HAWGNdi5DJKIpHcmxY0UTUm+bzpqjFVNC3I7dqgdXV1tLS0APD444/T5ffM5s+fz8MPP0xPTw87d+7k2Wefzen7SsgNZ/EVLUYuoyQSyT02K0bzec3UTqjFMGon1NJ8XnPOx9u/+c1v8sc//pHZs2fz0ksv9V1Y4+KLL2bq1KnMmDGDyy+/nLlz5zJhwoScvreEWNCiLGPHDl7kXIuRyyjSkr85sm/fPsaPH09HRwennnoqL7zwAkceeWRO36PY/k9KSjzujae3t3u9894kPrBNk6mSQ+mW/I1EtUwYnHvuuezZs4cDBw7wL//yLzlP7BJyqRZlUTKXAlFyzxGNs4tImIR6zD0MQ0Zhof8LERmO0Cb3yspKOjo6lNTwEntHRweVlZWFDkVEikRoh2WmTp3Ktm3b2L17d6FDCYXKykqmTp1a6DBEpEiENrmPGTOGadOmFToMEZGiFNphGRERyZ6Su4hIBCm5i4hEkJK7iEgEKbmLiESQkruISAQpuYuIRJCSu4hIBCm5i4hE0JDJ3cyOMbO1ZrbFzDab2XV++xFm9rSZve3fH+63m5n9xMzeMbMNZjY33/8IkUHicairg7Iy7z4eL3REIqMqk577QeBG59wMYB6w3MxmADcBa5xzxwFr/McA5wDH+bcGYGXOoxZJJx6HhgZoawPnvPuGBiV4KSlDJnfn3A7n3Kv+9kfAG8DRwAXAKn+3VcCF/vYFwC+c5/8Bh5nZlJxHLpJKYyN0dia3dXZ67SIlYlhj7mZWB5wE/BmY7Jzb4T/1PjDZ3z4a2Jpw2Da/beBrNZjZOjNbp5UfS1vOR1Da24fXLhJBGSd3MxsPPAxc75z7e+Jzzlt0fVgLrzvnmp1z9c65+kmTJg3nUImQeByWLEkeQVmyZIQJvqZmeO0iEZRRcjezMXiJPe6ce8Rv3tk73OLf7/LbtwPHJBw+1W8TGWTpUjh4MLnt4EGvPWtNTVBVldxWVdV/0WqREpBJtYwBdwNvOOf+LeGpx4HF/vZi4LGE9iv9qpl5wN6E4RspNnmuOtm3b3jtGYnFoLkZamvBzLtvbtbFqqWk2FCXsTOz04DngI1Aj998C964+4NADdAGfM059zf/l8GdwFeATuAq59y6dO9RX1/v1q1Lu4sUQm/VSeLkZFVVThOlWerndIVFkfTMrMU5Vx/4XBiuUarkHlJ1dd4g+EC1tdDampO3KCsLTuJm0NMzuF1E+qVL7jpDVVIbhaqTVGPrIxpzFxEld0
ljFKpOVqyAa66B8nLvcXm593jFipy9hUhJUnKX1Eap6mTFCq9CxjnvXoldZOSU3GWQvgKZK2JMtA4mlnVQRjd15VuJL/6Dqk5EioCSuySJx+Gqq/pPKur4uJKOniNwlNHWPZWGVacNWQ2ZrnoyHoeJE70JUzNvW0u+iOSeqmUkycSJ0NGRfp90xTLpqifB+8XR1ZV8zNixcM89+oNAZLhUCikZS1d3nrhPqjLFVNWT1dWwZw90dwcfl8PqSpGSoVLIqCrQmuXpimVSVUl2dKRO7OmOE5HsKLkXqzytWV5dnf75oYplsq2S1JpeIrml5F6s8rRm+R13eGPgQTJZoiWoenIoY8dqTS+RXFNyL1Z5Ons0FvMmNxPX3PrlL70/Dlpbh570DFqzK91fA9XVmkwVyQdNqBarUVj3JVdGYf0xkZKkCdUoKqI1y7UCr8joqyh0AJKl3szY2OgNxdTUeIk9pBkzFgttaCKRpJ57kQiseozFiDe1UlfTQ1l7K3WNMZ3tKSKAkntRSFX1uGzhX2i4ojPX1ZC5V6B6fJFSpgnVIpBq7rScg3QHjKyFak5Vs6kieaMJ1WLl93jb24LP9e+mPLA9VGd75qkeX0TSU3IPq4SxmBqCs3U5wefzh+psz1G4mpOIDKbkHlYJPd4mbqGKj5OerrJOGvhZYHuoqiFH4WpOIjKYkntYJfRsYzxAM9+kllaMHq9OfOmrrKj6TnK7tdO89NVwDWUXUT2+SJQouYfVgJ5tjAdoZRo9VkFrU5zYitOguZlY7Yu02rH01B5L09J2Gp88LVxFKTqDSaQgVC0TVvE4XHGFV+M4UEA5jIpSREqPqmUKbWCd97JlQ9d9x2LBiR0CJyNVlCIiibT8QL4N7FK3tcHKlf3P9555BIO72LW1wQXuAZORKkoRkUTquedbUJd6oFRd7GFMRqooRUQSKbnnW6Zd56D9hjEZqaIUEUmk5J5vmXadU+0Xi3mTpz09aa+WoaIUEUmk5J4vvZOobW1etk0nR13sDH8PiEgJGDK5m9k9ZrbLzDYltN1qZtvNbL1/W5Tw3M1m9o6ZvWlmZ+cr8ELIeHHDxGUcwat66U3wtbVwzTXqYotIXmVSLXMfcCfwiwHt/+6c+3Fig5nNAC4FZgJHAc+Y2Wedc8GLoBSRoKKXVEUugZOozoVsuUYRibIhe+7OuT8Bf8vw9S4Afu2c+8Q59x7wDnDqCOILjWHVkasuUUQKbCRj7v9kZhv8YZvD/bajga0J+2zz2wYxswYzW2dm63bv3j2CMEbHsPK16hJFpMCyTe4rgc8Ac4AdwL8O9wWcc83OuXrnXP2kSZOyDGP0DCtfp6hLXDjuBczouy1cmPMwRUSALJO7c26nc67bOdcD3EX/0Mt24JiEXaf6bUVvWHXkAXWJC+veYs2W5D9i1qxRgheR/MgquZvZlISHFwG9lTSPA5ea2SFmNg04Dnh5ZCGGw7DryAfUJQ5M7L3WrCn8wm0iEj2ZlEI+ALwETDezbWb2DeBHZrbRzDYAZwI3ADjnNgMPAluA3wPLi6FSJtMSx6A68sRjDz3UuzeDigpvfbCMAxARyaGSX/I3HoclS+Dgwf62igq4776hS8+Dltkd6JprYMWKdOcxOVztNJVIisiwpVvyt+ST+6GHwr59g9vHj4ePPkp/bO8JqOmUl3u/OBYu7B2CSczyjgU8xTN2jvfngIjIMGg99zSCEnu69j7xOO1tQyfkbn9Q6plnYMHY5wDXd1vAUzzDV1QiKSI5V/LJPSvxOFx1FTUMfVJSeXn/9jP3bMVVjcdRhqPMS+xaulFE8qDkk3uqsXCjJ/Us63XXQVcXi1jt7ZdG7xIFgJZuFJFRU/LJfenSoFbH0rK7vAF15/oXkulN8B0dxLmMVVyFS/ov7J+/KC/vn0xNoqUbRWQUlPyEKngli83N3vh4eTk0VK5ixcdLBu/Yu/CXGXW8Rxt1KXcREck3TagOYcUKr6LFOe9+RedVgfvF2+Z7Iz
V000Zt4D5aG0xEwqCkk3vKk5cCqlfiXEaD3eWN1FBGckljPxW+iEgYlGxyT7yexqBh9YCFZBrtf9LpqoJfzKfCFxEJi5JN7mnXZw+oaml3xwS+DqjwRUTCp2ST+5Drsw+oaqmpDR6Gqa3NoPAl4+vziYjkRskm93Trsy9b5q0vk7gA2LCW/E2UdvxHRCQ/Sja5L1o0+ASmqir4h3HbWbnS9S0b0N0NK1fCCy9kef7RsK7PJyKSG5lcIDty4nFYtcrrSPcyg8Vf+AvNa/6BoEqYlSvhySe9nvqwxtV1PVURKYCS7LkHdaadgyefHU835cEHkeWIiq6nKiIFUJLJPVWnua37aMqGWCumsxMuv3wY86JZD9aLiGSvJJN76k6zAd0krhGTSsa9eC0WJiIFUJJrywx1BaWxdHKAT5HqLNREWktGRAqldNeWSVFf3tuZTiXTxA6aFxWRcIpuch+ivjwWg9rqoS63NDTNi4pIGEU3uaerL/d79E0dDVTxcdIuVXxMNR9k9BaaFxWRsIpunXvKkpi2vgH3GN7VrRv5Ae3UUEM7TdwCwFXcQxeVad9C86IiElbRTe41NV4iH6i8PKlHH+MBYjwweJ8zFnLdmvPpoJqg8ffaWiV2EQmv6A7LpKov711XYIA4l1HHe5TRTd1he+Czn+WDcXX8ktjgoZuxBzUcIyKhFt3knqq+vHbwFZTiXEYDd9FGHY4y2jrG07DyJOIfn0+MB2jmm9TSitFDLa00H/rP6rWLSKiVXp17PA5XXJG0sEzK66HSSivTBr+GmbfOr4hIAZVunXuQWCx5xTCgneB6xlTtqn8UkbArreTee1LTADUEV9akateAu4iEXekk92XLvOGYgAqaJm4JrHfvLYtMYqYyGREJvSGTu5ndY2a7zGxTQtsRZva0mb3t3x/ut5uZ/cTM3jGzDWY2N5/BZyweZ9nKmVS4Axg9fbcKuljG/yY29mEWL9hKub/ab3k5LD7q6cElkgBLl45u7CIiWcik534f8JUBbTcBa5xzxwFr/McA5wDH+bcGYGVuwhyZZf/tICtZRjcVeDXr3q2bClaynIUHHmfVH6clXX1p1Z4LiS+4m6SMf801sGJFgf4VIiKZy6haxszqgNXOuc/5j98EznDO7TCzKcCzzrnpZvZzf/uBgfule/18V8tU2EE/safiSHWiklZ8FJGwyke1zOSEhP0+MNnfPhrYmrDfNr8tKKgGM1tnZut2796dZRiZSXd1pXS04qOIFKsRT6g6r+s/7GJ551yzc67eOVc/adKkkYaRVvkQV1dKRRWPIlKssk3uO/3hGPz7XX77duCYhP2m+m0FE1/2PGM4QOrfP44FPEVVxSdJrVrxUUSKWbbJ/XFgsb+9GHgsof1Kv2pmHrB3qPH2fIrHoeFnc9mf5uIbFRzkqgXbab7vEF0JT0QiY8gJVTN7ADgDmAjsBL4H/CfwIFADtAFfc879zcwMuBOvuqYTuMo5N+RMab4mVOvqgheGHEgTpyJSjNJNqA655K9z7rIUTy0I2NcBy4cXXv60twVXwQzaTxOnIhIx0TtDNR6HiRPBjBoy6LajiVMRiZ5oJfd4HK6+Gjo6gOBlBQZOrGriVESiKFrJvbERDhzoexi0Fvs1/LT/sSZORSSiorWee1nZoOV80wrBv11EJFuls577cAbPZ8zIXxwiIgUWreS+aFFm+x12GGzenN9YREQKKFrJ/f77M9vvww/zG4eISIFFK7nv2wd4F7yu4z3K6KaO94iTqlRfRCSahjyJqdjEuYwG7qKTcQC0UUcDdwEEX3xDRCSCotVzBxr5QV9i79XJOBr5gfdAE6kiUgIil9zbCa6YaacGKio0kSoiJSE6yT0e95ccCF4opoatcN99oxuTiEiBRCe5NzaCc4FLDlTRSdM1W3UqqoiUjOgkd39px6AlB5p/WUVsxWkFDlBEZPREJ7knnJ0a4wFamUYP5bTWnqEOu4iUnOgk96Ymb4nHRFryUURKVH
SSeyzmLfGoa+WJiETsJKZYTMlcRISo9Nzjce+CqWVl3n08XuiIREQKqvh77vE4NDRAZ6f3uK3NewzqxYtIySr+nntjY39i79XZ6bWLiJSo4k/u7cFnpKZsFxEpAcWf3FNdfWk4V2USEYmY4k/uqm8XERmk+JO76ttFRAYp/moZUH27iMgAxd9zFxGRQZTcRUQiSMldRCSCRjTmbmatwEdAN3DQOVdvZkcAvwHqgFbga865D0cWpoiIDEcueu5nOufmOOfq/cc3AWucc8cBa/zHIiIyivIxLHMBsMrfXgVcmIf3EBGRNEaa3B3wlJm1mJm/WheTnXM7/O33gclBB5pZg5mtM7N1u3fvzurNtRikiEiwkda5n+ac225mnwaeNrO/JD7pnHNm5oIOdM41A80A9fX1gfuko8UgRURSG1HP3Tm33b/fBTwKnArsNLMpAP79rpEGGUSLQYqIpJZ1cjezcWZ2aO828I/AJuBxYLG/22LgsZEGGUSLQYqIpDaSYZnJwKNm1vs6v3LO/d7MXgEeNLNvAG3A10Ye5mA1Nd5QTFC7iEipyzq5O+f+CswOaO8AFowkqEw0NSWPuYMWgxQR6VW0Z6hqMUgRkdSKelVILQYpIhKsaHvuIiKSmpK7iEgEFfWwzMyfzmTLB1v6Hs+YOIPNyzcXMCIRkXAo2p77wMQOsOWDLcz86cwCRSQiEh5Fm9wHJvah2kVESknRJncREUlNyV1EJIKU3EVEIqhok3uZBYeeql1EpJQUbSbscT3DahcRKSVFmdzjG3XJJRGRdIoyuTeuSX1FjupPVY9iJCIi4VSUyb19b+orctxxzh2jGImISDgVZXKvmRB8RY7qT1UTm6VlIkVEijK5Ny1oompMVVJb1Zgq9dpFRHxFmdxjs2I0n9dM7YRaDKN2Qi3N5zWr1y4i4ivK5C4iIukV5ZK/8Y1xGp5ooLPLu4Bq2942Gp5oAFDvXUSEIu25N65p7EvsvTq7OtOWSIqIlJKiTO6pSiHTlUiKiJSSokzuqUohU7WLiJSaokzuqUohmxY0FSgiEZFwKcrkrlJIEZH0zDlX6Bior69369atK3QYIiJFxcxanHP1Qc8VZc9dRETSU3IXEYkgJXcRkQhSchcRiSAldxGRCApFtYyZ7Qbasjx8IvBBDsPJhTDGBOGMK4wxQTjjUkyZC2Nc+Yip1jk3KeiJUCT3kTCzdalKgQoljDFBOOMKY0wQzrgUU+bCGNdox6RhGRGRCFJyFxGJoCgk9+ZCBxAgjDFBOOMKY0wQzrgUU+bCGNeoxlT0Y+4iIjJYFHruIiIygJK7iEgUOeeK9gZ8BXgTeAe4KQ+vfw+wC9iU0HYE8DTwtn9/uN9uwE/8WDYAcxOOWezv/zawOKH9ZGCjf8xP8IfJhojpGGAtsAXYDFxX6LiASuBl4HU/pu/77dOAP/uv8xtgrN9+iP/4Hf/5uoTXutlvfxM4e6Q/a6AceA1YHaKYWv3/3/XAukL//PxjDgMeAv4CvAF8IQQxTff/j3pvfweuD0FcN+B9zjcBD+B9/gv+uRoUZ7YHFvqG96V9FzgWGIuXWGbk+D2+BMwlObn/qPc/HLgJuN3fXgT8zv+AzQP+7Pq/tH/17w/3t3s/jC/7+5p/7DkZxDSl90MLHAq8BcwoZFz+fuP97TH+h3ge8CBwqd/+M+Aaf3sZ8DN/+1LgN/72DP/neIj/ZXnX/zln/bMG/hn4Ff3JPQwxtQITB7QV+nO1Cviv/vZYvGRf0JgCvu/vA7WFjAs4GngP+FTC52lJGD5Xg2LNNvEV+obXs/hDwuObgZvz8D51JCf3N4Ep/vYU4E1/++fAZQP3Ay4Dfp7Q/nO/bQrwl4T2pP2GEd9jwFlhiQuoAl4FPo93Nl7FwJ8X8AfgC/52hb+fDfwZ9u6X7c8amAqsAb4MrPbfo6Ax+fu2Mji5F+znB0zAS1gWlpgCYvxH4IVCx4WX3Lfi/aKo8D9XZ4fhczXwVsxj7r3/yb22+W
35Ntk5t8Pffh+YPEQ86dq3BbRnzMzqgJPwesoFjcvMys1sPd4w1tN4vY89zrmDAa/T997+83uB6ixiHcp/AP8d6PEfV4cgJgAHPGVmLWbW4LcV8uc3DdgN3Gtmr5nZ/zGzcQWOaaBL8YZAKGRczrntwI+BdmAH3uekhXB8rpIUc3IvOOf9anWFeG8zG3xelE8AAAKASURBVA88DFzvnPt7oeNyznU75+bg9ZZPBY4fzfcfyMzOBXY551oKGUcKpznn5gLnAMvN7EuJTxbg51eBN/y40jl3EvAx3nBHIWPqY2ZjgfOB/zvwudGOy8wOBy7A+4V4FDAOb4w8dIo5uW/Hm1zsNdVvy7edZjYFwL/fNUQ86dqnBrQPyczG4CX2uHPukbDEBeCc24M34fsF4DAzqwh4nb739p+fAHRkEWs684HzzawV+DXe0MwdBY4J6Ov94ZzbBTyK98uwkD+/bcA259yf/ccP4SX7UHym8H4Jvuqc2+k/LmRcC4H3nHO7nXNdwCN4n7WCf64GyWYsJww3vN7GX/F+g/ZOPMzMw/vUkTzm/r9Insz5kb/9X0iezHnZbz8CbzzzcP/2HnCE/9zAyZxFGcRjwC+A/xjQXrC4gEnAYf72p4DngHPxelqJk0zL/O3lJE8yPehvzyR5kumveBNMI/pZA2fQP6Fa0JjwenqHJmy/iNfzK/Tn6jlgur99qx9PQWNKiO3XwFUh+ax/Hq9Spso/ZhXwrUJ/rgJjzTbpheGGNzv+Ft74bmMeXv8BvHG1LrzezTfwxsvW4JVUPZPwITHgp34sG4H6hNe5Gq+s6Z0BH9J6vHKqd4E7yawM6zS8P0M30F8itqiQcQEn4pUbbvCP+x9++7H+l+cd/8N/iN9e6T9+x3/+2ITXavTf900SKhdG8rMmObkXNCb//V+nv2y00W8v9OdqDrDO/xn+J14SLGhM/nHj8Hq6ExLaCv1/9X28ktFNwP14CToUn/XEm5YfEBGJoGIecxcRkRSU3EVEIkjJXUQkgpTcRUQiSMldRCSClNxFRCJIyV1EJIL+PzWpLMmcz6yqAAAAAElFTkSuQmCC\n","text/plain":["<Figure size 432x288 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"aLYa3EoeWZTL"},"source":["**It is good to exploit domain knowledge and make some reasonable transformation to the feature values to improve the expected results and/or to avoid redundancy. Find out what gross tonnage means. Make some transformation to Length values to acquire a linear relationship between the transformed length and Gross tonnage values 1p**"]},{"cell_type":"markdown","metadata":{"id":"MFgXUPTkYkzr"},"source":["Gross tonnage is the ships volume times a multplier based on the volume. To be exact:\n","\n","\n","Gross_tonnage = V * (0.2 + 0.02 * log10(V))\n","\n","\n","Volume is height * width * depth (m^3). So we can get linear relationship with the length doing length^3. 
So:\n","\n","length^3*log10(length^3) ~ gross_tonnage"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":276},"id":"dly3Vdt2WgO6","executionInfo":{"status":"ok","timestamp":1607101254834,"user_tz":-120,"elapsed":3237,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"2728e89a-7272-43ec-89f7-fe88b73c242f"},"source":["# Let's check whether the transformed length looks linear against gross tonnage\n","\n","plt.plot(data[data['Ship_type']=='Cargo']['Gross_tonnage'], (data[data['Ship_type']=='Cargo']['Length']**3)*(np.log10(data[data['Ship_type']=='Cargo']['Length']**3)), 'o', color = 'r', label = 'Cargo')\n","plt.plot(data[data['Ship_type']=='Tanker']['Gross_tonnage'], (data[data['Ship_type']=='Tanker']['Length']**3)*(np.log10(data[data['Ship_type']=='Tanker']['Length']**3)), 'o', color = 'b', label = 'Tanker')\n","plt.plot(data[data['Ship_type']=='Tug']['Gross_tonnage'], (data[data['Ship_type']=='Tug']['Length']**3)*(np.log10(data[data['Ship_type']=='Tug']['Length']**3)), 'o', color = 'g', label = 
'Tug')\n","plt.legend()\n","plt.show()"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXQAAAEDCAYAAAAlRP8qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de3Scdb3v8fc3adoQwi6YViiUJMXDBoqlUMJF4QjaItCNICLHdg3SApJTiljceLZgvCCsbkU9e1k9FA3KRRiKWAQq4hHpBkFAbAIFWxCokJSwgYZI0ZpTmzbf88fzpJlJZjKT5Mnc8nmtNSvz/J7bl2T6nR+/53cxd0dERIpfWb4DEBGRaCihi4iUCCV0EZESoYQuIlIilNBFREqEErqISInIa0I3s5vMbIuZbcji2Foze9jMnjGz58xsfi5iFBEpFvmuod8CnJblsV8G7nL3o4AFwMqxCkpEpBjlNaG7+6PAXxLLzOx9ZvZ/zazVzB4zs0P7Dgf+KXw/GfivHIYqIlLwJuQ7gBSagSXu/rKZHUdQE/8IcDXwoJldBuwJzMtfiCIihaegErqZVQMfBH5mZn3Fk8KfC4Fb3P1/m9kHgNvM7P3u3puHUEVECk5BJXSCJqCt7n5kin0XEba3u/uTZlYJTAG25DA+EZGCle+Hoknc/a/Aq2Z2LoAFZoe7NwNzw/LDgEqgMy+BiogUIMvnbItmtgo4maCm/RbwNeA/gRuAaUAFcKe7X2NmM4EbgWqCB6T/5u4P5iNuEZFClNeELiIi0SmoJhcRERm5vD0UnTJlitfX1+fr9iIiRam1tfVtd5+aal/eEnp9fT0tLS35ur2ISFEys/Z0+9TkIiJSIpTQRURKhBK6iEiJKKiRoj09PXR0dLB9+/Z8h1IQKisrmT59OhUVFfkORUSKQEEl9I6ODvbaay/q6+tJmMtlXHJ3urq66OjoYMaMGfkOR0SKQEE1uWzfvp2amppxn8wBzIyamhr934pICYnHob4eysqCn/F4tNcvqBo6oGSeQL8LkdIRj0NjI3R3B9vt7cE2QCwWzT0KqoYuIlKqmpr6k3mf7u6gPCpK6Cm8+eabLFiwgPe9730cffTRzJ8/n5deeinfYYlIEdu8eXjlI1HcCX0MGqTcnbPPPpuTTz6ZP//5z7S2tvKNb3yDt956K6tze3u13oaIDFZbO7zykSjehN7XINXeDu79DVKjTOoPP/wwFRUVLFmyZHfZ7NmzOeqoo5g7dy5z5sxh1qxZ3HfffQC0tbVxyCGHcP755/P+97+f1157jWuvvZZDDjmEE088kYULF/Kd73wHgPXr13P88cdzxBFHcPbZZ/POO++MKlYRKR7Ll0NVVXJZVVVQHpXiTehj1CC1YcMGjj766EHllZWV3HPPPTz99NM8/PDDXHHFFfRNPfzyyy+zdOlSNm7cyJYtW7j77rt59tln+dWvfpU0X83555/Pddddx3PPPcesWbP4+te/PqpYRaR4xGLQ3Ax1dWAW/Gxuju6BKBRgL5es5aJBKoG786UvfYlHH32UsrIyXn/99d3NMHV1dRx//PEAPP7445x11llUVlZSWVnJxz72MQDeffddtm7dykknnQTAokWLOPfcc8ckVhEpTLFYtAl8oOKtoY9Rg9Thhx9Oa2vroPJ4PE5nZyetra2sX7+efffdd3cf8T333HNU9xQRiULxJvQxapD6yEc+wj/+8Q+am5t3lz333HO0t7fz3ve+l4qKCh5++GHa21PPYHnCCSfwi1/8gu3bt7Nt2zbuv/9+ACZPnsw+++zDY489BsBtt922u7YuIhKF4m1y6fv/lqamoJmltjZI5qP8/xkz45577uHyyy
/nuuuuo7Kykvr6eq6++mo+97nPMWvWLBoaGjj00ENTnn/MMcdw5plncsQRR7Dvvvsya9YsJk+eDMCtt97KkiVL6O7u5qCDDuLmm28eVawiIonytqZoQ0ODD1zg4oUXXuCwww7LSzxR2rZtG9XV1XR3d/OhD32I5uZm5syZM6JrlcrvRESiYWat7t6Qal/x1tALWGNjI88//zzbt29n0aJFI07mIiLDoYQ+Bu644458hyAi41DGh6JmdpOZbTGzDRmOO8bMdprZJ6MLT0REspVNL5dbgNOGOsDMyoHrgAcjiElEREYgY0J390eBv2Q47DLgbmBLFEGJiMjwjbofupkdAJwN3JDFsY1m1mJmLZ2dnaO9tYiIJIhiYNF3gS+6e8ZpBt292d0b3L1h6tSpEdw6Wl1dXRx55JEceeSR7LfffhxwwAG7t3fs2JHVNR555BHOOOOMMY5URGSwKBJ6A3CnmbUBnwRWmtnHI7huRlHPnltTU8P69etZv349S5Ys4fOf//zu7YkTJ0YR8pB27tw55vcQkdI16oTu7jPcvd7d64HVwFJ3v3fUkWUwRrPnDnLjjTdyzDHHMHv2bM455xy6wxkeFy9ezOc+9zk++MEPctBBB7F69epB565bt46jjjpq97zqJ510EkcffTSnnnoqb7zxBgAnn3wyl19+OQ0NDaxYsSLa4EVkXMmm2+Iq4EngEDPrMLOLzGyJmS3JdO5YysVyTgCf+MQnWLduHc8++yyHHXYYP/7xj3fve+ONN/jd737H/fffz5VXXpl03hNPPMGSJUu47777qK2t5bLLLmP16tW0trZy4YUX0pQQ6I4dO2hpaeGKK66INngRGVcyDixy94XZXszdF48qmmHI1ey5GzZs4Mtf/jJbt25l27ZtnHrqqbv3ffzjH6esrIyZM2cmrWj0wgsv0NjYyIMPPsj+++/Phg0b2LBhA6eccgoAu3btYtq0abuP/9SnPhVt0CIyLhXtSNHa2qCZJVV5lBYvXsy9997L7NmzueWWW3jkkUd275s0adLu94lz4kybNo3t27fzzDPPsP/+++PuHH744Tz55JMp76Hpd0UkCkU7fW4ulnMC+Nvf/sa0adPo6ekhnmUD/d57780vf/lLrrrqKh555BEOOeQQOjs7dyf0np4eNm7cGG2gIjLuFW1Cz8VyTgDXXnstxx13HCeccELaKXNT2Xfffbn//vu59NJLeeaZZ1i9ejVf/OIXmT17NkceeSRPPPFEtIGKyLin6XMLnH4nIpJoqOlzi7aGLiIiyZTQRURKhBK6iEiJUEIXESkRSugiIiVCCV1EpEQU7UjRsdDV1cXcuXMBePPNNykvL6dvmt8//OEPOZlxUURkpIo6ocf/GKdpbROb391M7eRals9dTmzWyEcW9U2fC3D11VdTXV3NF77whajCFREZU0Xb5BL/Y5zGXzTS/m47jtP+bjuNv2gk/sdo589dvHhx0tS41dXVAPT29rJ06VIOPfRQTjnlFObPn59yCl0RkVwp2oTetLaJ7p7k+XO7e7ppWhvx/Llp/PznP6etrY3nn3+e2267Le3EWyIiuVK0CX3zu6nnyU1XHrXf/e53nHvuuZSVlbHffvvx4Q9/OCf3FRFJp2gTeu3k1PPkpisfqQkTJtDbGyyX2tvbm/XaoiIiuVa0CX353OVUVSTPn1tVUcXyudHOn1tfX09raysAa9asoaenB4ATTjiBu+++m97eXt56662kedJFSl3U6/lKNLJZgu4mM9tiZhvS7I+Z2XNm9kcze8LMZkcf5mCxWTGaP9ZM3eQ6DKNuch3NH2seVS+XVC6++GJ++9vfMnv2bJ588sndi1Gcc845TJ8+nZkzZ3LeeecxZ84cJk+eHOm9RQpRrtbzleHLOH2umX0I2Ab8xN3fn2L/B4EX3P0dMzsduNrdj8t041KYPnfbtm1UV1fT1dXFsccey+OPP85+++0X6T2K7Xcipa++PvVqYXV10NaW62jGn6Gmz8
1mTdFHzax+iP2JKzX8Hpg+3ACL1RlnnMHWrVvZsWMHX/nKVyJP5iKFKFfr+crwRT2w6CLgV+l2mlkj0AhQG/Xin3mgdnMZj3K1nq8MX2QPRc3swwQJ/YvpjnH3ZndvcPeGviH1KY6JKqSip9+FFKJcrecrwxdJQjezI4AfAWe5e9dIr1NZWUlXV5cSGUEy7+rqorKyMt+hiCTJ1Xq+MnyjbnIxs1rg58Cn3f2l0Vxr+vTpdHR00NnZOdqwSkJlZSXTp4+bRxJSRGIxJfBClDGhm9kq4GRgipl1AF8DKgDc/QfAV4EaYKWZAexM9wQ2k4qKCmbMmDGSU0VExr1serkszLD/M8BnIotIRERGpGhHioqISDIldBEZkXgcpkwJHoyaBe/jcZg3r7/MDCorU48iTTd9gKYVGLmMI0XHSqqRoiJSHOJxuOACCKc2ysgMbrut/0Fq3/QB3QkzYFdVwaJFcOutg8vVi6bfUCNFldBFZNjSDf8fSuLUAOnOLy+HXbuGPne8Gyqhq8lFRIZtJMP8E89Jd36qZD7S+41HSugiMmwjGeafeE6688vLo7vfeKSELiLDtnw5VFRkf7xZ8tQA6aYPaGzUtAKjoYQuIsMWi8HNN0NNTX9ZdXXydp9Jk5IfiPadn2r6gJUrNa3AaOihqIiMWrpeK0rG0dNDUREZU01Nyckcgu2mpvzEM14poYvIqGnRi8KghC4io5auF4p6p+SWErrIOBfFUHstelEYlNBFxrG+h5nt7eAe/GxsHH5S16IXhUG9XETGsXRD8DXUvnCpl4uIpKSHmaVFCV1kHNPDzNKSMaGb2U1mtsXMNqTZb2b2PTPbZGbPmdmc6MMUkbGgh5mlJZsa+i3AaUPsPx04OHw1AjeMPiwRyQU9zCwt2awp+qiZ1Q9xyFnATzx4uvp7M9vbzKa5+xsRxSgiYygWUwIvFVG0oR8AvJaw3RGWiYhIDuX0oaiZNZpZi5m1dHZ25vLWIiIlL4qE/jpwYML29LBsEHdvdvcGd2+YOnVqBLcWEZE+UST0NcD5YW+X44F31X4uIpJ7GR+Kmtkq4GRgipl1AF8DKgDc/QfAA8B8YBPQDVwwVsGKiEh62fRyWZhhvwOXRhaRiIiMiEaKioiUCCV0EZESoYQuIlIilNBFREqEErqISIlQQhcRKRFK6CIiJUIJXUSkRCihi4iUCCV0EZESoYQuIlIilNBFRioeh/p6KCsLfsbjqctEciTj5FwikkI8Do2N0N0dbLe3w4UXgjv09PSXNTYG77XGm+SAaugiI9HU1J/M++zY0Z/M+3R3B8eK5IASushIbN48NseKjIISushI1NaOzbEio6CELjISy5dDVVVy2cSJUFGRXFZVFRwrkgNK6CIjEYtBczPU1YFZ8POmm+Dmm5PLmpv1QFRyxoIV5DIcZHYasAIoB37k7t8csL8WuBXYOzzmSnd/YKhrNjQ0eEtLy0jjFhEZl8ys1d0bUu3LWEM3s3LgeuB0YCaw0MxmDjjsy8Bd7n4UsABYObqQRURkuLJpcjkW2OTur7j7DuBO4KwBxzjwT+H7ycB/RReiiIhkI5uEfgDwWsJ2R1iW6GrgPDPrAB4ALkt1ITNrNLMWM2vp7OwcQbgiIpJOVA9FFwK3uPt0YD5wm5kNura7N7t7g7s3TJ06NaJbi4gIZJfQXwcOTNieHpYlugi4C8DdnwQqgSlRBCgiItnJJqGvAw42sxlmNpHgoeeaAcdsBuYCmNlhBAldbSoiIjmUMaG7+07gs8CvgRcIerNsNLNrzOzM8LArgIvN7FlgFbDYs+kPKSIikclqtsWwT/kDA8q+mvD+eeCEaEMTEZHh0EhREZESoYQuIlIilNBFREqEErqISIlQQhcRKRFK6CIiJUIJXUSkRCihi4iUCCV0EZESoYQuIlIilNBFREqEErqISIlQQhcRKRFK6CIiJUIJXUSkRCihi4iUCCV0EZESkVVCN7PTzOxFM9tkZlemOeZ/mNnzZr
bRzO6INkwREckk4xJ0ZlYOXA+cAnQA68xsTbjsXN8xBwNXASe4+ztm9t6xClhERFLLpoZ+LLDJ3V9x9x3AncBZA465GLje3d8BcPct0YYpIiKZZJPQDwBeS9juCMsS/TPwz2b2uJn93sxOS3UhM2s0sxYza+ns7BxZxCIiklJUD0UnAAcDJwMLgRvNbO+BB7l7s7s3uHvD1KlTI7q1iIhAdgn9deDAhO3pYVmiDmCNu/e4+6vASwQJXkREciSbhL4OONjMZpjZRGABsGbAMfcS1M4xsykETTCvRBinSGbxONTXQ1lZ8DMez3dEIjmVsZeLu+80s88CvwbKgZvcfaOZXQO0uPuacN9Hzex5YBfwv9y9aywDF0kSj0NjI3R3B9vt7cE2QCyWv7hEcsjcPS83bmho8JaWlrzcW0pQfX2QxAeqq4O2tlxHIzJmzKzV3RtS7dNIUSkNmzcPr1ykBCmhS2morR1euUgJUkKX0rB8OVRVJZdVVQXlIuOEErrkxbx5YNb/mjdvlBeMxaC5OWgzNwt+NjfrgaiMK3ooKjk3bx6sXTu4fO5ceOih3McjUkz0UFQKSqpkPlS5iGRHCV1EpEQooctgGnEpUpSU0CVZ34jL9nZw7x9xGWFSnzt3eOUikh0ldEnW1NQ/fL5Pd3dQHpGHHhqcvPVAVGT0Ms7lIuNMjkZcKnmLRE81dEmmEZciRUsJXZJpxKVI0VJCl2QacSlStJTQBUjuqThlWYwp29ooo5d62oijZC5SDJTQhXgcLrigv6diV1fwiqrXYjwOU6b0z9syZYq6touMBSV0Ydky6OlJvz+bXovpxiL1fVl0Jaxf1dUFF16opC4StawSupmdZmYvmtkmM7tyiOPOMTM3s5QTx0hh6spiscChei0ONRapqSn1l8WOHZF2bRcRskjoZlYOXA+cDswEFprZzBTH7QUsA56KOkjJv6F6LaYbi7RsWepV4fpoMSGRaGVTQz8W2OTur7j7DuBO4KwUx10LXAdsjzA+GWgM5lmpqRl6f6Zei+kSc6aav7q2i0Qrm4R+APBawnZHWLabmc0BDnT3Xw51ITNrNLMWM2vp7OwcdrDj3hjNs7JiBUycmHpfNr0WR5KYJ05U13aRqI36oaiZlQH/AVyR6Vh3b3b3BndvmDp16mhvPf6M0TwrsRjcdFNy1/Pbbw++M9raMndBTzUWaSg1NcH91LVdJFrZzOXyOnBgwvb0sKzPXsD7gUfMDGA/YI2ZnenuWpIoSmM4z0osNvIE23deU1MQSm0tbNuWusmlri74khCR6GVTQ18HHGxmM8xsIrAAWNO3093fdfcp7l7v7vXA7wEl87FQwPOsxGJBou7tDX6uWKEZBERyLWNCd/edwGeBXwMvAHe5+0Yzu8bMzhzrACVBEc2zohkERHJPi0QXm77O3X1tG8uXK0uKjCNDLRKt+dCLzWgau0WkpGnofwEbaji9lvwUkYGU0AtUui7nS+f9icZPd4/lkp/R0LeOSM6pDb1A1denHjZfzk52pWgpK6jugH3fRol95quq9FRUJAJDtaGrhl5owprt5vbelLt3UZ6yvKDmRcnBQtMiMpgSeiFJaGepJXWGLmdXyvIC6IreL0cLTYtIMiX0QpJQs13Ol6ji70m7q6ybRn6QsryguqIX8AAokVKmhF5IEmqwMVbRzMXU0YbRGwzMWfI0K6u+mFxum2le8nRhNU0X0QAokVKihF5IBtRgY6yijRn01rw3mCRr5YnQ3Eys7gna7CB66w5i+ZLNND1wYmF1JtEwUZG8UC+XQhKPB2uz7diRXF5RATffPCghqjOJyPijXi75MLAf9tKlmftlx2Kw116Dy3t6UvYQUWcSEUmkGvpYSFV1HihdVbqsLBgxNJBZMJXhyA4VkRKhGnqupao6D5SuKj2MHiLqTCIiiZTQx0K2/a1THTeMHiLqTCIiiZTQo9TXbp5tM1aqqvQweoioM4mIJFJCzyDrOaYSZ9PKxlBV6YHL/w
yRoYdxqIiUOCX0IaSb8TBlUh+q3byuDi65RFVpERlTWfVyMbPTgBVAOfAjd//mgP3/CnwG2Al0Ahe6+5BV1WLo5ZJuxsOUMxuqy4mI5MCoermYWTlwPXA6MBNYaGYzBxz2DNDg7kcAq4FvjS7kwjCsOabSdC2ZV/EIZux+zZsXXXwiIomyaXI5Ftjk7q+4+w7gTuCsxAPc/WF372tv+D0wPdow82NY3QJTdDmZx29Yu+O/J5WtXaukLiJjI5uEfgDwWsJ2R1iWzkXAr0YTVC5k87Azm26BS5fChAlg58WYsP1vLN3zlt3t5GuZC9ig665dm5/BXCJS2iJ9KGpm5wENwLfT7G80sxYza+ns7Izy1sMSj8PixckPOxcvHpzU03ULhOBLwAxuuAF2hVOU7+ot44a/L2LpkrDLSYpknhSEiEiEMj4UNbMPAFe7+6nh9lUA7v6NAcfNA74PnOTuWzLdOJ8PRffaC7ZtG1xeXQ1/+9sQJ8bjxJc9RWPXN+hmz7SHlZfDzp1Bwk/N8boZBbRmnIgUi9EO/V8HHGxmM8xsIrAAWDPgBkcBPwTOzCaZ51uqZD5UORDUqC+4gGVdXxkymUN/jX3uXICBX5jOXB7U6j0iErmMCd3ddwKfBX4NvADc5e4bzewaMzszPOzbQDXwMzNbb2Zr0lyuwHn6hvVly4j3fJIupmS8Snm47OdDD8HciY8F1w1fc3mQhzhNE66ISOQGLx+fgrs/ADwwoOyrCe+Lqt+GWZou4/T2dzzvG0UEQWN6VxdN/DtDtouH+k4DeOim16CxevCk5ZpwRUQiNu5Gisbjg3uuBJwl3JBc1N1NfNlTQaWdXbRTN+S1y8uDAaErVyYUasIVEcmRrGropSLdNOVlZfA/e1eyksuSj2dh8AC0C4b67qupgbffHuLGsZgSuIiMuXFVQ0833cqBBwJ7VjOBHoxeJtDDUr5PE/+e8QFo1cSdrFiRYkfWs3qJiERjXK1YNFQ3wvCIDGX9+wyntqab5SuqB1e+tdiniIwRrVhEkGPTJ3QYnLgtRVmgrs7o9TLa3k6RzEGLfYpIXoybhN7UlP26E0OpqMiig8qwZvUSEYlG6SX0NG3XQ+fSzF0Rdx+ZzaFa7FNE8qC0EvoQK1LUvmeoYaAweERnajt2ZNFyosU+RSQPSiuhp2u7XraM5X+9jCr+HsltMracqO+5iORBafVDT5dpu7qIcQvwD5axIhy+n+ohaHayajlR33MRybHSqqEPyLRL+f7uvuVGL+cRZyv7MJcHKWfniG6hlhMRKVSlldAT2q6X8n1u4FJ2MYH+LojGLiawlo+yi/I0F/GEV3J5TY1aTkSkcJVWQo/FYNEi4izkBi4lfTNK+uaVcnbhlHE7Mepow+iljjZuv+Rx3n5byVxECldpJPR4HKZMATPiN2ylkRsZTpt4ol3hryTGKtqYQS/ltM39DLGVJ0YYsIhI9Io/ocfj8OlPE+/6KPW8ynnEM86/MpQ66+jfMAumT3zooQgCFREZW8Xfy2XRIuK+gEZuHHYir5rwD7p3TurfroLlzbUQ0yLOIlJ8ireG3tfMsmtXVrMiDlRX/Reab5mkruIiUjKKLqHH41A/ZRtl5y2kvquFOAvZTKaO4ck17qoqWP6DGmKxYJ3m3t7gp5K5iBSzrBK6mZ1mZi+a2SYzuzLF/klm9tNw/1NmVh91oADz/mMp571stH92L/xr5bR/bQbnXXkv76ErzRlOHW1cwvVhjxVXTVxESlbGNnQzKweuB04BOoB1ZrbG3Z9POOwi4B13/29mtgC4DvhUlIEu/eVS1v71hsGdVyb9P7qurKXqm28nNbtU8XeauZgYq4KC2/eBWH2UIYmIFJRsaujHApvc/RV33wHcCZw14JizgFvD96uBuWZZzUuYtebW5tQ9EQ2YtJ1mLk7qN747mc+cGUzUpSq5iJS4bBL6AcBrCdsdYVnKY9x9J/AuUDPwQmbWaGYtZtbS2dk5rEB3+a4h98dYRV
tNA723r6LN64n5HUEi37hxWPcRESlWOX0o6u7N7t7g7g1Tp04d1rnllm6ofuj229FQThEZz7JJ6K8DByZsTw/LUh5jZhOAyZD2SeWINB7dmHbfHhP2UCIXkXEvm4S+DjjYzGaY2URgAbBmwDFrgEXh+08C/+kRrz698l9WcknDJYPK9yjfg+4vd6c4Q0RkfLFs8q6ZzQe+C5QDN7n7cjO7Bmhx9zVmVgncBhwF/AVY4O6vDHXNhoYGb2lpGfV/gIjIeGJmre7ekGpfVkP/3f0B4IEBZV9NeL8dOHc0QYqIyOgU3UhRERFJTQldRKREKKGLiJQIJXQRkRKRVS+XMbmxWSfQPsLTpwBvRxhOFAoxJijMuAoxJijMuBRT9goxrrGIqc7dU47MzFtCHw0za0nXbSdfCjEmKMy4CjEmKMy4FFP2CjGuXMekJhcRkRKhhC4iUiKKNaE35zuAFAoxJijMuAoxJijMuBRT9goxrpzGVJRt6CIiMlix1tBFRGQAJXQRkVLh7kX1Ak4DXgQ2AVeOwfVvArYAGxLK3gP8Bng5/LlPWG7A98JYngPmJJyzKDz+ZWBRQvnRwB/Dc75H2OyVIaYDgYeB54GNwLJ8xwVUAn8Ang1j+npYPgN4KrzOT4GJYfmkcHtTuL8+4VpXheUvAqeO9m9NMCvoM8D9BRRTW/j7XU8wS2khfK72Jlgy8k/AC8AHCiCmQ8LfUd/rr8DlBRDX5wk+5xuAVQSf/7x/rgbFOdIT8/Ei+If6Z+AgYCJBMpkZ8T0+BMwhOaF/q++XDFwJXBe+nw/8KvxQHQ88lfAP9ZXw5z7h+74P4B/CYy089/QsYprW90EF9gJeAmbmM67wuOrwfUX4wT0euItg+mSAHwCXhO+XAj8I3y8Afhq+nxn+HSeF/0D+HP6dR/y3Bv4VuIP+hF4IMbUBUwaU5ftzdSvwmfD9RIIEn9eYUvx7fxOoy2dcBEtsvgrskfB5WlwIn6tBsY408eXjRVCD+HXC9lXAVWNwn3qSE/qLwLTw/TTgxfD9D4GFA48DFgI/TCj/YVg2DfhTQnnSccOI7z7glEKJC6gCngaOIxgVN2Hg3wv4NfCB8P2E8Dgb+DfsO26kf2uCFbXWAh8B7g/vkdeYwmPbGJzQ8/b3I1hV7FUG1E4L5TMVnvNR4PF8x0X/msnvCT8n9wOnFsLnauCr2NrQs1mweizs6+5vhO/fBPbNEM9Q5R0pyrNmZvUEC4k8le+4zANKQkIAAALgSURBVKzczNYTNFH9hqCWsdWDhcIHXifdQuLDjTWT7wL/BvSG2zUFEBOAAw+aWauZ9a2nmM+/3wygE7jZzJ4xsx+Z2Z55jmmgBQTNG+QzLnd/HfgOsBl4g+Bz0kphfK6SFFtCzzsPvkI9H/c2s2rgbuByd/9rvuNy913ufiRBrfhY4NBc3n8gMzsD2OLurfmMI40T3X0OcDpwqZl9KHFnHv5+EwiaFm9w96OAvxM0ZeQzpt3C5S7PBH42cF+u4zKzfYCzCL4E9wf2JGjzLjjFltCzWbB6LLxlZtMAwp9bMsQzVPn0FOUZmVkFQTKPu/vPCyUuAHffSvDQ9gPA3uFC4QOvk24h8eHGOpQTgDPNrA24k6DZZUWeYwJ21/Jw9y3APQRfgPn8+3UAHe7+VLi9miDBF8RniuCL72l3fyvczmdc84BX3b3T3XuAnxN81vL+uRpkJO00+XoR1CpeIfim7Ht4cPgY3Kee5Db0b5P8QOZb4ft/IfmBzB/C8vcQtE/uE75eBd4T7hv4QGZ+FvEY8BPguwPK8xYXMBXYO3y/B/AYcAZBjSrxQdHS8P2lJD8ouit8fzjJD4peIXhINKq/NXAy/Q9F8xoTQY1ur4T3TxDU8PL9uXoMOCR8f3UYT15jSojtTuCCAvmsH0fQw6UqPOdW4LJ8f65SxjrSpJevF8FT7ZcI2mubxuD6qwjayXoIajEXEbR/rSXo/vRQwgfDgOvDWP4INCRc50KCLkibBn
wwGwi6Pv0Z+D9k12XqRIL/xXyO/u5c8/MZF3AEQdfA58LzvhqWHxT+g9kUfuAnheWV4famcP9BCddqCu/7Igk9DkbztyY5oec1pvD+z9LfxbMpLM/35+pIoCX8G95LkPjyGlN43p4ENdrJCWX5/l19naB75wbgNoKkXBCf9cSXhv6LiJSIYmtDFxGRNJTQRURKhBK6iEiJUEIXESkRSugiIiVCCV1EpEQooYuIlIj/D4+NZiyYnDinAAAAAElFTkSuQmCC\n","text/plain":["<Figure size 432x288 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"A7gvUIBQp-Ca","executionInfo":{"status":"ok","timestamp":1607101254835,"user_tz":-120,"elapsed":3230,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"235b0016-11d7-4243-c50f-ff050509085c"},"source":["# Lets add a column for this transformation\n","\n","data['Length_transformed'] = (data['Length']**3)*(np.log10(data['Length']**3))\n","\n","data"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>MMSI</th>\n","      <th>Speed</th>\n","      <th>COG</th>\n","      <th>Destination</th>\n","      <th>Ship_type</th>\n","      <th>Gross_tonnage</th>\n","      <th>Length</th>\n","      <th>Breadth</th>\n","      <th>Length_transformed</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>212209000</td>\n","      <td>10.1377</td>\n","      <td>64.3074</td>\n","      <td>Hamina</td>\n","      <td>Cargo</td>\n","      <td>3416</td>\n","      <td>94.91</td>\n","      <td>15.34</td>\n","      
<td>5.071453e+06</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>212436000</td>\n","      <td>13.5256</td>\n","      <td>77.0755</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>6280</td>\n","      <td>116.90</td>\n","      <td>18.00</td>\n","      <td>9.910062e+06</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>219082000</td>\n","      <td>9.9416</td>\n","      <td>74.6762</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>9980</td>\n","      <td>141.20</td>\n","      <td>21.90</td>\n","      <td>1.815643e+07</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>219083000</td>\n","      <td>11.6038</td>\n","      <td>74.7529</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>9980</td>\n","      <td>141.20</td>\n","      <td>21.60</td>\n","      <td>1.815643e+07</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>219426000</td>\n","      <td>11.9203</td>\n","      <td>56.3253</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>3219</td>\n","      <td>99.90</td>\n","      <td>15.00</td>\n","      <td>5.980718e+06</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>129</th>\n","      <td>273374820</td>\n","      <td>10.0396</td>\n","      <td>74.6253</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>4979</td>\n","      <td>139.90</td>\n","      <td>16.70</td>\n","      <td>1.762655e+07</td>\n","    </tr>\n","    <tr>\n","      <th>130</th>\n","      <td>273385070</td>\n","      <td>9.3507</td>\n","      <td>74.5454</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>4979</td>\n","      <td>139.90</td>\n","      <td>16.94</td>\n","      
<td>1.762655e+07</td>\n","    </tr>\n","    <tr>\n","      <th>131</th>\n","      <td>273388150</td>\n","      <td>9.7668</td>\n","      <td>68.7159</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>5075</td>\n","      <td>140.85</td>\n","      <td>16.86</td>\n","      <td>1.801271e+07</td>\n","    </tr>\n","    <tr>\n","      <th>132</th>\n","      <td>636092755</td>\n","      <td>11.1554</td>\n","      <td>73.7013</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>23240</td>\n","      <td>183.00</td>\n","      <td>27.37</td>\n","      <td>4.159621e+07</td>\n","    </tr>\n","    <tr>\n","      <th>133</th>\n","      <td>357100000</td>\n","      <td>11.2703</td>\n","      <td>59.3888</td>\n","      <td>Vysotsk</td>\n","      <td>Cargo</td>\n","      <td>43717</td>\n","      <td>229.04</td>\n","      <td>32.31</td>\n","      <td>8.506501e+07</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>134 rows × 9 columns</p>\n","</div>"],"text/plain":["          MMSI    Speed      COG  ...  Length Breadth  Length_transformed\n","0    212209000  10.1377  64.3074  ...   94.91   15.34        5.071453e+06\n","1    212436000  13.5256  77.0755  ...  116.90   18.00        9.910062e+06\n","2    219082000   9.9416  74.6762  ...  141.20   21.90        1.815643e+07\n","3    219083000  11.6038  74.7529  ...  141.20   21.60        1.815643e+07\n","4    219426000  11.9203  56.3253  ...   99.90   15.00        5.980718e+06\n","..         ...      ...      ...  ...     ...     ...                 ...\n","129  273374820  10.0396  74.6253  ...  139.90   16.70        1.762655e+07\n","130  273385070   9.3507  74.5454  ...  139.90   16.94        1.762655e+07\n","131  273388150   9.7668  68.7159  ...  140.85   16.86        1.801271e+07\n","132  636092755  11.1554  73.7013  ...  183.00   27.37        4.159621e+07\n","133  357100000  11.2703  59.3888  ...  
229.04   32.31        8.506501e+07\n","\n","[134 rows x 9 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"markdown","metadata":{"id":"O-qAVjP1hOW0"},"source":["**The numerical variables have quite different ranges. To ensure that all variables can have the same importance on the model, perform Z-score standardization. Perform it for speed, transformed length, and breadth 1p**"]},{"cell_type":"code","metadata":{"id":"via0OpTKkPMd"},"source":["data_std = data.copy()\n","\n","data_std['Speed'] = (data_std['Speed'] - data_std['Speed'].mean()) / data_std['Speed'].std()\n","data_std['Length_transformed'] = (data_std['Length_transformed'] - data_std['Length_transformed'].mean()) / data_std['Length_transformed'].std()\n","data_std['Breadth'] = (data_std['Breadth'] - data_std['Breadth'].mean()) / data_std['Breadth'].std()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"mz2A72c7p0Ry","executionInfo":{"status":"ok","timestamp":1607101254836,"user_tz":-120,"elapsed":3217,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"e3a58c76-c5af-4a9d-85f8-83bc566f182f"},"source":["data_std"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>MMSI</th>\n","      <th>Speed</th>\n","      <th>COG</th>\n","      <th>Destination</th>\n","      <th>Ship_type</th>\n","      <th>Gross_tonnage</th>\n","  
    <th>Length</th>\n","      <th>Breadth</th>\n","      <th>Length_transformed</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>212209000</td>\n","      <td>-0.160696</td>\n","      <td>64.3074</td>\n","      <td>Hamina</td>\n","      <td>Cargo</td>\n","      <td>3416</td>\n","      <td>94.91</td>\n","      <td>-0.487276</td>\n","      <td>-0.557527</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>212436000</td>\n","      <td>1.574301</td>\n","      <td>77.0755</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>6280</td>\n","      <td>116.90</td>\n","      <td>-0.219871</td>\n","      <td>-0.410920</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>219082000</td>\n","      <td>-0.261122</td>\n","      <td>74.6762</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>9980</td>\n","      <td>141.20</td>\n","      <td>0.172188</td>\n","      <td>-0.161060</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>219083000</td>\n","      <td>0.590117</td>\n","      <td>74.7529</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>9980</td>\n","      <td>141.20</td>\n","      <td>0.142030</td>\n","      <td>-0.161060</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>219426000</td>\n","      <td>0.752202</td>\n","      <td>56.3253</td>\n","      <td>Hamina</td>\n","      <td>Tanker</td>\n","      <td>3219</td>\n","      <td>99.90</td>\n","      <td>-0.521456</td>\n","      <td>-0.529977</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>129</th>\n","      <td>273374820</td>\n","      <td>-0.210935</td>\n","      <td>74.6253</td>\n","      <td>Vysotsk</td>\n","      
<td>Tanker</td>\n","      <td>4979</td>\n","      <td>139.90</td>\n","      <td>-0.350558</td>\n","      <td>-0.177115</td>\n","    </tr>\n","    <tr>\n","      <th>130</th>\n","      <td>273385070</td>\n","      <td>-0.563732</td>\n","      <td>74.5454</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>4979</td>\n","      <td>139.90</td>\n","      <td>-0.326431</td>\n","      <td>-0.177115</td>\n","    </tr>\n","    <tr>\n","      <th>131</th>\n","      <td>273388150</td>\n","      <td>-0.350640</td>\n","      <td>68.7159</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>5075</td>\n","      <td>140.85</td>\n","      <td>-0.334473</td>\n","      <td>-0.165415</td>\n","    </tr>\n","    <tr>\n","      <th>132</th>\n","      <td>636092755</td>\n","      <td>0.360484</td>\n","      <td>73.7013</td>\n","      <td>Vysotsk</td>\n","      <td>Tanker</td>\n","      <td>23240</td>\n","      <td>183.00</td>\n","      <td>0.722077</td>\n","      <td>0.549150</td>\n","    </tr>\n","    <tr>\n","      <th>133</th>\n","      <td>357100000</td>\n","      <td>0.419326</td>\n","      <td>59.3888</td>\n","      <td>Vysotsk</td>\n","      <td>Cargo</td>\n","      <td>43717</td>\n","      <td>229.04</td>\n","      <td>1.218685</td>\n","      <td>1.866228</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>134 rows × 9 columns</p>\n","</div>"],"text/plain":["          MMSI     Speed      COG  ...  Length   Breadth  Length_transformed\n","0    212209000 -0.160696  64.3074  ...   94.91 -0.487276           -0.557527\n","1    212436000  1.574301  77.0755  ...  116.90 -0.219871           -0.410920\n","2    219082000 -0.261122  74.6762  ...  141.20  0.172188           -0.161060\n","3    219083000  0.590117  74.7529  ...  141.20  0.142030           -0.161060\n","4    219426000  0.752202  56.3253  ...   99.90 -0.521456           -0.529977\n","..         ...       ...      ...  ...     ...       ...                 
...\n","129  273374820 -0.210935  74.6253  ...  139.90 -0.350558           -0.177115\n","130  273385070 -0.563732  74.5454  ...  139.90 -0.326431           -0.177115\n","131  273388150 -0.350640  68.7159  ...  140.85 -0.334473           -0.165415\n","132  636092755  0.360484  73.7013  ...  183.00  0.722077            0.549150\n","133  357100000  0.419326  59.3888  ...  229.04  1.218685            1.866228\n","\n","[134 rows x 9 columns]"]},"metadata":{"tags":[]},"execution_count":20}]},{"cell_type":"markdown","metadata":{"id":"IC45j4lHl0Sa"},"source":["## Classification accuracy with random training and test sets"]},{"cell_type":"markdown","metadata":{"id":"xZOcgo3Gl0Sa"},"source":["Predict the **ship type** using **speed, destination, transformed length, and breadth** as features. Find an estimation for the classification accuracy (number of correctly classified ships to the total number of ships) using *random training and test sets*. <br>\n"," - Produce training and test data **1p**\n","     - Gather the normalized features and one-hot-coded destination columns as array __X__ (input variables), and the ship type as array **y** (output variable)     \n","     - Divide the data randomly into training (20%) and test (80%) sets\n","     - Do you need to use stratification? Explain your decision\n"," - Train the model and test its performance **1p**\n","     - Use kNN classifier with k=3\n","     - Print out the confusion matrix. 
How does the model perform with different ship types?\n","     - What is the (total) classification accuracy?\n"," - Repeat the calculation 1000 times with different split of training/test data, and make a histogram of the results for classification accuracy **1p**\n"," - Discuss your results **1p**"]},{"cell_type":"markdown","metadata":{"id":"ki03tbUxtjoD"},"source":["**Gather the normalized features and one-hot-coded destination columns as array __X__ (input variables), and the ship type as array y (output variable)**"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"IaK8iG5Ptizr","executionInfo":{"status":"ok","timestamp":1607101254838,"user_tz":-120,"elapsed":3210,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"a639e251-0851-4444-bdb6-b824a838ee7a"},"source":["X = data_std[['Speed','Length_transformed','Breadth']]\n","X = pd.concat([X,dest], axis=1)\n","X"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Speed</th>\n","      <th>Length_transformed</th>\n","      <th>Breadth</th>\n","      <th>Hamina</th>\n","      <th>Helsinki</th>\n","      <th>Kotka</th>\n","      <th>Kronshtadt</th>\n","      <th>Kunda</th>\n","      <th>Muuga</th>\n","      <th>Paldiski</th>\n","      <th>Porvoo</th>\n","      <th>Primorsk</th>\n","      <th>Sillamae</th>\n","      <th>Tallinn</th>\n","      <th>Ust-Luga</th>\n","      
<th>Valko-Loviisa</th>\n","      <th>Viipuri</th>\n","      <th>Vuosaari</th>\n","      <th>Vysotsk</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>-0.160696</td>\n","      <td>-0.557527</td>\n","      <td>-0.487276</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1.574301</td>\n","      <td>-0.410920</td>\n","      <td>-0.219871</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>-0.261122</td>\n","      <td>-0.161060</td>\n","      <td>0.172188</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>0.590117</td>\n","      <td>-0.161060</td>\n","      <td>0.142030</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","   
   <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0.752202</td>\n","      <td>-0.529977</td>\n","      <td>-0.521456</td>\n","      <td>1</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>129</th>\n","      <td>-0.210935</td>\n","      <td>-0.177115</td>\n","      <td>-0.350558</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>130</th>\n","      <td>-0.563732</td>\n","      <td>-0.177115</td>\n","      <td>-0.326431</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>131</th>\n","      
<td>-0.350640</td>\n","      <td>-0.165415</td>\n","      <td>-0.334473</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>132</th>\n","      <td>0.360484</td>\n","      <td>0.549150</td>\n","      <td>0.722077</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>133</th>\n","      <td>0.419326</td>\n","      <td>1.866228</td>\n","      <td>1.218685</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>0</td>\n","      <td>1</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>134 rows × 19 columns</p>\n","</div>"],"text/plain":["        Speed  Length_transformed   Breadth  ...  Viipuri  Vuosaari  Vysotsk\n","0   -0.160696           -0.557527 -0.487276  ...        0         0        0\n","1    1.574301           -0.410920 -0.219871  ...        0         0        0\n","2   -0.261122           -0.161060  0.172188  ...        0         0        0\n","3    0.590117           -0.161060  0.142030  ...        0         0        0\n","4    0.752202           -0.529977 -0.521456  ...        0         0        0\n","..        ...                 ...       ...  ...      ... 
      ...      ...\n","129 -0.210935           -0.177115 -0.350558  ...        0         0        1\n","130 -0.563732           -0.177115 -0.326431  ...        0         0        1\n","131 -0.350640           -0.165415 -0.334473  ...        0         0        1\n","132  0.360484            0.549150  0.722077  ...        0         0        1\n","133  0.419326            1.866228  1.218685  ...        0         0        1\n","\n","[134 rows x 19 columns]"]},"metadata":{"tags":[]},"execution_count":21}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"8NZYrLXD1rhm","executionInfo":{"status":"ok","timestamp":1607101255201,"user_tz":-120,"elapsed":3565,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"07d2fe01-498a-4292-8d6e-a90482f3c90c"},"source":["y = data_std[['Ship_type']]\n","y"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Ship_type</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Cargo</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Tanker</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Tanker</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Tanker</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Tanker</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","    </tr>\n","    
<tr>\n","      <th>129</th>\n","      <td>Tanker</td>\n","    </tr>\n","    <tr>\n","      <th>130</th>\n","      <td>Tanker</td>\n","    </tr>\n","    <tr>\n","      <th>131</th>\n","      <td>Tanker</td>\n","    </tr>\n","    <tr>\n","      <th>132</th>\n","      <td>Tanker</td>\n","    </tr>\n","    <tr>\n","      <th>133</th>\n","      <td>Cargo</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>134 rows × 1 columns</p>\n","</div>"],"text/plain":["    Ship_type\n","0       Cargo\n","1      Tanker\n","2      Tanker\n","3      Tanker\n","4      Tanker\n","..        ...\n","129    Tanker\n","130    Tanker\n","131    Tanker\n","132    Tanker\n","133     Cargo\n","\n","[134 rows x 1 columns]"]},"metadata":{"tags":[]},"execution_count":22}]},{"cell_type":"markdown","metadata":{"id":"nfSFv9Zf9tDT"},"source":["**Divide the data randomly into training (20%) and test (80%) sets**\n","\n","**Do you need to use stratification? Explain your decision**\n"]},{"cell_type":"markdown","metadata":{"id":"KyO5YrbXwaX8"},"source":["\n","Yes. 
A purely random split does not guarantee that every ship type is represented in the training data: Tug is a rare class and only 20% of the rows go to training, so the split is stratified on ship type."]},{"cell_type":"code","metadata":{"id":"7HrASZhP2K0W"},"source":["# Lets divide data 20% training and 80% tests using stratify\n","from sklearn.model_selection import train_test_split\n","\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, stratify = y, random_state = 1) # Lets put random state so we can get same result with reruns\n","\n","#print(X_train)\n","#print(y_train)\n","#print(X_test)\n","#print(y_test)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hVlrC2AKCHeG"},"source":["**Use kNN classifier with k=3**"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"juKnfvVACGfH","executionInfo":{"status":"ok","timestamp":1607101255202,"user_tz":-120,"elapsed":3551,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"402faa08-986a-4be7-ca17-208b91eda920"},"source":["from sklearn.neighbors import KNeighborsClassifier\n","\n","neigh = KNeighborsClassifier(n_neighbors=3)\n","neigh.fit(X_train, np.ravel(y_train))"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n","                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,\n","                     weights='uniform')"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"markdown","metadata":{"id":"yeTR4o5_xPIS"},"source":["**Print out the confusion matrix. 
How does the model perform with different ship types?**"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"AFxSmkd9Gpau","executionInfo":{"status":"ok","timestamp":1607101255202,"user_tz":-120,"elapsed":3544,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"43a07cb5-a716-4a71-cffc-11e1695a1257"},"source":["from sklearn.metrics import confusion_matrix\n","y_pred = neigh.predict(X_test)\n","\n","confusion_matrix(y_test, y_pred, labels=['Cargo','Tanker','Tug'])"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[34, 19,  1],\n","       [ 6, 41,  0],\n","       [ 6,  0,  1]])"]},"metadata":{"tags":[]},"execution_count":25}]},{"cell_type":"markdown","metadata":{"id":"s4mOPxApH8xY"},"source":["Share of correctly classified ships per actual ship type (rows of the confusion matrix):\n","\n","- Cargo: 34/54 = 63%\n","- Tanker: 41/47 = 87%\n","- Tug: 1/7 = 14%\n","\n","The model performs best with Tanker and worst with Tug: 6 of the 7 Tugs are predicted as Cargo, and 19 Cargo ships are predicted as Tanker."]},{"cell_type":"markdown","metadata":{"id":"0K6BzRlPxV-N"},"source":["**What is the (total) classification accuracy?**"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"thdl7671G0bk","executionInfo":{"status":"ok","timestamp":1607101255202,"user_tz":-120,"elapsed":3537,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"55c577ee-3588-41b3-9335-c9ac73485afb"},"source":["# Total accuracy\n","\n","neigh.score(X_test, np.ravel(y_test))"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.7037037037037037"]},"metadata":{"tags":[]},"execution_count":26}]},{"cell_type":"markdown","metadata":{"id":"q_gB_4N1KNU2"},"source":["**Repeat the calculation 1000 times with different split of training/test data, and make a histogram of the results for 
classification accuracy 1p**"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":282},"id":"oFfwi1t8KRp7","executionInfo":{"status":"ok","timestamp":1607101263904,"user_tz":-120,"elapsed":12231,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"e8945eb7-1c03-417f-b9d5-4c9a09430198"},"source":["# Test size 0.8\n","accuracy = np.zeros(1000)\n","for i in range(1000):\n","  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, stratify = y)\n","\n","  neigh = KNeighborsClassifier(n_neighbors=3)\n","  neigh.fit(X_train, np.ravel(y_train))\n","\n","  accuracy[i] = neigh.score(X_test, np.ravel(y_test))\n","\n","plt.hist(accuracy)\n","plt.show()\n","\n","print(np.mean(accuracy))"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAYAAAAD4CAYAAADlwTGnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOp0lEQVR4nO3df6zddX3H8edrVDGZbpb12rDSeZmpcSXZ0N0wMvcHC4mUkqyQJaQs0+rYahZcNPGfqn9IlpB1yYTNzJEUJdZFZcQfoQvsB2MsRjPUiyJQGFKhhHaVXn/MHzFho773x/02Hspt77nnB+ccPs9HcnK+5/P9fs73/bmf5r7u9/s939NUFZKk9vzcpAuQJE2GASBJjTIAJKlRBoAkNcoAkKRGrZt0AQAbNmyo+fn5SZchSTPl/vvv/05VzQ3afyoCYH5+nsXFxUmXIUkzJclTw/T3FJAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDVqKu4ElvRC83vunMh+D++9YiL71YvPIwBJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWrUqgGQZHOSe5M8kuRgknd37dcnOZrkge6xvafP+5IcSvJYksvGOQBJ0mD6+SqI54D3VtXXkrwKuD/J3d26m6rqr3o3TrIV2AlcAPwy8G9JXl9VJ0ZZuCRpOKseAVTVsar6Wrf8I+BRYNMZuuwAbquqZ6vqSeAQcNEoipUkjc6argEkmQfeCHy5a3pXkgeT3Jpkfde2CXi6p9sRzhwYkqQJ6DsAkrwS+Czwnqr6IXAz8DrgQuAY8KG17DjJ7iSLSRaXlpbW0lWSNAJ9BUCSl7H8y/+TVfU5gKp6pqpOVNVPgVv42Wmeo8Dmnu7ndW3PU1X7qmqhqhbm5uaGGYMkaQD9fAoowMeAR6vqxp72c3s2uwp4uFs+AOxMcnaS84EtwFdGV7IkaRT6+RTQm4G3Ag8leaBrez9wTZILgQIOA+8EqKq
DSW4HHmH5E0TX+QkgSZo+qwZAVX0RyAqr7jpDnxuAG4aoS5I0Zt4JLEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjernqyAkNWR+z50T2/fhvVdMbN8t8ghAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNWjUAkmxOcm+SR5IcTPLurv2cJHcnebx7Xt+1J8mHkxxK8mCSN417EJKktevnCOA54L1VtRW4GLguyVZgD3BPVW0B7uleA1wObOkeu4GbR161JGloqwZAVR2rqq91yz8CHgU2ATuA/d1m+4Eru+UdwCdq2X3Aq5OcO/LKJUlDWdM1gCTzwBuBLwMbq+pYt+rbwMZueRPwdE+3I13bqe+1O8liksWlpaU1li1JGlbfAZDklcBngfdU1Q9711VVAbWWHVfVvqpaqKqFubm5tXSVJI1AXwGQ5GUs//L/ZFV9rmt+5uSpne75eNd+FNjc0/28rk2SNEX6+RRQgI8Bj1bVjT2rDgC7uuVdwB097W/rPg10MfCDnlNFkqQpsa6Pbd4MvBV4KMkDXdv7gb3A7UmuBZ4Cru7W3QVsBw4BPwHeMdKKJUkjsWoAVNUXgZxm9aUrbF/AdUPWJUkaM+8ElqRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktSofr4LSGra/J47J12CNBYeAUhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNWjUAktya5HiSh3vark9yNMkD3WN7z7r3JTmU5LEkl42rcEnScPo5Avg4sG2F9puq6sLucRdAkq3ATuCCrs/fJTlrVMVKkkZn1QCoqi8A3+vz/XYAt1XVs1X1JHAIuGiI+iRJYzLMNYB3JXmwO0W0vmvbBDzds82Rru0FkuxOsphkcWlpaYgyJEmDGDQAbgZeB1wIHAM+tNY3qKp9VbVQVQtzc3MDliFJGtRAAVBVz1TViar6KXALPzvNcxTY3LPpeV2bJGnKDBQASc7teXkVcPITQgeAnUnOTnI+sAX4ynAlSpLGYd1qGyT5NHAJsCHJEeCDwCVJLgQKOAy8E6CqDia5HXgEeA64rqpOjKd0SdIwVg2AqrpmheaPnWH7G4AbhilKkjR+3gksSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVGrfhWEJL1Y5vfcOZH9Ht57xUT2O2keAUhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJatSqAZDk1iTHkzzc03ZOkruTPN49r+/ak+TDSQ4leTDJm8ZZvCRpcP0cAXwc2HZK2x7gnqraAtzTvQa4HNjSPXYDN4+mTEnSqK0aAFX1BeB7pzTvAPZ3y/uBK3vaP1HL7gNeneTcURUrSRqdQa8BbKyqY93yt4GN3fIm4Ome7Y50bZKkKTP0ReCqKqDW2i/J7iSLSRaXlpaGLUOStEaDBsAzJ0/tdM/Hu/ajwOae7c7r2l6gqvZV1UJVLczNzQ1YhiRpUIMGwAFgV7e8C7ijp/1t3aeBLgZ+0HOqSJI0RdattkGSTwOXABuSHAE+COwFbk9yLfAUcHW3+V3AduAQ8BPgHWOoWZI0AqsGQFVdc5pVl66wbQHXDVuUJGn8vBNYkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGrXofgDQN5vfcOekSpJccjwAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASB
JjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKj1g3TOclh4EfACeC5qlpIcg7wD8A8cBi4uqq+P1yZkqRRG8URwO9W1YVVtdC93gPcU1VbgHu615KkKTOOU0A7gP3d8n7gyjHsQ5I0pGEDoIB/TXJ/kt1d28aqOtYtfxvYuFLHJLuTLCZZXFpaGrIMSdJaDXUNAPidqjqa5DXA3Un+q3dlVVWSWqljVe0D9gEsLCysuI0kaXyGOgKoqqPd83Hg88BFwDNJzgXono8PW6QkafQGDoAkP5/kVSeXgbcADwMHgF3dZruAO4YtUpI0esOcAtoIfD7Jyff5VFX9c5KvArcnuRZ4Crh6+DIlSaM2cABU1RPAb6zQ/l3g0mGKkiSN37AXgdWY+T13TroESSPiV0FIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQo7wOQ1LxJ3t9yeO8VE9u3RwCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRnkj2AzyP2WRNAoeAUhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqO8E3gI3pEraZZ5BCBJjTIAJKlRYzsFlGQb8DfAWcBHq2rvOPbjaRhJGsxYjgCSnAV8BLgc2Apck2TrOPYlSRrMuE4BXQQcqqonqup/gduAHWPalyRpAOM6BbQJeLrn9RHgt3o3SLIb2N29/HGSxwbc1wbgOwP2nVaOaTY4ptkxtePKXw7cdQPw2mH2PbGPgVbVPmDfsO+TZLGqFkZQ0tRwTLPBMc2Ol+K4ujHND/Me4zoFdBTY3PP6vK5NkjQlxhUAXwW2JDk/ycuBncCBMe1LkjSAsZwCqqrnkrwL+BeWPwZ6a1UdHMe+GMFppCnkmGaDY5odL8VxDX8KvapGUYgkacZ4J7AkNcoAkKRGTXUAJNmW5LEkh5LsWWH925MsJXmge/xxz7pdSR7vHrte3MpPb8gxnehpn5qL6quNqdvm6iSPJDmY5FM97TM5T902pxvTTM5Tkpt66v5mkv/pWTeT87TKmGZ1nn4lyb1Jvp7kwSTbe9a9r+v3WJLLVt1ZVU3lg+WLx98CfhV4OfANYOsp27wd+NsV+p4DPNE9r++W18/ymLp1P570GAYc0xbg6yfnAHjNS2CeVhzTLM/TKdv/Gcsf3pjpeTrdmGZ5nli++Pun3fJW4HDP8jeAs4Hzu/c560z7m+YjgGG+TuIy4O6q+l5VfR+4G9g2pjrX4qX4FRn9jOlPgI90c0FVHe/aZ3meTjemabXWf3vXAJ/ulmd5nnr1jmla9TOmAn6hW/5F4L+75R3AbVX1bFU9CRzq3u+0pjkAVvo6iU0rbPf73WHQZ5KcvPms374vtmHGBPCKJItJ7kty5Vgr7V8/Y3o98PokX+pq37aGvpMwzJhgducJgCSvZfkvyH9fa98X2TBjgtmdp+uBP0xyBLiL5SObfvs+zzQHQD/+EZivql9n+a+S/ROuZxTONKbX1vLt7H8A/HWS102iwAGsY/mUySUs/xV2S5JXT7Si4Z1pTLM6TyftBD5TVScmXcgIrTSmWZ2na4CPV9V5wHbg75MM9Lt8mgNg1a+TqKrvVtWz3cuPAr/Zb98JGWZMVNXR7vkJ4D+AN46z2D7187M+Ahyoqv/rDk2/yfIvz5mdJ04/plmep5N28vxTJbM8TyedOqZZnqdrgdsBquo/gVew/MVwa5+nSV/0OMPFkHUsX2w6n59dDLnglG3O7Vm+CrivWz4HeJLlC1bru+VzZnxM64Gzu+UNwOOc4YLXlI1pG7C/p/angV+a8Xk63Zhmdp667d4AHKa7SbRrm9l5OsOYZnaegH8C3t4t/xrL1wACXMDzLwI/wSoXgSc62D5+GNtZ/svqW8AHurY/B36vW/4L4GA36HuBN/T0/SOWL4IcAt4x6bEMOyb
gt4GHuvaHgGsnPZY1jCnAjcAjXe07XwLztOKYZnmeutfXA3tX6DuT83S6Mc3yPLH8aZ8vdbU/ALylp+8Hun6PAZevti+/CkKSGjXN1wAkSWNkAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRG/T8tcGIs+lLfsgAAAABJRU5ErkJggg==\n","text/plain":["<Figure size 432x288 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}},{"output_type":"stream","text":["0.6838055555555557\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":282},"id":"chKlN4gWa6D6","executionInfo":{"status":"ok","timestamp":1607101271503,"user_tz":-120,"elapsed":19821,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"bdd40a37-46ad-495b-aa34-fbc9672b01f7"},"source":["# Test size 0.5\n","accuracy = np.zeros(1000)\n","for i in range(1000):\n","  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, stratify = y)\n","\n","  neigh = KNeighborsClassifier(n_neighbors=3)\n","  neigh.fit(X_train, np.ravel(y_train))\n","\n","  accuracy[i] = neigh.score(X_test, 
np.ravel(y_test))\n","\n","plt.hist(accuracy)\n","plt.show()\n","\n","print(np.mean(accuracy))"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOlUlEQVR4nO3dbYxc1X3H8e+vkBCpSRtTOxY1TpZERi20KqQrQptWokINBNSaqBIyUhOXojqqoEqkvKhJXgRVQnKlAlLUlMoRKKZKoCgPxRL0gRLaKFHzsBAC2JRgwAi7Dt6EJBClosX598VcN4NZe2d2ZnZ2T74faTRnzr137vnvtX6+e+7M3VQVkqS2/My0ByBJGj/DXZIaZLhLUoMMd0lqkOEuSQ06edoDAFi7dm3NzMxMexiStKo88MAD36mqdQstWxHhPjMzw9zc3LSHIUmrSpJnjrfMaRlJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQiviGqrSYme13T23f+3dcOrV9S0vlmbskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgRcM9ycYk9yfZm2RPkg90/dclOZjkoe5xSd821ybZl+TxJBdNsgBJ0qsNcuOwl4EPVdWDSd4APJDk3m7ZTVX1V/0rJzkL2AKcDfwi8K9JzqyqI+McuCTp+BY9c6+qQ1X1YNd+EXgM2HCCTTYDd1TVS1X1NLAPOG8cg5UkDWaoOfckM8C5wFe7rmuSPJzk1iRrur4NwLN9mx1ggf8MkmxLMpdkbn5+fuiBS5KOb+BwT/J64LPAB6vqBeBm4G3AOcAh4IZhdlxVO6tqtqpm161bN8ymkqRFDBTuSV5DL9g/VVWfA6iq56rqSFX9GPgEP5l6OQhs7Nv89K5PkrRMBvm0TIBbgMeq6sa+/tP6VnsP8GjX3g1sSXJKkjOATcDXxjdkSdJiBvm0zDuB9wKPJHmo6/swcEWSc4AC9gPvB6iqPUnuBPbS+6TN1X5SRpKW16LhXlVfArLAontOsM31wPUjjEuSNAK/oSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDBvljHdJPtZntd09lv/t3XDqV/aoNnrlLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aNFwT7Ixyf1J9ibZk+QDXf+pSe5N8kT3vKbrT5KPJdmX5OEkb590EZKkVxrkzP1l4ENVdRZwPnB1krOA7cB9VbUJuK97DfBuYFP32AbcPPZRS5JOaNFwr6pDVfVg134ReAzYAGwGdnWr7QIu69qbgduq5yvAG5OcNvaRS5KOa6g59yQzwLnAV4H1VXWoW/RtYH3X3gA827fZga7v2PfalmQuydz8/PyQw5YkncjAf4kpyeuBzwIfrKoXkvz/sqqqJDXMjqtqJ7ATYHZ2dqhtNT3T+qtEkoYz0Jl7ktfQC/ZPVdXnuu7njk63dM+Hu/6DwMa+zU/v+iRJy2SQT8sEuAV4rKpu7Fu0G9jatbcCd/X1v6/71Mz5wA/6pm8kSctgkGmZdwLvBR5J8lDX92FgB3BnkquAZ4DLu2X3AJcA+4AfAVeOdcSSpEUtGu5V9SUgx1l84QLrF3D1iOOSJI3Ab6hKUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw1
2SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBi0a7kluTXI4yaN9fdclOZjkoe5xSd+ya5PsS/J4kosmNXBJ0vENcub+SeDiBfpvqqpzusc9AEnOArYAZ3fb/E2Sk8Y1WEnSYBYN96r6IvD8gO+3Gbijql6qqqeBfcB5I4xPkrQEo8y5X5Pk4W7aZk3XtwF4tm+dA13fqyTZlmQuydz8/PwIw5AkHWup4X4z8DbgHOAQcMOwb1BVO6tqtqpm161bt8RhSJIWsqRwr6rnqupIVf0Y+AQ/mXo5CGzsW/X0rk+StIyWFO5JTut7+R7g6CdpdgNbkpyS5AxgE/C10YYoSRrWyYutkOR24AJgbZIDwEeBC5KcAxSwH3g/QFXtSXInsBd4Gbi6qo5MZuiSpONZNNyr6ooFum85wfrXA9ePMihJ0mj8hqokNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYt+mf2JE3HzPa7p7Lf/Tsuncp+NV6euUtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lq0KLhnuTWJIeTPNrXd2qSe5M80T2v6fqT5GNJ9iV5OMnbJzl4SdLCBrm3zCeBvwZu6+vbDtxXVTuSbO9e/znwbmBT93gHcHP3rDGa1j1HJK0ei565V9UXgeeP6d4M7Orau4DL+vpvq56vAG9Mctq4BitJGsxS59zXV9Whrv1tYH3X3gA827fega7vVZJsSzKXZG5+fn6Jw5AkLWTkC6pVVUAtYbudVTVbVbPr1q0bdRiSpD5LDffnjk63dM+Hu/6DwMa+9U7v+iRJy2ip4b4b2Nq1twJ39fW/r/vUzPnAD/qmbyRJy2TRT8skuR24AFib5ADwUWAHcGeSq4BngMu71e8BLgH2AT8CrpzAmCVJi1g03KvqiuMsunCBdQu4etRBSZJG4zdUJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIadPIoGyfZD7wIHAFerqrZJKcCfw/MAPuBy6vqe6MNU5I0jHGcuf9OVZ1TVbPd6+3AfVW1Cbivey1JWkaTmJbZDOzq2ruAyyawD0nSCYwa7gX8S5IHkmzr+tZX1aGu/W1g/UIbJtmWZC7J3Pz8/IjDkCT1G2nOHfitqjqY5E3AvUn+s39hVVWSWmjDqtoJ7ASYnZ1dcB1J0tKMdOZeVQe758PA54HzgOeSnAbQPR8edZCSpOEsOdyT/GySNxxtA+8CHgV2A1u71bYCd406SEnScEaZllkPfD7J0ff5dFX9U5KvA3cmuQp4Brh89GFKkoax5HCvqqeAX1ug/7vAhaMMSpI0mlEvqP5Um9l+97SHIEkL8vYDktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDfKukJJeYZp3O92/49Kp7bs1nrlLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGrTqv8Q0zS9cSNJK5Zm7JDXIcJekBhnuktQgw12SGmS4S1KDVv2nZSS1Y1qffmvxVsMTO3NPcnGSx5PsS7J9UvuRJL3aRM7ck5wEfBz4XeAA8PUku6tq7yT2J0mjaPEPlEzqzP08YF9VPVVV/wPcAWye0L4kSceY1Jz7BuDZvtcHgHf0r5BkG7
Cte/nDJI+Pad9rge+M6b1WihZrAutaTVqsCVZAXfnLkTZ/y/EWTO2CalXtBHaO+32TzFXV7Ljfd5parAmsazVpsSZoty6Y3LTMQWBj3+vTuz5J0jKYVLh/HdiU5IwkrwW2ALsntC9J0jEmMi1TVS8nuQb4Z+Ak4Naq2jOJfS1g7FM9K0CLNYF1rSYt1gTt1kWqatpjkCSNmbcfkKQGGe6S1KBVE+6D3M4gyeVJ9ibZk+TTff1bkzzRPbYu36gXN2JdR5I81D1W1AXrxepKclPf2L+V5Pt9y1bk8RqxptV8rN6c5P4k30jycJJL+pZd2233eJKLlnfkJ7bUupLMJPnvvuP1t8s/+jGoqhX/oHdR9kngrcBrgW8CZx2zzibgG8Ca7vWbuudTgae65zVde820axq1rq79w2nXsNS6jln/z+hddF+xx2uUmlb7saJ30fFPu/ZZwP6+9jeBU4Azuvc5ado1jaGuGeDRadcw6mO1nLkPcjuDPwE+XlXfA6iqw13/RcC9VfV8t+xe4OJlGvdiRqlrJRv29hNXALd37ZV6vEapaSUbpK4Cfq5r/zzwX117M3BHVb1UVU8D+7r3WwlGqasJqyXcF7qdwYZj1jkTODPJl5N8JcnFQ2w7LaPUBfC6JHNd/2WTHuwQBv6ZJ3kLvbO+Lwy77TIbpSZY3cfqOuAPkxwA7qH3W8mg207LKHUBnNFN1/x7kt+e6EgnpKX7uZ9MbwrjAnrfiP1ikl+d6ojGY8G6qur7wFuq6mCStwJfSPJIVT05xbEuxRbgM1V1ZNoDGaOFalrNx+oK4JNVdUOS3wD+LsmvTHtQY3C8ug4Bb66q7yb5deAfkpxdVS9MdbRDWi1n7oPczuAAsLuq/rf7FfFb9EJxJd8KYZS6qKqD3fNTwL8B5056wAMa5me+hVdOX6zU4zVKTav9WF0F3AlQVf8BvI7eDbdW6rGCEerqppm+2/U/QG/u/syJj3jcpj3pP8iD3tnrU/R+1T16ceTsY9a5GNjVtdfS+5XsF+hdmHua3sW5NV371GnXNIa61gCn9PU/wQku8K20urr1fgnYT/dluq5vRR6vEWta1ccK+Efgj7r2L9Obmw5wNq+8oPoUK+eC6ih1rTtaB70LsgdXwr/BoX8G0x7AEAfrEnpnrU8CH+n6/gL4/a4d4EZgL/AIsKVv2z+md7FnH3DltGsZR13Ab3avv9k9XzXtWoapq3t9HbBjgW1X5PFaak2r/VjR+yTJl7vxPwS8q2/bj3TbPQ68e9q1jKMu4A+APV3fg8DvTbuWpTy8/YAkNWi1zLlLkoZguEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QG/R+kqFcRQSMOnAAAAABJRU5ErkJggg==\n","text/plain":["<Figure size 432x288 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}},{"output_type":"stream","text":["0.7231641791044776\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":282},"id":"dlOsrMEVbA6_","executionInfo":{"status":"ok","timestamp":1607101277715,"user_tz":-120,"elapsed":26025,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"8862b047-6b56-452a-fece-4e0f918bcc08"},"source":["# 
Test size 0.2\n","accuracy = np.zeros(1000)\n","for i in range(1000):\n","  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify = y)\n","\n","  neigh = KNeighborsClassifier(n_neighbors=3)\n","  neigh.fit(X_train, np.ravel(y_train))\n","\n","  accuracy[i] = neigh.score(X_test, np.ravel(y_test))\n","\n","plt.hist(accuracy)\n","plt.show()\n","\n","print(np.mean(accuracy))"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQZElEQVR4nO3dcayddX3H8fdngGimG2CvTdcWy1yNw2UWd4csLpFBnAiJRbeRkkyrYdYtODVxZsX9oS4jwWRKYuZI6mBUo2CHOjphcwxZjIuAFymVFtEKZbRWekVAiRkb9bs/7kM8bW97zr3n3nPKz/crOTnP83t+z3m+99dzP/fp7z7nuakqJElt+YVxFyBJWniGuyQ1yHCXpAYZ7pLUIMNdkhp0/LgLAFiyZEmtWrVq3GVI0rPKXXfd9YOqmpht2zER7qtWrWJqamrcZUjSs0qSh460zWkZSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoP6hnuS5ya5M8k9SXYk+VDXfm2SB5Ns6x5ruvYk+ViSXUm2J3nlYn8RkqSDDfIJ1aeAc6rqySQnAF9N8q/dtvdV1Q2H9H89sLp7vAq4qnuWnpVWbbxpLMfdfcUFYzmu2tD3zL1mPNmtntA9jvbnm9YCn+z2ux04Kcmy4UuVJA1qoDn3JMcl2QbsB26pqju6TZd3Uy9XJjmxa1sOPNyz+56uTZI0IgOFe1UdqKo1wArgzCS/AVwGvAz4beAU4C/ncuAkG5JMJZmanp6eY9mSpKOZ09UyVfU4cBtwXlXt66ZengL+ETiz67YXWNmz24qu7dDX2lRVk1U1OTEx6x0rJUnzNMjVMhNJTuqWnwe8FvjWM/PoSQJcCNzb7bIVeEt31cxZwBNVtW9RqpckzWqQq2WWAZuTHMfMD4MtVfXFJF9OMgEE2Ab8adf/ZuB8YBfwE+BtC1+2JOlo+oZ7VW0Hzpil/Zwj9C/g0uFLkyTNl59QlaQGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWpQ33BP8twkdya5J8mOJB/q2k9LckeSXUk+m+Q5XfuJ3fqubvuqxf0SJEmHGuTM/SngnKp6BbAGOC/JWcCHgSur6teAx4BLuv6XAI917Vd2/SRJI9Q33GvGk93qCd2jgHOAG7r2zcCF3fLabp1u+7lJsmAVS5L6GmjOPclxSbYB+4FbgO8Cj1fV012XPcDybnk58DBAt/0J4IWzvOaGJFNJpqanp4f7KiRJBxko3KvqQFWtAVYAZwIvG/bAVbWpqiaranJiYmLYl5Mk9ZjT1TJV9ThwG/A7wElJju82rQD2dst7gZUA3fZfBh5dkGolSQMZ5GqZiSQndcvPA14L3MdMyP9h1209cGO3vLVbp9v+5aqqhSxaknR
0x/fvwjJgc5LjmPlhsKWqvphkJ3B9kr8B7gau7vpfDXwqyS7gh8C6RahbknQUfcO9qrYDZ8zS/gAz8++Htv8P8EcLUp0kaV78hKokNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhrUN9yTrExyW5KdSXYkeXfX/sEke5Ns6x7n9+xzWZJdSe5P8rrF/AIkSYc7foA+TwPvrapvJHkBcFeSW7ptV1bV3/Z2TnI6sA54OfArwH8keWlVHVjIwiVJR9b3zL2q9lXVN7rlHwP3AcuPssta4PqqeqqqHgR2AWcuRLGSpMHMac49ySrgDOCOrumdSbYnuSbJyV3bcuDhnt32MMsPgyQbkkwlmZqenp5z4ZKkIxs43JM8H/gc8J6q+hFwFfASYA2wD/jIXA5cVZuqarKqJicmJuayqySpj4HCPckJzAT7p6vq8wBV9UhVHaiqnwKf4GdTL3uBlT27r+jaJEkjMsjVMgGuBu6rqo/2tC/r6fZG4N5ueSuwLsmJSU4DVgN3LlzJkqR+Brla5tXAm4FvJtnWtb0fuDjJGqCA3cA7AKpqR5ItwE5mrrS51CtlJGm0+oZ7VX0VyCybbj7KPpcDlw9RlyRpCH5CVZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkho0yJ/ZkzQGqzbeNJbj7r7igrEcVwvLM3dJapDhLkkN6hvuSVYmuS3JziQ7kry7az8lyS1JvtM9n9y1J8nHkuxKsj3JKxf7i5AkHWyQM/engfdW1enAWcClSU4HNgK3VtVq4NZuHeD1wOrusQG4asGrliQdVd9wr6p9VfWNbvnHwH3AcmAtsLnrthm4sFteC3yyZtwOnJRk2YJXLkk6ojnNuSdZBZwB3AEsrap93abvA0u75eXAwz277enaDn2tDUmmkkxNT0/PsWxJ0tEMHO5Jng98DnhPVf2od1tVFVBzOXBVbaqqyaqanJiYmMuukqQ+Bgr3JCcwE+yfrqrPd82PPDPd0j3v79r3Ait7dl/RtUmSRmSQq2UCXA3cV1Uf7dm0FVjfLa8Hbuxpf0t31cxZwBM90zeSpBEY5BOqrwbeDHwzybau7f3AFcCWJJcADwEXddtuBs4HdgE/Ad62oBVLkvrqG+5V9VUgR9h87iz9C7h0yLokSUPwE6qS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWpQ33BPck2S/Unu7Wn7YJK9SbZ1j/N7tl2WZFeS+5O8brEKlyQd2SBn7tcC583SfmVVrekeNwMkOR1YB7y82+fvkxy3UMVKkgbTN9yr6ivADwd8vbXA9VX1VFU9COwCzhyiPknSPAwz5/7OJNu7aZuTu7blwMM9ffZ0bYdJsiHJVJKp6enpIcqQJB1qvuF+FfASYA2wD/jIXF+gqjZV1WRVTU5MTMyzDEnSbOYV7lX1SFUdqKqfAp/gZ1Mve4GVPV1XdG2SpBGaV7gnWdaz+kbgmStptgLrkpyY5DRgNXDncCVKkubq+H4dklwHnA0sSbIH+ABwdpI1QAG7gXcAVNWOJFuAncDTwKVVdWBxSpckHUnfcK+qi2dpvvoo/S8HLh+mKEnScPyEqiQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBfT/EJB0LVm28adwlSM8qnrlLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBnkppKSDjPOy091XXDC2Y7fGM3dJapDhLkkNMtwlqUGGuyQ1qG+4J7kmyf4k9/a0nZLkliTf6Z5P7tq
T5GNJdiXZnuSVi1m8JGl2g5y5Xwucd0jbRuDWqloN3NqtA7weWN09NgBXLUyZkqS56BvuVfUV4IeHNK8FNnfLm4ELe9o/WTNuB05KsmyhipUkDWa+c+5Lq2pft/x9YGm3vBx4uKffnq7tMEk2JJlKMjU9PT3PMiRJsxn6F6pVVUDNY79NVTVZVZMTExPDliFJ6jHfcH/kmemW7nl/174XWNnTb0XXJkkaofmG+1Zgfbe8Hrixp/0t3VUzZwFP9EzfSJJGpO+9ZZJcB5wNLEmyB/gAcAWwJcklwEPARV33m4HzgV3AT4C3LULNkqQ++oZ7VV18hE3nztK3gEuHLUqSNBw/oSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KD+t7PXZJGZdXGm8Zy3N1XXDCW4y4mz9wlqUGeuWtOxnVmJWluPHOXpAYZ7pLUoKGmZZLsBn4MHACerqrJJKcAnwVWAbuBi6rqseHKlCTNxUKcuf9eVa2pqslufSNwa1WtBm7t1iVJI7QY0zJrgc3d8mbgwkU4hiTpKIYN9wL+PcldSTZ0bUural+3/H1g6ZDHkCTN0bCXQv5uVe1N8iLgliTf6t1YVZWkZtux+2GwAeDUU08dsgxJUq+hztyram/3vB/4AnAm8EiSZQDd8/4j7LupqiaranJiYmKYMiRJh5h3uCf5xSQveGYZ+H3gXmArsL7rth64cdgiJUlzM8y0zFLgC0meeZ3PVNW/Jfk6sCXJJcBDwEXDlylJmot5h3tVPQC8Ypb2R4FzhylKkjQcP6EqSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGuRfYnoW8q8hSerHM3dJapDhLkkNMtwlqUHOuUv6uTfO32PtvuKCRXldz9wlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDfJDTEPwBl6SjlWeuUtSgxYt3JOcl+T+JLuSbFys40iSDrco0zJJjgM+DrwW2AN8PcnWqtq50MdyakSSDrdYZ+5nAruq6oGq+l/gemDtIh1LknSIxfqF6nLg4Z71PcCrejsk2QBs6FafTHL/ItUymyXAD0Z4vGcDx+RgjsfhHJODLch45MND7f7iI20Y29UyVbUJ2DSOYyeZqqrJcRz7WOWYHMzxOJxjcrBjfTwWa1pmL7CyZ31F1yZJGoHFCvevA6uTnJbkOcA6YOsiHUuSdIhFmZapqqeTvBP4EnAccE1V7ViMY83TWKaDjnGOycEcj8M5Jgc7pscjVTXuGiRJC8xPqEpSgwx3SWpQ0+He7xYISd6aZDrJtu7xJ+Ooc1QGuSVEkouS7EyyI8lnRl3jqA3wHrmy5/3x7SSPj6POURlgPE5NcluSu5NsT3L+OOocpQHG5MVJbu3G4z+TrBhHnYepqiYfzPwi97vArwLPAe4BTj+kz1uBvxt3rcfQeKwG7gZO7tZfNO66xz0mh/T/c2YuDhh77WN8j2wC/qxbPh3YPe66j4Ex+Sdgfbd8DvCpcdddVU2fuXsLhIMNMh5vBz5eVY8BVNX+Edc4anN9j1wMXDeSysZjkPEo4Je65V8GvjfC+sZhkDE5Hfhyt3zbLNvHouVwn+0WCMtn6fcH3X+nbkiycpbtrRhkPF4KvDTJfyW5Pcl5I6tuPAZ9j5DkxcBp/OybuEWDjMcHgT9Osge4mZn/zbRskDG5B3hTt/xG4AVJXjiC2o6q5XAfxL8Aq6rqN4FbgM1jrmfcjmdmauZsZs5SP5HkpLFWdOxYB9xQVQfGXciYXQxcW1UrgPOBTyX5ec+RvwBek+Ru4DXMfBp/7O+Tlv9R+t4CoaoeraqnutV/AH5rRLWNwyC3hNgDbK2q/6uqB4FvMxP2rZrLbTLW0faUDAw2HpcAWwCq6mvAc5m5gVarBsmR71XVm6rqDOCvurax/+K95XDvewuEJMt6Vt8A3DfC+kZtkFt
C/DMzZ+0kWcLMNM0DoyxyxAa6TUaSlwEnA18bcX2jNsh4/DdwLkCSX2cm3KdHWuVoDZIjS3r+93IZcM2Ia5xVs+FeVU8Dz9wC4T5gS1XtSPLXSd7QdXtXd8nfPcC7mLl6pkkDjseXgEeT7GTmF0Pvq6pHx1Px4htwTGDmG/r66i6HaNWA4/Fe4O3d98x1wFtbHpcBx+Rs4P4k3waWApePpdhDePsBSWpQs2fukvTzzHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDfp/HpRrKaUmfoQAAAAASUVORK5CYII=\n","text/plain":["<Figure size 432x288 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}},{"output_type":"stream","text":["0.7384074074074075\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"pDlKu0JrXEAt"},"source":["**Discuss your results 1p**"]},{"cell_type":"markdown","metadata":{"id":"UFim2sjfXF_H"},"source":["It mostly performed with the accuracy around 70%.\n","When trained with more data it performed a little better on average.\n"]},{"cell_type":"markdown","metadata":{"id":"kDyquaYOl0Sa"},"source":["## Classification accuracy using leave-one-out cross validation"]},{"cell_type":"markdown","metadata":{"id":"_88lR86Kl0Sa"},"source":["Again, predict the **ship type** using **speed, destination, transformed length, and breadth** of the ship as features. Find an estimation for the classification accuracy using *leave-one-out cross validation (LOO CV)*. <br>\n","\n"," - Use leave-one-out cross validation to estimate the model performance **1p**\n","     - Use kNN classifier with k=3\n","     - What is the classification accuracy? Compare the result with the one you got in the previous task\n"," - Which method gives better evaluation of the performance of the classifier with this data set? 
Explain your choice **1p**"]},{"cell_type":"markdown","metadata":{"id":"C9WP3MZOx8fe"},"source":["**Use kNN classifier with k=3**"]},{"cell_type":"code","metadata":{"id":"Qs_hya_7c8w0"},"source":["from sklearn.model_selection import LeaveOneOut\n","\n","loo = LeaveOneOut()\n","\n","neigh = KNeighborsClassifier(n_neighbors=3)\n","\n","accuracy = np.zeros(loo.get_n_splits(X))\n","\n","for train_index, test_index in loo.split(X):\n","  X_train, X_test = X.iloc[train_index,:], X.iloc[test_index,:]\n","  y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n","\n","  neigh.fit(X_train, np.ravel(y_train))\n","  \n","  accuracy[test_index] = neigh.score(X_test, np.ravel(y_test))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"c3HONsWQyBHz"},"source":["**What is the classification accuracy? Compare the result with the one you got in the previous task**"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"xEqI6C22yGV9","executionInfo":{"status":"ok","timestamp":1607102779029,"user_tz":-120,"elapsed":1161,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"d3c277da-2898-42e2-ec24-ebfc4c4241b4"},"source":["print(np.mean(accuracy))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["0.753731343283582\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"hh64ObxKioi-"},"source":["With leave-one-out cross-validation the estimated accuracy is about 0.75, which is slightly higher than the mean accuracies of the repeated random train/test splits in the previous task (about 0.72 with test size 0.5 and about 0.74 with test size 0.2)."]},{"cell_type":"markdown","metadata":{"id":"OEN34i4wyWGD"},"source":["**Which method gives better evaluation of the performance of the classifier with this data set? 
Explain your choice**"]},{"cell_type":"markdown","metadata":{"id":"CJLnbpmKym8t"},"source":["Leave-one-out cross-validation gives the better evaluation for this data set, because each model is trained on all samples except one (more training data than in any train/test split) and every data point is used exactly once as a test sample."]},{"cell_type":"markdown","metadata":{"id":"BYt8MfHHl0Sa"},"source":["## Model selection with leave-one-out cross validation"]},{"cell_type":"markdown","metadata":{"id":"lQ5coDj8l0Sa"},"source":["- Select the best model (kNN with selection of k) using leave-one-out cross validation **2p**\n","    - Repeat the model performance estimation with values k=1...30\n","    - Which value of k produces the best classification accuracy?\n","    - If the number of k is still increased, what is the limit that the classification accuracy approaches? Why?\n","- Can you say something about the performance of this *selected* model with new, unseen data? Explain how you could estimate the performance of this selected model. **1p**"]},{"cell_type":"markdown","metadata":{"id":"u9GSR9Pbywet"},"source":["**Repeat the model performance estimation with values k=1...30**"]},{"cell_type":"code","metadata":{"id":"cNNzbRXIj5XC"},"source":["loo = LeaveOneOut()\n","\n","accuracy = np.zeros(loo.get_n_splits(X))\n","\n","means = np.zeros(30)\n","\n","for i in range(30):\n","\n","  neigh = KNeighborsClassifier(n_neighbors=i+1)\n","\n","  for train_index, test_index in loo.split(X):\n","    X_train, X_test = X.iloc[train_index,:], X.iloc[test_index,:]\n","    y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n","\n","    neigh.fit(X_train, np.ravel(y_train))\n","    \n","    accuracy[test_index] = neigh.score(X_test, np.ravel(y_test))\n","\n","  means[i]=np.mean(accuracy)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":282},"id":"2qtS_ThqmnrA","executionInfo":{"status":"ok","timestamp":1607101292900,"user_tz":-120,"elapsed":41189,"user":{"displayName":"Elias 
Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"175f2223-8bbc-42ed-a94f-8bb2d20f7d14"},"source":["plt.plot(np.array(range(30))+1, means)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[<matplotlib.lines.Line2D at 0x7feab4bcc2e8>]"]},"metadata":{"tags":[]},"execution_count":32},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXSc1Zng/+9TpX1XWZJtSZaqJBmM2WwsySwG7CSkSWcIZJnE0KRDkoaATaZ/5PT5NWeWNENPn19Pd8+k+9ex2bLQWcBDCElIxw2kg7wAxpKMbbxho9WWbCPZpX2vqjt/qEqUZS0lqXY9n3N0UL31Lvel5EdX9733ecQYg1JKqfhliXQDlFJKhZYGeqWUinMa6JVSKs5poFdKqTingV4ppeJcQqQbMFleXp6x2+2RboZSSsWUAwcOXDDG5E/1XtQFervdTn19faSboZRSMUVEWqd7T4dulFIqzgUU6EXkThE5KSINIvL4FO9/T0QOeb9OiUi3d/smv+2HRGRYRO4J9k0opZSa3qxDNyJiBbYBdwBtQJ2IvGqMOe7bxxjzmN/+3wbWerfXAGu8221AA/BGMG9AKaXUzALp0VcDDcaYJmPMKLADuHuG/e8FXpxi+5eAfzPGDM69mUoppeYrkEBfBJzxe93m3XYZESkFHMCbU7y9mal/ASAiD4lIvYjUd3Z2BtAkpZRSgQr2w9jNwMvGGLf/RhFZDlwLvD7VQcaYZ40xlcaYyvz8KWcHKaWUmqdAAn07sMLvdbF321Sm67V/GfiVMWZsbs1TSim1UIEE+jpgpYg4RCSJ8WD+6uSdRGQVkAvsm+Ic043bx42mzn52n9JhJ6VU9Jk10BtjXMCjjA+7nABeMsYcE5EnReRzfrtuBnaYSQnuRcTO+F8Eu4PV6Gj0d6+d5NsvvBfpZiil1GUCWhlrjNkJ7Jy07buTXj8xzbEtTPPwNl4YY6htcdI77KJ3eIyslMRIN0kppSboytggaOzsxzkwCsDZ7qEIt0YppS6lgT4Iapu7Jr5v79JAr5SKLhrog6CuxUlqohWAdu3RK6WijAb6IKhtdrLxynySrBYN9EqpqKOBfoHOdg/R3j1EtcPG8pwUHbpRSkUdDfQLVNfiBKDKbqMoJ1Ufxiqloo4G+gXa3+wkMzmBq5ZnUZiTqkM3Sqmoo4F+geqanayz52K1CEU5qXT0jTDq8kS6WUopNUED/QJ0DYzyYUc/VXYbAEW5qRgD53uGI9wypZT6mAb6BfCNz1c7vIE+JxWAtm5Nua+Uih4a6BegrsVJUoKF64qzgY8D/dlu7dErpaKHBvoFqG3pYk1xDskJ44ullmWnALo6VikVXTTQz9PAiIuj7T0TwzYAKYlW8jOTdYqlUiqqaKCfp4Onu3F7DFV+gR7Gh290iqVSKppooJ+n2hYnFoEbSnIu2a6BXikVbTTQz1Nds5PVhVlkTso9X5Q7Hugn1V9RSqmI0UA/D6MuDwfPdE3Mn/dXmJ3CqMvD
hf7RCLRMKaUup4F+Ho6e7WF4zEP1FIG+KDcN0AIkSqnooYF+HmqbvYnMHFMEeu9ceh2nV0pFCw3081DX7KQsP528jOTL3vt40ZQGeqVUdNBAP0cej6G+tWvKYRuArNQEMpITaNNFU0qpKBFQoBeRO0XkpIg0iMjjU7z/PRE55P06JSLdfu+ViMgbInJCRI6LiD14zQ+/Ux199AyNTfkgFkBEKMxJ0aEbpVTUSJhtBxGxAtuAO4A2oE5EXjXGHPftY4x5zG//bwNr/U7xE+BvjDG/F5EMIKZz+NY1X5rIbCpagEQpFU0C6dFXAw3GmCZjzCiwA7h7hv3vBV4EEJHVQIIx5vcAxph+Y0xMp3asbeliWVYKxbmp0+7jm0uvlFLRIJBAXwSc8Xvd5t12GREpBRzAm95NVwDdIvKKiBwUkb/3/oUw+biHRKReROo7OzvndgdhZIyhtvki1Q4bIjLtfoU5qXQPjjEw4gpj65RSamrBfhi7GXjZGOP2vk4AbgX+AqgCyoAHJh9kjHnWGFNpjKnMz88PcpOC54xziI96R6acVulPZ94opaJJIIG+HVjh97rYu20qm/EO23i1AYe8wz4u4NfADfNpaDSo9RUameZBrM/HBUg00CulIi+QQF8HrBQRh4gkMR7MX528k4isAnKBfZOOzRERXzf9E8DxycfGirpmJ9mpiawsyJhxv6Jc7dErpaLHrIHe2xN/FHgdOAG8ZIw5JiJPisjn/HbdDOwwftm8vEM4fwH8QUSOAAI8F8wbCKe6FidV9lwslunH5wEKMlNIsIgWIFFKRYVZp1cCGGN2AjsnbfvupNdPTHPs74Hr5tm+qNHZN0LThQG+UrVi1n2tFmFZdor26JVSUUFXxgZociHw2WheeqVUtNBAH6DaZiepiVauKcoOaP/xRVNaJFwpFXka6ANU1+JkbUkOidbA/pcV5aZyvncYlzumFwIrpeKABvoA9A2PceJc77T5baZSmJOK22M436u9eqVUZGmgD8CB1i48JvDxefBfNKWBXikVWRroA1DX4iTBIqydVAh8Jr659O3dMZ3aRykVBzTQB6C22ck1RdmkJQU0GxWAwmzt0SulooMG+lkMj7k5fKZnTsM2AKlJVpakJ2kBEqVUxGmgn8X7bT2Muj1zehDrU6h56ZVSUUAD/Sx8C6UqS3PnfKwumlJKRQMN9LOobXZyxdIMctOT5nxsUW4q7V1D+KX/UUqpsNNAPwO3x/Bea9e8hm1gfOhmaMxN9+BYkFumlFKB00A/g9eOnqdvxMVtV8yvGIpvLr0O3yilIkkD/TSMMWyraaAsP51PXbV0XufQQK+UigYa6Kex61Qnx8/18sjt5VhnyT8/nYlFUzrFUikVQRrop2CMYdubDRTlpHLP2inroAckNy2R1ESr9uiVUhGlgX4Ktc1O6lu7eOi2soCzVU5FRCjM0QIkSqnI0kA/hW27GsnLSAqomtRsinLTtEevlIooDfSTvN/WzZ5TnXxzQxkpidYFn69Ie/RKqQjTQD/J9ppGslISuP/GkqCcrygnlQv9owyPuYNyPqWUmquAAr2I3CkiJ0WkQUQen+L974nIIe/XKRHp9nvP7ffeq8FsfLB9+FEfrx07zwM328lMSQzKOQsn8tJrr14pFRmz5t0VESuwDbgDaAPqRORVY8xx3z7GmMf89v82sNbvFEPGmDXBa3LoPLWrkdREKw/c4gjaOf3n0pflZwTtvEopFahAevTVQIMxpskYMwrsAO6eYf97gReD0bhwOuMc5DeHz3Lf+hJs88hrMx2dS6+UirRAAn0RcMbvdZt322VEpBRwAG/6bU4RkXoReVdE7pnmuIe8+9R3dnYG2PTgemZPI1YRHry1LKjnXZqVgkV06EYpFTnBfhi7GXjZGOP/5LHUGFMJ3Af8o4iUTz7IGPOsMabSGFOZnz+/vDIL0dE7zEv1bXxxXTHLslOCeu5Eq4VlWSm0aaBXSkVIIIG+HfCfUF7s3TaVzUwa
tjHGtHv/2wTs4tLx+6jwg7eacbk9PHx7cHvzPlqARCkVSYEE+jpgpYg4RCSJ8WB+2ewZEVkF5AL7/Lblikiy9/s84Bbg+ORjI6l7cJSfvdvK564vpHRJekiuUZSrBUiUUpEza6A3xriAR4HXgRPAS8aYYyLypIh8zm/XzcAOc2mVjauAehE5DNQAf+s/Wyca/PjtFgZH3TyysSJk1yjKSeVc9zBujxYgUUqF36zTKwGMMTuBnZO2fXfS6yemOO4d4NoFtC+k+kdcPP9OC3esXsqVyzJDdp3CnFRcHkNn30jQnwEopdRsFvXK2Bf2t9IzNMbWTaHrzYPfFMvuwZBeRymlprJoA/3wmJvn9jazoSKPNStyQnqtjxdNDYf0OkopNZVFG+h/caCNzr4Rtmy6bLZn0E0Eel00pZSKgEUZ6MfcHp7Z3cgNJTncVLYk5NdLT04gJy1Rp1gqpSIioIex8ea3h8/S1jXEf//c1YjMr0zgXBVmz32K5f6mi7x8oC2gfROswpaNFaywpc2neZcZc3vYXtPI5uoVLM3SB8hKxbJFGej/8EEHhdkpfGJVQdiuWZSbyumLgT+MdXsMj79yhI96h8lJnT2TZkffCIOjbv5pc3DWo/3qYDvf+/dTiMB/+uTKoJxTKRUZizLQN3b0s2p5Vth68zA+Tv9u48WA99955BzNFwZ4+v4buPOa5bPu///tPMFze5v4zh1XLHjhl9tjeHpXIwB1Lc4FnUspFXmLboze7TE0XRigPD80q2CnU5STSt+Ii56hsVn3NcawraaBioIMPr16WUDn/+YGBwlWC0/vblxoU3nt6HmaLgxQuiSN91q7cLk9Cz6nUipyFl2gb+saZNTloaIgvLnhfXPpA3kg++YHHXxwvo8tG8uxWAL7q6MgK4UvVxbz8oE2zvfMfxqnMYbv1zRQlp/Od+64goFRN8fP9c77fEqpyFt0gb6hox8g7IG+MMAplr5AW5ybyl3XF87pGt+6rRyPgef2Ns27nbtOdnLiXC+P3F7Ojd4ZSbXNOnyjVCxbvIE+P3QpD6bim0t/tmfmQL+v6SIHT3fzrdvLSbTO7eNZYUvj7jWFvLD/NM6B0Tm30fdLpignlXvWFrE0K4USW5oGeqVi3KIL9I2d/eRlJJOdFpyasIFakp5EUoJl1h799ppG8jOT+Y/riud1nS0byxl2ufnx281zPnZ/s5MDrV08dFvZxC+ZaoeN+tYuLs1Vp5SKJYsu0Dd09If9QSyAxSIU5aTOWIDk0Jlu3mq4wIO3OkhJtM7rOhUFmdx59TKef6eFvuHZH/z621bTQF5GEl+p+rj8QLXdhnNglMbO/nm1RykVeYsq0BtjaOjoD/v4vE9hTsqMD2O31TSQnZrIfetLF3SdLRsr6Bt28dN3WwM+5v22bvZ+eIFvbii75JdMlcMGQG1z14LapJSKnEUV6Dv7R+gddkUs0BflpE47dHPyfB+/P/4RX7/FTkbywpY3XFuczW1X5PPDvc0MjbpnP4DxIaOslATuv7Hkku32JWnkZSTrfHqlYtiiCvSNHQNA+Gfc+BTlpNHRN8KI6/Lgu31XA2lJVh642R6Ua23dWM7FgVFeqj8z674fftTHa8fO88DNdjJTLn12ISJUO3L1gaxSMWxRBfoG7zhzeX7khm6Ay+a5t14c4LeHz3L/jaXkpCUF5Vrry5ZQZc/lmd2NjLpmXvD01K5GUhOtPHCLY8r3q+w22ruHtByiUjFqUQX6xo5+0pOsLI9QlaeJAiSThm+e3t1EgtXCn22YOtDO15ZNFZztGebXh6ar5Q5nnIP85vBZ7ltfgi196l8yVfbxcfo67dUrFZMWVaBv6OinvCAjrDlu/H1cgOTjQH++Z5hfHmjjy5XFFAQ5S+TGK/K5ujCLp3c1Tluv9undjVhFePDWsmnPc9XyLDKTE6jVcXqlYtKiCvSNnf1URGjYBmB5dioilwb65/Y24TaGb90W/AIoIsLWTRU0XRjg346eu+z9
jt5hflHfxhfXFc9Yy9ZqEdbZc7VHr1SMCijQi8idInJSRBpE5PEp3v+eiBzyfp0Ske5J72eJSJuIfD9YDZ+r/hEX53qGKY/Qg1iApAQLBZnJE1MsnQOjvLD/NHevKQxaHvnJ/ujqZZTlp7OtpvGyRU8/eKsZl8fDI7fP/kumym7jw47+ea24VUpF1qyBXkSswDbgM8Bq4F4RWe2/jzHmMWPMGmPMGuCfgVcmneavgT3BafL8NHZE9kGsT2HOxwVIfvx2M8MuN1s2hq6codUiPHJ7OSfO9bLrZOfE9q6BUX72biufu76QkiWz/5Kp9s6n12mWSsWeQHr01UCDMabJGDMK7ADunmH/e4EXfS9EZB2wFHhjIQ1dqEglM5usKCeVs93D9A2P8fw7LfzR6mVUFIQ27849a4soyknl+zUNE736599pYXDUzSMbKwI6x3XF2SQlWHT4RqkYFEigLwL8J2O3ebddRkRKAQfwpve1BfhfwF8srJkL19DZT4JFKA2g9xpKRd4e/U/2tdI37GLrpsAC7UIkWi186/YyDrR2sb/ZSf+Ii+ffaeHTq5dy5bLAfskkJ1hZU5yjPXqlYlCwH8ZuBl42xvhWBG0BdhpjZix8KiIPiUi9iNR3dnbOtOu8NXb0Y89Ln3NGyGAryk1l1OXh6V2N3HZFPtcWZ4flul+uXEFeRhLbahr4+but9AyNsWWOv2SqHTaOnu1lYMQVolYqpUIhkKjXDqzwe13s3TaVzfgN2wA3AY+KSAvwD8CfisjfTj7IGPOsMabSGFOZn58fUMPnqqEzMsnMJvNNsewbcbE1hGPzk6UkWvnmhjL2fniB77/ZwIaKPNasyJnTOaocNtwew8HT3bPvrJSKGoEE+jpgpYg4RCSJ8WD+6uSdRGQVkAvs820zxvyJMabEGGNnfPjmJ8aYy2bthNqoy0PrxcGIj8/DxwVIquy5rPcW9giX+28sISslYfyXzDyGjG4oycEizHs+/YjLzf987QPOzZKTX6lgeu3oeX4RQCqQeDZr9ixjjEtEHgVeB6zAj4wxx0TkSaDeGOML+puBHSYKE5e3XhzA7TFREejL8tO5/Yp8/tMnV4b92pkpifzlZ1Zx6HQ3N5bZ5nX86sIsapsDL3Lu7xf1bTy1q5El6Un82QwLtJQKlv4RF3/5y/cZGnVz68r8GdeLxLOA0iQaY3YCOydt++6k10/Mco7ngefn1Log8eVSD3dVqakkJ1j5l29UR+z6f7K+lD9ZQBrkKruNF/afZtTlISkh8OcdLrdnonC5bwaUUqH2wv7x51EWgR/sbeK//ofVsx8UhxbFylhfYCmLgjH6WFdttzHi8nCkvWdOx/32/bO0dQ2RnmTVQK/CYnjMzXN7m9lQkcc9a4r4+f7TdC3SBX+LJtAXZqeQvsA87woq7XNfOOXxGLbXNLJqWSZ3XV+o1apUWPziQBudfSNs2VTOIxvLGRqbX4nNeLA4An1nf0RTH8ST/MxkyvLT57Rw6o3jH/FhRz9bNlVQUZBB1+AYF/tHQthKtdiNuT08s7uRtSU53FS2hJVLM/mjq5fOq8RmPIj7QO/xGBo7BqLiQWy8qLbbqGtx4pkmI6Y/YwzbdzVgX5LGZ69dPvE56PCNCqXfHh4fKnx0U8VEttqtmyroHXbx8/2nI9y68Iv7QH+ud5ihMbcG+iCqstvoHXZx8qO+Wfd9q+EC77f18PDt5VgtMpFrqEGHb1SIeDyG7bvGhwo/sapgYvt1xTncujKPH+xtZngssBKb8SLuA/1EjpsIJzOLJ3NJcPb9NxtYlpXC528Yz5pRlJNKaqJ1oqyjUsH2xvHzNHiHCifXnti6qYIL/SMBldiMJ4sm0OsYffAU56ayLCtl1jqy9S1O9jc7eei2MpITrABYLEJZfrr26FVIGGPYVtM4MVQ42XqHjcrSXJ7Z3cSYe+YSm/FkUQT6nLRElkxTJk/NnYhQ5Rgfp59pfdz2XY3Y0pPYXL3iku0VBRkTaaOV
CqY9H17gSHsPj2wcHyqczFeMp717iF8fnL7EZryJ+0DvqyoVqfKB8araYeOj3hHOOKdOZ3DsbA9vftDBN26xk5Z06bTWivwM2ruHNDmaCrptNQ0sz07h82uLp91n45X5rF6exVO7py+xGW/iP9B39OuD2BCo9s6nny7vzfZdjWQmJ/DVm+yXvecbRmvq1HF6FTx1LU5qvUOFM63aniix2TnA68fOh7GFkRPXgb5rYJSLA6MRryoVj1YWZJCdmjhl3pumzn52HjnHV28qJTs18bL3fb94deGUCqZtNQ3jQ4VVJbPue+c1yyjLS2ebXzGeeBbXgd73wE979MFnsQhV9lzqWroue++pXY0kJ1j4xgbHlMfal6RjtYjOpVdBc7S9h10nO/nmBgepSdZZ97dahIc3lnPsbC+7ToWmBkY0ie9AHyXlA+NVld1G84UBOvqGJ7a1dw/xq4PtbK4qIS8jecrjkhIslNrSNNCroHlqYqgw8IR9n/eW2Nxe0xDClkWHuA70jR39pCRaJop9qOCq8s6nr/fr1T+3pwmAh26bOQ1xWX6GTrFUQdHQ0c/Oo+f405tLyUq5fKhwOolWCw/dVkZdSxf7m+aXejtWxHWgb+jspywvA8sU06zUwl1TmE1KomViPn1n3wgv1p7mCzcUTRRYmU5FQQatFwcW1VxmFRpP7/YOFd4y9VDhTL5S5S2xuasxBC2LHvEd6Ds0mVkoJSVYuKEkdyLQ/+jtZsbcHh7ZOHv1qoqCDMbchtPOwVA3U8Wxtq5Bfn2wnXurS1gyzVDhTHwlNvec6uRI29xSb8eSuA30Q6Nu2ruHNPVBiFXZbZw430tb1yA/3dfKH1+7HEfe7Hn/NbmZCoZn9zQhAg8uoGLZ/TeWkJmSwLY4HquP20DfdKEfY/RBbKhVO2wYA9/5P4fpH3GxJYDePDBRqF0DvZqvjr5hdtSd4Qtri2cdKpxJZkoiD9xs57Vj5/kwgER9sShuA73OuAmPtSU5JFiE2hYnn1xVwOrCrICOy0xJZGlWss6lV/P2o7dacLk9PLyxfMHn+votDlITrTy1Oz7H6uO25FJjRz8WAXteWqSbEtfSkhK4uiibw2e62bIpsN68j+a8UZP95lA7//r+uYD2fbvhAp+9rjCgocLZ2NKTuG99Cc+/00LvkItAMqZsurKA+9bPvjgrGsRtoG/o7KfEljaRNVGFztdvtnPoTDfrSnPndFxFfga/fK8dY4zmIlL0DY/x3359lKQEK/mZsz9YXbk0kz//5MqgXf9bt5Xxfls37d1T52/y19E7zHutXdxbvSImfnYDCvQicifwT4AV+IEx5m8nvf89YJP3ZRpQYIzJEZFS4FeMDxElAv9sjHk6WI2fSYPmuAmbe9YWcc/aojkfV1GQQf+Ii/O9wyzP1rUOi93P95+md9jFq4+u57rinLBfvyArhV88fHNA++6oPc3jrxyhsTM2qtfNGuhFxApsA+4A2oA6EXnVGHPct48x5jG//b8NrPW+PAfcZIwZEZEM4Kj32LPBvInJXG4PLRcG2eRXXUZFn4lqUx39GugXueExNz/Y28ytK/MiEuTnqsqv+E4sBPpAHsZWAw3GmCZjzCiwA7h7hv3vBV4EMMaMGmN8VaCTA7zegp3pGmLU7dGplVFuIrmZjtMvei/Vn+FC/whb5/icJ1LK8tLJy0iibpbiO9EikMBbBPjX3WrzbruMd6jGAbzpt22FiLzvPcf/nKo3LyIPiUi9iNR3di48wZBWlYoN+ZnJZKYkaCqERW7M7eGZ3U2sK81lvbenHO1EhCq7bdo03dEm2D3szcDLxpiJyrvGmDPGmOuACuBrIrJ08kHGmGeNMZXGmMr8/PwFN0KnVsYGEaGiIEPn0i9yvz7YTnv3EI9OUeM1mlXZbbR1DXGuZ/aHt5EWSKBvB/xrwRV7t01lM95hm8m8PfmjwK1zaeB8NHb2U5CZPKcERyoyKvIzaNBC4YuW22N4ancjVy3PYuOVC+/k
hVO196+P2WonR4NAAn0dsFJEHCKSxHgwf3XyTiKyCsgF9vltKxaRVO/3ucAG4GQwGj4TnXETO8oLMrjQP0LP4Fikm6Ii4PVj52nqHGDrpvKY6s0DXLU8i4zkhPgI9MYYF/Ao8DpwAnjJGHNMRJ4Ukc/57boZ2GEuLddyFbBfRA4Du4F/MMYcCV7zp2yvlg+MIb4H5jpOv/gYY9hW00BZXjqfuWZ5pJszZ1aLcENpLnUxME4f0Dx6Y8xOYOekbd+d9PqJKY77PXDdAto3Zx19I/SNuLR8YIzwn3kz1wVXKrbtOtXJsbO9/N2XrsMao6nE1zts/P3rJ+kaGCU3PSnSzZlW3OW60QexsWWFLY0kq0V79IvQ9poGCrNTuGfN3BfbRYsqu7f4TuvlJTWjSdwF+katExtTrBbBkZeuc+kXmdpmJ3UtXXzr9nKSEmI3DF1XnE2S1RL1wzex+394Gg0d/WQmJ1AQQK4MFR0qCrSs4GLz/ZoG8jKS+ErVitl3jmIpiVauX5Ed9Q9k4zLQlxVkxNwT/MWsvCCDM85Bhsfcs++sYt6Rth72nOrkGxscpCTGftLBKruNo+09DI66It2UacVloNfUB7GloiADj4HmCzqffjHYvquBzJQE7r+xNNJNCYoqhw2Xx3DwdHekmzKtuAr0vcNjdPSN6Ph8jKnI17KCi0VDRx+vHTvPAzfb42ZB47rSXCwS3Qun4irQN+qMm5hUlp+OCFptahHYvquRlAQrX7/FEemmBE1WSiJXLc+K6geycRXodWplbEpJtFKcm6o9+jh3xjnIbw6d5d7qEmxRPOd8PqrsNg6e7mbM7Yl0U6YUX4G+s58kq4UVuZrbPNaM57zRQB/PntnTiEXgodvKIt2UoKt22Bgac3O0vSfSTZlSXAX6xo5+7HlpJFjj6rYWhYqCDJouDOD2mNl3VjGno3eYl+rb+NK6YpZlp0S6OUHnWzgVrcM3cRURY6Wsl7pceX4Goy4P7V3Rn/JVzd0P32rG5fbwrdvKI92UkMjPTMaRlx61D2Tjpjj4iMtN68UB7rou9pIjqY+fqzR09lGyJC2ibRkec/Pd3xxly8YK7HnpEW3LTPqGx3j8l0foHQ5u5s8kq4W/uuvqoH0O3YOj/OzdVu66vjCq/38uVLXdxmvHzuPxGCxRlrsnbnr0PYNjVJbauLooO9JNUfMwEeijYJx+X+NFXqpv4x/eCHlG7QX5yb5WfnfkHH3DLvpHgve1t+EC//iHU0Fr5/PvtDAw6uaRjfHZm/epctjoGRrjwyj4GZ4sbnr0BVkpvPTwTZFuhpqnnLQk8jKSoiLQ+8rD/e7IOb7T2U9ZFC7AGxx18cO3mtl0ZT4//np1UM/91/96nOffaeGxT13BCtvCevX9Iy5+/HYLn7pqKauWZQWphdGp2jtOX9vi5MplmRFuzaXipkevYl95fgaNnZFfHVvX7KQsP50kq4WndzdGujlT2lF7BufAaEiKaT94axkWGZ8ls1Av7G+lZ2iMrZviuzcPsMKWytKs5KgsGK6BXkWNcm/92Etr14TX8Jib99t6+NRVS7m3uoRX3huvZxpNRl0ent3TRLXDRqU9+MW0l81txy0AABXgSURBVGWn8KV1xbxU30ZH7/C8zzM85ua5vc3cUrGEtSXxX2tgomB4szOiP8NT0UCvokZFfgY9Q2Nc6B+NWBsOn+lm1O2hym7jQe987+f2NEWsPVN55b02zvcO82gIevM+D99ejsvt4QdvNc/7HC8faKOzb4StG0PXzmiz3mHjfO8wbVE2e0wDvYoa0fBA1jcPurI0l6KcVD6/togXa09zoX8kYm3y53J7eGp3I9cWZXPryryQXad0STp3XV/Iz95tpXtw7r94x9went7dyNqSHG4qXxKCFkanqigtGK6BXkWNj6dYRi7Q17Z0ccXSjImycI9sLGfU7eFHC+jZBtPOo+dpvTgYlmLaWzZWMDjq5vl3WuZ87G8Pn6Wta4itGysWVcrwKwoy
yU5NjLqFUxroVdRYnp1CWpI1YtWmXG4PB1qcVDs+Hvcuy8/gj69dzk/3jT9UjCSPx7C9poGKggw+vXpZyK935bJM7li9lB+/3UL/SOC51j0ew/ZdjaxalsknVhWEsIXRx2IRKktzJ2ZuRYuAAr2I3CkiJ0WkQUQen+L974nIIe/XKRHp9m5fIyL7ROSYiLwvIl8J9g2o+CEi3pk3kQn0J871MTDqnljO7rNlYzl9Iy5+uq8lIu3yefODDj4438eWjeVhW5CzdVMFPUNjvLC/NeBj3jh+noaOfrZsqoi6hUPhUOWw0dQ5EDXDfRBAoBcRK7AN+AywGrhXRFb772OMecwYs8YYswb4Z+AV71uDwJ8aY64G7gT+UURygnkDKr5UFEQuuZmvF+bfowe4ujCbT6wq4Edvt0SsipAxhu/XNFCcm8pd1xeG7bprVuSwoSKP5/Y2B1QBzBjDtppG7EvS+Oy1i3OV+kTB8Cjq1QfSo68GGowxTcaYUWAHcPcM+98LvAhgjDlljPnQ+/1ZoAPIX1iTVTyrKMjgXM/wnIYKgqWu2UlxbirLsy/Pfrp1UznOgVFerD0T9nbB+GrdQ2e6+dbt5SSGOWnflk3ldPaN8IsDbbPuu/fDCxxp7+Hh28uxLsLePMC1RdmkJFrYH0UPZAP5iSkC/H+627zbLiMipYADeHOK96qBJOCyVRgi8pCI1ItIfWdnZyDtVnGq3LsKtSnMwzfGGOpanBOrGydbV2pjvcPGc3uaGHGFv7bttl0N5Gcm8x/XFYf92jeVLWFtSQ7P7G6cNd/6tpoGlmen8IUbwt/OaJGUYGHtityoeiAb7K7BZuBlY8wl/xJEZDnwU+DrxpjLflKMMc8aYyqNMZX5+drhX8wqCsaTXoV7+KbpwgAXB0YnpsdN5dFPVHC+d5hfvdcexpbBwdNdvN1wkQdvjUwxbRHh0U0VtHUN8dvDZ6fdr77Fyf5mJw/eWkZSwuKe51HlsHH8bC99QU44N1+BfBrtwAq/18XebVPZjHfYxkdEsoDfAf/FGPPufBqpFo/SJekkWCTsgd4373ny+Ly/DRV5XFeczVO7G3GFsZLQtppGslMTuW995Ippf2JVAauWZbJ9VyOeaWoGbKtpwJaexObqFVO+v5hU2214DLwXJQXDAwn0dcBKEXGISBLjwfzVyTuJyCogF9jnty0J+BXwE2PMy8FpsopniVYLpUvSwh7o65qd5GUkUTZDGl0RYcvGClovDvK7I+fC0q4Pzvfy7yc+4uu32MlIjlwOQhFhy6YKGjr6eeP4+cveP3a2h5qTnXxzg4O0pLjJlThva0tysFokavLezBrojTEu4FHgdeAE8JIx5piIPCkin/PbdTOww1ya5OHLwG3AA37TL9cEsf0qDlUUhH+KZW2Lk8pS26yLez69eikrCzLYXjN9zzaYntrVSFqSlQdutof8WrP57LXLsS9JY1tN42W5XLbXNJKZnMD9N0bur45okp6cwDWFWVEznz6ggTRjzE5jzBXGmHJjzN94t33XGPOq3z5PGGMen3Tcz4wxib6pl96vQ8G9BRVvKgoyaL04GLZCy+d6hmjrGppxfN7HYhG2bCrn5Ed9/OGDjpC2q+XCAL89fJb7bywlJy3yxbStFuGRjeUcae9hz4cXJrY3dvaz8+g5vnpTKdmpiRFsYXSpdtg4dKY7Ig/vJ1vcT0xUVCrPz8DlMbReDE/K4onx+QAzQd51XSErbKl8v6YhpFkKn9nTSILVwp9tcITsGnP1+bXFLM9OYVtNw8S2p3c1kpxg4RtR1M5oUGW3Mery8H5b5AuGa6BXUSfcyc3qWpykJ1m5anlgxSISrBYevr2cw2e6eafxYkjadK5niJcPtPHlymIKsqKnmHZSgoWHbiujttlJXYuTtq5BfnWwnc1VJeRlJEe6eVHFt3AqGhKcaaBXUcc3lz5sgb65i3V2GwlzWIj0xRuKKchMvqRnG0zP7WnGY4jKYtqbq0pYkp7EtpoG
ntvThAg85E3prD6Wm57EyoKMqJhPr4FeRZ305AQKs1PCUm2qa2CUkx/1UW2fW2GMlEQrD95axjuNF3nvdFdQ23Sxf4QXa09z9/WFCy7lFwqpSVa+scHBrpOdvFB7mi+sLaYw5/LVxGp8Pv2Bli7cYXhwPxMN9CoqlRdkcPJ8X8ivU986HqQnJzILxH3rS8hJS+SpXcEtN/gv+1oZdrnZEsXl9756UymZyQm4PYaH47zo90JU2230jbg4frY3ou3QQK+i0i0VeRw/18vR9tA+yKprcZJktXD9irnn2ktPTuDza4vYfaozqDMr/v34R6x32KgoiK4C0/6yUhJ54nNX8//euQrHDGsPFrtbV+aRnGDhZ+8Gnv0zFDTQq6h03/oSMlMS2L4rNGPgPrXNTq4rzp53aoH1jiWMujwcCdLMit7hMU6c72W9I/qrMn1xXTEP3669+ZksyUhmc9UKXjnYxtkI1h7WQK+iUlZKIl+7yc6/HT0fsoeyg6Mujrb3BDR/fjpV3rH9YC2MOdDahTEzp2JQseXB28owBp6NYO1hDfQqan39FjvJCZagj4H7HDrdjctjFhRUl2QkU56fHrSl7nXNThIswtoSLdsQL4pz07hnbRE76iJXe1gDvYpaSzKSube6hF8faueMczDo59/f7EQE1pXObcbNZNWOJdS3BmdmRV2Lk2uKsjVfTJx5ZGM5Iy4PP347MrWHNdCrqPbQbWVYBJ7bG/w/e+tanFy1LIuslIUt26925NI37OKD8wubWTE85ubwmR4dtolD5fkZ/PE1y/nJO5GpPayBXkW15dmpfPGGYnbUnaGjbzho5x1zezh4ujsoQdU3NXOhwzeHz3Qz6vbMa6qnin6PeGsPR2IGjgZ6FfUevr0cl9vDD98K3p+9R9t7GBq7vBD4fBTnplGYnUJdy8IWTvlWUFYucChJRadrirLZdGU+P3yrmaHR8CY600Cvop49L53/cF0hP9vXSs9gcP7s9QXVKkdwgmqVw0Zti3NBSc5qW7q4YmkGuemRz1SpQmPrpgpv7eHTYb2uBnoVEx7ZWM7AqJvn32kJyvlqm7tw5KVTkBmchGFVdhudfSO0XpzfQ2O3x/Bea5cO28S5SruNaoeNZ/c0MeoKX5UyDfQqJly1PItPXVXAj99pZmDEtaBzeTyG+lbnxBz4YFjvHeuf73z6E+d66R9x6YPYReDRTd7awwfbwnZNDfQqZmzZVEH34NiC/+z9sKOf7sGxoPaeKwoyyE1LnHdK2v0B1KxV8eHWlXlcW5TNU7vCV3tYA72KGTeU5HJz+RKe3dPE8Nj8H2b5et3BDKoiQqXdNu+UtHXNTopzU1merVkg452IsHVTOS0XB9l59PL6u6GggV7FlK2bKujoG+GX783/z966ZicFmcmUBDkFcLXdRuvFQTp65zYN1BhDXYsz4ApXKvZ9evUyKgoy2B7iKmU+GuhVTLm5fAnXr8jh6d3z+7PXF1SrHLMXAp+rqnmO0zddGODiwOiCcu6o2GKxCFs2lvPB+T7+cCK0tYdBA72KMSLCo5sqOOMc4rfvn53z8W1dQ5zrGZ54eBpMVxdmkZponfPCKd/+OuNmcbnr+kKKc0NfexgCDPQicqeInBSRBhF5fIr3vycih7xfp0Sk2++910SkW0T+NZgNV4vXJ1cVcOXSTLbXNOKZY36ZifnzIQiqiVYL60pzqZ3jwqnaFidL0pMoz9e87otJorf28KEz3exrCk3tYZ9ZA72IWIFtwGeA1cC9IrLafx9jzGPGmDXGmDXAPwOv+L3998BXg9dktdhZLMKWTeV82NHPG8c/mtOxdS1OslISuHJpaIp6VNltfHC+d075TGqbnVTZgz+UpKLfl9aFtvawTyA9+mqgwRjTZIwZBXYAd8+w/73Ai74Xxpg/AKGvCacWlc9eu5wSWxrbd83tz979zU4q7TYsltAE1SpHLsbAgdbAhm/O9QzR1jWk4/OLlK/28NsNFzkY5NrD/gIJ9EXA
Gb/Xbd5tlxGRUsABvDmXRojIQyJSLyL1nZ2dczlULVIJVguPbCzn/bYe3mq4ENAxF/pHaOocCOlY+NoVuSRahdrmwP7R+ubd64ybxeu+9SVkpyayrSY0dRcg+A9jNwMvG2PmNMnZGPOsMabSGFOZn58f5CapePWFG4pYmpXM998M7M/e+on586FLGpaaZOWaouyA59PXtThJT7Jy1fLorQ+rQis9OYGv32Ln3098tOBU19MJpLpBO7DC73Wxd9tUNgNbF9oopQKRnDD+Z+//+N0JPvv/78Uyyxh3Z98IyQkWri0KbfWmaruNH73dzPCYe9ZatHXNXdxQmkuCVSfALWYP3GznuT1NPLWrkX/avDbo5w8k0NcBK0XEwXiA3wzcN3knEVkF5AL7gtpCpWZw3/oSjp/tpTuAh5/5mcmsd9hISghtUK122HhmTxOHznRzY9n0Rb67B0c5+VEfd12/PKTtUdEvJy2JP7u1jKExN8aYoD+YnzXQG2NcIvIo8DpgBX5kjDkmIk8C9caYV727bgZ2mElPxkRkL7AKyBCRNuCbxpjXg3oXatFKS0rgf39lTaSbcYnKUhsi4+PvMwV6X/56nT+vAB6744qQnTugwpTGmJ3Azknbvjvp9RPTHHvrfBunVCzKTkvkyqWZs47T17U4SbJauH6FFgJXoaUDg0qFQJXdxnutXTOmaahtdnJdcfas4/hKLZQGeqVCoMphY2DUzfFzU8+iGBx1cbS9R+fPq7DQQK9UCPjmxU+Xn/7Q6W5cHqPz51VYaKBXKgSWZadQYkubdpy+tsWJCKwLYpUrpaajgV6pEKmy26hr6ZoyRUNts5OrlmWRlZIYgZapxUYDvVIhUu3IxTkwSmNn/yXbx9weDp7u1rKBKmw00CsVIlUT4/SX5r052t7D0Jhb58+rsNFAr1SIOPLSyctIumycfiInfghz7ijlTwO9UiEiIlTZbZfNvKlt7sK+JI2CzJQItUwtNhrolQqhaoeN9u4h2ruHAPB4DPWtTh2fV2GlgV6pEPKNw/vqwn7Y0U/34JiOz6uw0kCvVAhdtTyLzOQEar3j8rUTOfE10Kvw0UCvVAhZLcINpbkTPfq6ZicFmcmU2NIi3DK1mGigVyrEqh02Puzop2tglLoWJ1UOLQSuwksDvVIh5huP//Whds71DGt+GxV2GuiVCrHrirNJSrDw7J4mQMfnVfhpoFcqxFISrawpzuFczzBZKQlcuVQLgavw0kCvVBj4VsFW2m1YLDo+r8JLA71SYeAbp9f58yoSNNArFQY3lS/hwVsdfPGGokg3RS1CAQV6EblTRE6KSIOIPD7F+98TkUPer1Mi0u333tdE5EPv19eC2XilYkVygpX/8tnVFGRpfhsVfgmz7SAiVmAbcAfQBtSJyKvGmOO+fYwxj/nt/21grfd7G/BXQCVggAPeYy/N26qUUipkAunRVwMNxpgmY8wosAO4e4b97wVe9H7/R8DvjTFOb3D/PXDnQhqslFJqbgIJ9EXAGb/Xbd5tlxGRUsABvDmXY0XkIRGpF5H6zs7OQNqtlFIqQMF+GLsZeNkY457LQcaYZ40xlcaYyvz8/CA3SSmlFrdAAn07sMLvdbF321Q28/GwzVyPVUopFQKBBPo6YKWIOEQkifFg/urknURkFZAL7PPb/DrwaRHJFZFc4NPebUoppcJk1lk3xhiXiDzKeIC2Aj8yxhwTkSeBemOML+hvBnYYY4zfsU4R+WvGf1kAPGmMubSumlJKqZASv7gcFSorK019fX2km6GUUjFFRA4YYyqnfC/aAr2IdAKtkzbnARci0JxQird7irf7gfi7p3i7H4i/e1rI/ZQaY6aczRJ1gX4qIlI/3W+qWBVv9xRv9wPxd0/xdj8Qf/cUqvvRXDdKKRXnNNArpVSci5VA/2ykGxAC8XZP8XY/EH/3FG/3A/F3TyG5n5gYo1dKKTV/sdKjV0opNU8a6JVSKs5FfaCfrehJrBGRFhE54i3SEpMr
w0TkRyLSISJH/bbZROT33gIzv/emvIgJ09zPEyLS7ldQ548j2ca5EpEVIlIjIsdF5JiI/Ll3e0x+TjPcT8x+TiKSIiK1InLYe0//3bvdISL7vTHv/3hTzyzsWtE8Ru8tenIKv6InwL3+RU9ijYi0AJXGmJhd5CEitwH9wE+MMdd4t/0d4DTG/K33F3KuMeYvI9nOQE1zP08A/caYf4hk2+ZLRJYDy40x74lIJnAAuAd4gBj8nGa4ny8To5+TiAiQbozpF5FE4C3gz4HvAK8YY3aIyNPAYWPMUwu5VrT36Oda9ESFgTFmDzA5Z9HdwL94v/8Xxv8RxoRp7iemGWPOGWPe837fB5xgvBZETH5OM9xPzDLj+r0vE71fBvgE8LJ3e1A+o2gP9AEXPYkhBnhDRA6IyEORbkwQLTXGnPN+fx5YGsnGBMmjIvK+d2gnJoY4piIidsbLe+4nDj6nSfcDMfw5iYhVRA4BHYxX4GsEuo0xLu8uQYl50R7o49EGY8wNwGeArd5hg7jizWAavWOCgXkKKAfWAOeA/xXZ5syPiGQAvwT+H2NMr/97sfg5TXE/Mf05GWPcxpg1jNfqqAZWheI60R7o465wiTGm3fvfDuBXjH+48eAj7ziqbzy1I8LtWRBjzEfef4Qe4Dli8HPyjvv+Evi5MeYV7+aY/Zymup94+JwAjDHdQA1wE5AjIr4U8kGJedEe6AMqehIrRCTd+yAJEUlnvBDL0ZmPihmvAl/zfv814DcRbMuC+YKh1+eJsc/J+6Dvh8AJY8z/9nsrJj+n6e4nlj8nEckXkRzv96mMTzo5wXjA/5J3t6B8RlE96wbAO13qH/m46MnfRLhJ8yYiZYz34mG86MsLsXg/IvIisJHxlKofAX8F/Bp4CShhPM30l2OlyMw097OR8eEAA7QA3/Ib2456IrIB2AscATzezf+Z8XHtmPucZrife4nRz0lErmP8YauV8U73S8aYJ71xYgdgAw4C9xtjRhZ0rWgP9EoppRYm2odulFJKLZAGeqWUinMa6JVSKs5poFdKqTingV4ppeKcBnqllIpzGuiVUirO/V/LWPqzHAm3wgAAAABJRU5ErkJggg==\n","text/plain":["<Figure size 432x288 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"W3IOtXWFl0ML"},"source":["**Which value of k produces the best classification accuracy?**"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"t9-kdB1il3n8","executionInfo":{"status":"ok","timestamp":1607101292901,"user_tz":-120,"elapsed":41182,"user":{"displayName":"Elias 
Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"40ccf92b-74d7-4553-eac5-b0062dac3d64"},"source":["print(np.argmax(means)+1)\n","print(np.max(means))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["4\n","0.7686567164179104\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"nm2QJEZzpjLR"},"source":["k = 4 gives the best leave-one-out accuracy, about 77%."]},{"cell_type":"markdown","metadata":{"id":"wdM68_-2nNZf"},"source":["**If the number of k is still increased, what is the limit that the classification accuracy approaches? Why?**"]},{"cell_type":"markdown","metadata":{"id":"XeHC-r_RnPIK"},"source":["It approaches the proportion of the most common class in the data. When k = n, every training point is a neighbour of the new data point, so the classifier always predicts the most common class."]},{"cell_type":"markdown","metadata":{"id":"eEsqYbZMlo3l"},"source":["**Can you say something about the performance of this selected model with new, unseen data? Explain how you could estimate the performance of this selected model. 1p**"]},{"cell_type":"markdown","metadata":{"id":"OJWbq69OoLgG"},"source":["Leave-one-out is a good way to approximate the performance of the model with new data, so the model should classify new data correctly approximately 77% of the time. Note that k was selected using this same leave-one-out accuracy, so 77% is a slightly optimistic estimate; a less biased estimate would require a separate held-out test set or nested cross-validation."]},{"cell_type":"markdown","metadata":{"id":"xAWiKYYtl0Sa"},"source":["## Testing with training data (this should not be used!)"]},{"cell_type":"markdown","metadata":{"id":"-0wZYT8_l0Sa"},"source":["- Repeat the previous task but use the whole data for training **2p**\n","    - Plot the resulting classification accuracy versus k=1...30. Include the values from the previous task in the same figure\n","    - Comment your result. Why shouldn't you test with training data?"]},{"cell_type":"markdown","metadata":{"id":"SbJMb59yzLOD"},"source":["**Plot the resulting classification accuracy versus k=1...30. 
Include the values from the previous task in the same figure**"]},{"cell_type":"code","metadata":{"id":"0s_kAEE3l0Sa"},"source":["# Testing with training data: every model is fitted on ALL samples and then\n","# scored on those same samples (a deliberately biased protocol).\n","# Results go into `means_train` so the leave-one-out accuracies already stored\n","# in `means` survive and both curves can be plotted in the same figure.\n","y_all = np.ravel(y)\n","\n","means_train = np.zeros(30)\n","\n","for i in range(30):\n","\n","  neigh = KNeighborsClassifier(n_neighbors=i+1)\n","\n","  # The training set never changes here, so one fit per k is enough;\n","  # refitting for every left-out sample would rebuild the identical model.\n","  neigh.fit(X, y_all)\n","\n","  # score() is the fraction of correctly classified samples, i.e. the mean of\n","  # the per-sample 0/1 accuracies.\n","  means_train[i] = neigh.score(X, y_all)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":282},"id":"PBIWLBMFqZuG","executionInfo":{"status":"ok","timestamp":1607101307634,"user_tz":-120,"elapsed":55901,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"3ace75de-4174-4c0a-e5e9-b87249f51eeb"},"source":["fig, ax = plt.subplots()\n","\n","# `means` still holds the leave-one-out accuracies from the previous task\n","ax.plot(np.arange(1, len(means) + 1), means, label='Leave-one-out')\n","ax.plot(np.arange(1, len(means_train) + 1), means_train, label='Tested on training data')\n","\n","ax.set_xlabel('k (number of neighbours)')\n","ax.set_ylabel('Classification accuracy')\n","ax.set_title('kNN accuracy for k = 1...30')\n","ax.legend()\n","plt.show()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[<matplotlib.lines.Line2D at 
0x7feab4b3e240>]"]},"metadata":{"tags":[]},"execution_count":35},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxU1d3H8c8ve4AkLAkJEJYAYQcBQ3ABl6qA1oLa2orijjxdtIva1vZpq6V7a+tSt7rgVpRal4qPFkQFERcgyA4Swk7YwhpCyH6eP2bAAAmZJJNMZub7fr3yYuZu87vOy29uzj33HHPOISIioSsi0AWIiEjTUtCLiIQ4Bb2ISIhT0IuIhDgFvYhIiIsKdAEnS05Odj169Ah0GSIiQWXJkiV7nXMpNa1rcUHfo0cPcnJyAl2GiEhQMbMtta1T042ISIhT0IuIhDgFvYhIiFPQi4iEOAW9iEiIqzPozWyame0xs1W1rDcze9jM8sxshZkNr7buRjNb7/250Z+Fi4iIb3y5on8OGHea9ZcCmd6fKcDjAGbWHrgXGAlkA/eaWbvGFCsiIvVXZ9A75+YD+0+zyQTgBefxGdDWzDoBY4E5zrn9zrkDwBxO/wujUQpLynnwvVyWbzvYVB8hIhKU/NFG3wXYVu39du+y2pafwsymmFmOmeUUFBQ0qAjn4MH31rNo0+l+J4mIhJ8WcTPWOfekcy7LOZeVklLjE7x1SoyLIj46kp2HSvxcnYhIcPNH0OcDXau9T/cuq215kzAzOiXFsbtQQS8iUp0/gn4mcIO3981ZwCHn3E5gNjDGzNp5b8KO8S5rMqmJcexS0IuInKDOQc3M7GXgAiDZzLbj6UkTDeCcewJ4B7gMyAOKgZu96/ab2W+Axd5DTXXONWkDelpSnNroRUROUmfQO+cm1rHeAd+rZd00YFrDSqu/1MQ49hwuoarKERFhzfWxIiItWou4GesvaYmxlFc69heXBboUEZEWI7SCPikOgF3qeSMiclyIBX08oKAXEakutII+0XtFr543IiLHhVTQJ7eJIcJQX3oRkWpCKuijIiNISYhV042ISDUhFfTgab5R042IyJdCL+iT4nRFLyJSTegFva7oRUROEHJBn5oUx+GSCorLKgJdiohIixByQX+8i6Wab0REgFAOejXfiIgAIRj0qRoGQUTkBCEX9LqiFxE5UcgFfevYKBLiotitK3oRESAEgx7UxVJEpLrQDPqkOHYVlga6DBGRFiEkgz41MY5dh44GugwRkRYhJIO+U1IcBYdLqaisCnQpIiIBF5JBn5oYR5WDvUWaUlBEJCSDXl0sRUS+FJpBr4emRESOC8mgT/Ve0WumKRGREA36Dq1jiI40duqKXkQkNIM+IsLomBCnK3oREUI06EEzTYmIHBO6QZ+oK3oREQjhoE/1jnfjnAt0KSIiAeVT0JvZODNbZ2Z5ZnZPDeu7m9n7ZrbCzOaZWXq1dZVmtsz7M9OfxZ9OWlIsxWWVFJZoSkERCW91Br2ZRQKPApcCA4CJZjbgpM3uB15wzg0BpgJ/qLbuqHNuqPdnvJ/qrlNaUjygLpYiIr5c0WcDec65jc65MmAGMOGkbQYAH3hfz61hfbPT3LEiIh6+BH0XYFu199u9y6pbDlzlfX0lkGBmHbzv48wsx8w+M7MravoAM5vi3SanoKCgHuXXTsMgiIh4+Otm7N3A+Wa2FDgfyAcqveu6O+eygGuBB82s18k7O+eedM5lOeeyUlJS/FJQx8RYAM00JSJhL8qHbfKBrtXep3uXHeec24H3it7M2gBfd84d9K7L9/670czmAcOADY2uvA5x0ZG0axWtK3oRCXu+XNEvBjLNLMPMYoBrgBN6
z5hZspkdO9bPgGne5e3MLPbYNsC5wBp/FV8XzwQkCnoRCW91Br1zrgK4HZgNrAVecc6tNrOpZnasF80FwDozywVSgd95l/cHcsxsOZ6btH90zjVb0HdK0tyxIiK+NN3gnHsHeOekZb+q9vpV4NUa9vsEGNzIGhssLSmOlfmHAvXxIiItQsg+GQueppu9RWWUVWhKQREJXyEd9Me6WO45rOYbEQlfIR30qZppSkQktIO+U5IemhIRCemg1zAIIiIhHvRJ8dHERkVoYDMRCWshHfRm5plpqrA00KWIiARMSAc9eLpYarwbEQlnIR/0nZLi2Fl4NNBliIgETMgHvWfu2FJNKSgiYSvkgz41MY6yiioOFJcHuhQRkYAI+aBP00NTIhLmQj7oU7196dXFUkTCVcgH/bEr+p26oheRMBXyQd8xIRYzDYMgIuEr5IM+OjKC5Dax6ksvImEr5IMePF0sdUUvIuEqLII+NTFON2NFJGyFRdCnJcXqil5EwlZYBH2npHgOFpdTUl4Z6FJERJpdWAR9qsalF5EwFhZBf3wCEjXfiEgYCo+gT4oF9HSsiISnsAh6Nd2ISDgLi6BPiIumdUykhkEQkbAUFkEPnjFv1HQjIuEorIJeN2NFJByFTdBr7lgRCVc+Bb2ZjTOzdWaWZ2b31LC+u5m9b2YrzGyemaVXW3ejma33/tzoz+LrIy0xjj2HS6mq0pSCIhJe6gx6M4sEHgUuBQYAE81swEmb3Q+84JwbAkwF/uDdtz1wLzASyAbuNbN2/ivfd2lJcVRUOfYeKQ3Ex4uIBIwvV/TZQJ5zbqNzrgyYAUw4aZsBwAfe13OrrR8LzHHO7XfOHQDmAOMaX3b9pamLpYiEKV+Cvguwrdr77d5l1S0HrvK+vhJIMLMOPu6LmU0xsxwzyykoKPC19nrR3LEiEq78dTP2buB8M1sKnA/kAz6PIOace9I5l+Wcy0pJSfFTSSdK09yxIhKmonzYJh/oWu19unfZcc65HXiv6M2sDfB159xBM8sHLjhp33mNqLfBOrSJJTLC1MVSRMKOL1f0i4FMM8swsxjgGmBm9Q3MLNnMjh3rZ8A07+vZwBgza+e9CTvGu6zZRUYYHRNi2XVIN2NFJLzUGfTOuQrgdjwBvRZ4xTm32symmtl472YXAOvMLBdIBX7n3Xc/8Bs8vywWA1O9ywIiNTGOXYVHA/XxIiIB4UvTDc65d4B3Tlr2q2qvXwVerWXfaXx5hR9QnZLiyN19ONBliIg0q7B5MhaOzR2rphsRCS9hFfRpSXEUlVZQVFoR6FJERJpNeAW9HpoSkTAUVkGfqr70IhKGwiroO3mfjtUEJCISTsIq6I8Ng6ArehEJJ2EV9HHRkSTFR6uNXkTCSlgFPXhuyGoYBBEJJ2EX9KmaO1ZEwkzYBX1aYqxuxopIWAm/oE+KZ29RKeWVVYEuRUSkWYRf0CfG4RwUHNZQCCISHsIv6JNiAXy+Ibv/SBnOaUJxEQleYRf0x5+OraOdfmNBEXe8vJThv5nDo3PzmqM0EZEm4dMwxaHk+Hg3tVzR7zh4lIffX8+/l2wnNiqCfmkJPPx+HuMGpdG7Y0Jzlioi4hdhF/TtW8cQExlxykNTe4tKeXRuHtM/2wrADWd357sX9Abg4r99yM9eX8m/ppxNRIQ1e80iIo0RdkFvZqQmxR6/oj90tJyn5m9k2sebKCmv5Oozu/L9izPp0jb++D7/e1l/fvLaCl5evJXrRnYPVOkiIg0SdkEPnuabLfuKeWxeHk/M20BhSQWXD+nEjy7pQ6+UNqdsf3VWOm8szeeP73zBxf1Tj7fzi4gEg7C7GQueG7LLth3kz7PWkdWjPW9/fxSPXDu8xpAHz18Bv79qMGWVVdz75upmrlZEpHHC8or+8iGdKKuoYsp5Pcnq0d6nfTKSW/ODizP586x1zF69i7ED05q4ShER/7CW1kc8KyvL5eTkBLqMGpVXVjH+kY/Zf6SUOXeeT2JcdKBLEhEB
wMyWOOeyaloXlk03DRUdGcEfrxpMweFS/jzri0CXIyLiEwV9PZ3RtS03nZPBPz/bSs7m/YEuR0SkTgr6BrhrTB+6tI3np6+toLSiMtDliIicloK+AVrHRvHbKwexoeAIj83dEOhyREROS0HfQBf27cj4Mzrz2Lw81u8+HOhyRERqpaBvhF99bQCtY6O45/WVVFW1rN5LIiLH+BT0ZjbOzNaZWZ6Z3VPD+m5mNtfMlprZCjO7zLu8h5kdNbNl3p8n/H0CgZTcJpZffHUAS7YcYPqirYEuR0SkRnUGvZlFAo8ClwIDgIlmNuCkzX4BvOKcGwZcAzxWbd0G59xQ78+3/VR3i/H14V0Y1TuZP/33i1MGShMRaQl8uaLPBvKccxudc2XADGDCSds4INH7OgnY4b8SWzYz43dXDqKiqopfv6XhEUSk5fEl6LsA26q93+5dVt19wCQz2w68A9xRbV2Gt0nnQzMb3ZhiW6ruHVpz66gMZq3epSkKRaTF8dfN2InAc865dOAy4EUziwB2At28TTp3Ai+ZWeLJO5vZFDPLMbOcgoICP5XUvC4f0hnn4L21uwNdiojICXwJ+nyga7X36d5l1d0KvALgnPsUiAOSnXOlzrl93uVLgA1An5M/wDn3pHMuyzmXlZKSUv+zaAH6pSXQvUMrZq3aFehSRERO4EvQLwYyzSzDzGLw3GydedI2W4GLAMysP56gLzCzFO/NXMysJ5AJbPRX8S2JmTF2YBqfbNhLYUl5oMsRETmuzqB3zlUAtwOzgbV4etesNrOpZjbeu9ldwG1mthx4GbjJeYbFPA9YYWbLgFeBbzvnQnaAmLED0yivdMz9Yk+gSxEROc6n8eidc+/guclafdmvqr1eA5xbw36vAa81ssagMaxrWzomxDJr1S4mDD35frWISGDoyVg/iogwxgxMZd66AkrKNdiZiLQMCno/GzewE0fLK5mfG5y9h0Qk9Cjo/Wxkz/YkxUcza7V634hIy6Cg97PoyAgu6t+R99fuobyyKtDliIgo6JvC2IFpHDpazsKNIdvBSESCiIK+CZyXmUJ8dCSz1XwjIi2Agr4JxMdEcn6fFGav3qVx6kUk4BT0TWTcoDT2HC5l2faDgS5FRMKcgr6JXNivI9GRxmyNfSMiAaagbyJJ8dGc3SuZWat34RkNov5KyivZW6Rhj0WkcRT0TWjcwDS27CtmXQMmD3fOMeXFJYz/+wK184tIoyjom9AlA1Ixo0FDF7+xNJ/5uQXsOFTCF7vq/4tCROQYBX0TSkmIJat7O2avrt9kJPuKSvnN/62hX1oCAB+t13AKItJwCvomNnZgGmt3FrJ1X7HP+/z27bUUlVbw8MRh9E1N4KP1e5uwQhEJdQr6JjZ2YBqAzw9PfZhbwBtL8/nO+b3ok5rA6MxkFm3ez9EyjYYpIg2joG9iXdu3YkCnRJ8GOSsuq+B/31hJz5TWfPfC3gCMykymrKKKRZs1nIKINIyCvhmMG5TGki0H2FNYctrtHpiTy/YDR/njVUOIi44EYGRGB2IiI1igdnoRaSAFfTM41nzz7prab8qu3H6IZxZsYmJ2N7Iz2h9fHh8TyYiMdmqnF5EGU9A3gz6pbchIbl1rO315ZRU/fW0FyW1iuefSfqesH52Zwhe7Dtf5F4GISE0U9M3AzBg7MI1PN+zjUHH5KeufWbCJNTsLmTphIEnx0aesH52ZDKCrehFpEAV9Mxk7MJWKKsf7X5zYfLNl3xEemJPLmAGpjBvUqcZ9+6clktwmRv3pRaRBFPTN5Iz0tqQlxp3QfOOc4+dvrCQmMoKpEwbVum9EhDGqdzIL8vZqOAQRqTcFfTOJiDDGDEzlw9wCissqAHjt83w+ztvHTy7tR1pS3Gn3H52Zwt6iMtbuKmyOckUkhCjom9G4gWmUlFcxP7eAvUWl/PbtNWR1b8d12d3q3Fft9CLSUAr6ZpSd0Z62raKZvXo3v/m/NRwpreAPVw0mIsLq3LdjYhz90hLU
Ti8i9RYV6ALCSVRkBBf3T2Xmsh2UVVbxg4syyUxN8Hn/0ZnJPP/JFo6WVRIfE9mElYpIKNEVfTMbOzCNssoqendsw3cv7FWvfUdnplBWWcXCTfsa/Pnvrt7FH/67lorKqgYfQ0SCi67om9l5fZK5clgXbjk3g9io+l2VZ2e0JyYqggXr93JB3471/uyS8kp+/sYq9haVsqewlL9efYZPzUYiEtx8uqI3s3Fmts7M8szsnhrWdzOzuWa21MxWmNll1db9zLvfOjMb68/ig1FsVCQPfGsog9OT6r1vXHQk2T3aN/iG7L9ztrG3qJSvDu7EG0vz+cWbqxo8zaGIBI86g97MIoFHgUuBAcBEMxtw0ma/AF5xzg0DrgEe8+47wPt+IDAOeMx7PGmg0ZnJrNt9mN31HA6hvLKKJz7cyPBubXnk2mF894JevLRwK799e63CXiTE+XJFnw3kOec2OufKgBnAhJO2cUCi93USsMP7egIwwzlX6pzbBOR5jycNNDozBah/N8uZy3aQf/Ao37uwN2bGj8f25aZzevDMgk08MCe3KUoVkRbCl6DvAmyr9n67d1l19wGTzGw78A5wRz32xcymmFmOmeUUFKj74On0S0sguU1svbpZVlU5HpuXR7+0BL7Sz9O2b2b86vIBfCurKw9/kMfj8zY0VckiEmD+6nUzEXjOOZcOXAa8aGY+H9s596RzLss5l5WSkuKnkkJTRIQxOjOZBet9Hw5h9updbCg4cvxqvvqxfn/VYCYM7cyfZn3B859sbqKqRSSQfAnjfKBrtffp3mXV3Qq8AuCc+xSIA5J93FfqaXRmMvuOlLFmZ93DITjneHReHj06tOKywacOmhYZYdx/9RmMGZDKvTNX88ribTUcRUSCmS9BvxjINLMMM4vBc3N15knbbAUuAjCz/niCvsC73TVmFmtmGUAmsMhfxYerUb19Hw7hw9wCVuUX8p0LehFZS1fK6MgI/n7tMM7rk8JPX1/BzOU7atxORIJTnUHvnKsAbgdmA2vx9K5ZbWZTzWy8d7O7gNvMbDnwMnCT81iN50p/DTAL+J5zTrNcN1J9hkN4bO4GOiXFceWw9NNuFxsVyT8mncmIHu350b+W8a6Pk5mLSMvnUzu6c+4d51wf51wv59zvvMt+5Zyb6X29xjl3rnPuDOfcUOfcu9X2/Z13v77Ouf82zWmEn/P6pJCz+QBHy2r/vblo034Wbd7PlPN6EhNV91cdHxPJtJtGMLhLEre/tJT5uboxLhIKNARCkBqdmVzncAiPzs2jQ+sYrhlR9+iYx7SJjeL5m7Pp1bENU17MYeHGhg+3ICItg4I+SI3o0Z7YqIha2+lX5R/iw9wCbhmVUe8B0JJaRfPirdl0aRvPrc/nsGzbQX+ULCIBoqAPUnHRkWRntK+1nf6xeXkkxEZx/dndG3T85DaxTJ98Fu1bx3DjtEWs9aGHj4i0TAr6IDY6M5nc3UXsOnTicAh5ew7z31W7uOGc7iTGnTrZuK/SkuKYPnkkrWIimfT0QvL2FDW2ZBEJAAV9EPtyOIQTr+ofn7eR2KgIbjk3o9Gf0bV9K6ZPHomZMenphWzbX9zoY4pI81LQB7Evh0P4sp1+2/5i/rMsn4nZ3ejQJtYvn9MzpQ3/nJxNSUUlE5/6jJ2HjvrluCLSPBT0QczMOC8zmQV5Xw6H8OT8jUQYTDmvp18/q19aIi/cks2h4nKue2ohBYdL/Xp8EWk6CvogN7pPMvu9wyHsOVzCv3K28fXh6XRKivf7Zw1Jb8uzN49g56ESrn9mIQeLy/z+GSLifwr6IHeudziE+esLeGbBJioqq/j2+fWborA+snq05+kbs9i49wg3TlvE4ZLyJvssEfEPBX2Q65gQR/9Oibyzcif//HQLlw/pTI/k1k36mef2Tubx64azekchtzy3mOKyiib9PBFpHAV9CDgvM5lV+YUcKavkOxc03dV8dRf1T+XBa4ayZMsB/ufFJZSUawgjkZZKk4OH
gNGZKfxj/kYu7t+R/p0S697BTy4f0pmS8iru/vdyxj04n3atY+rcJyYygrvG9CU7o30zVCgioKAPCdkZ7ZmY3Y1bR/Vo9s/+xpnpREbA65/7Ns3Ahj1F3PzsIl6cPJLh3do1cXUiAmAtbWLorKwsl5OTE+gypInsLizhm//4lANHynh5ylkM7JwU6JJEQoKZLXHOZdW0Tm300qxSEz3DKrSJjeL6ZxaRt+dwoEsSCXkKeml26e1aMf22s4gw49qnFrJl35FAlyQS0hT0EhAZya2ZPnkk5ZVVXPvUQvIPalgFkaaioJeA6ZuWwAu3jKTwaDmTnl7InsMlde8kIvWmoJeAGpyexHO3jGDXoRImPb2QA0c0rIKIvynoJeDO7O4ZVmHzvmJumLaIQg2rIOJXCnppEc7tncwTk4bzxa5Cbn52MUdKNayCiL8o6KXF+Eq/VB66ZhhLtx7gthdyNKyCiJ8o6KVFuWxwJ/7yjTP4ZMM+v85Vu6ewhPtmruZnr6+kvLLKL8cEePqjjfz+nbW0tAcPRarTEAjS4nz9zHQcMPWt1Vz60EeMP6MzP7qkDxkNGJXzYHEZT3y4kec+2UR5paOyynG4pJyHrhlGZIQ1qs7H5uXx51nrABiSnsTlQzo36ngiTUVBLy3SN85M55L+qfxj/gae/Xgzb6/cyTez0rnjK5l0blv3pCpFpRVMW7CJp+ZvpKisgiuGduGHF2cya9Uu/vDfL4iLjuTPXx9CRAPD/rmPN/HnWev42hmd2bz3CPfNXMPo3ikktWr4ZOwiTUVj3UiLt+dwCY/N3cD0hVswM64/qzvfvaBXjXPilpRX8s/PtvDYvA3sP1LGmAGp3DWmL33TEo5v88CcXB56fz03nN2dX48fiFn9wv6Vxdv4yWsruGRAKo9dN5zc3YcZ/8jHfGN4On/6xpBGn69IQ5xurBtd0UuL1zEhjvvGD2Ty6Awefn89z368iRmLtnLrqAwmn9eTxLhoyiureHXJdh5+fz07D5Uwqncyd4/ty9CubU853g8vzqS4rIKnPtpEfEwk94zr53PYv7ksn5++voLz+qTwyLXDiI6MYGDnJCaPzuAfH27kimFdOLtXB3//JxBpFF3RS9DJ21PEA+/l8vaKnSTFR/OtEV2Zs2Y3m/YeYVi3tvx4TF/O8U6xWBvnHL98cxX//Gwrd17Sh+9flFnn5767ehffmf45Z3Zvx/M3ZxMfE3l83dGySsY+OJ/ICOO/PxhNXHTkaY50evuKSjlSWkm3Dq0afAwJP40evdLMxpnZOjPLM7N7alj/gJkt8/7kmtnBausqq62b2fDTEPHo3bENj147nP+7YxTDu7XlyfkbiY2K4Okbsnj9O+fUGfIAZsbU8YO4angX/jYnl6c/2nja7T/MLeD2l5YyqEsS024acULIA8THRPL7Kwezae8R/v7B+gaf27b9xVz+9wWMe2g+S7bsb/BxRKqr84rezCKBXOASYDuwGJjonFtTy/Z3AMOcc7d43xc559r4WpCu6KW+9hwuIbl1bINurFZUVvGDGct4e+VOfnvFICad1f2UbRZu3MeNzy4iI7kNM24767Q3XO96ZTlvLsvnrTtG1Xu2r92FJVz9xKccLC6jfesY9hV5xuwf1EVj9kvdGntFnw3kOec2OufKgBnAhNNsPxF4uf5lijRMx4S4BveeiYqM4IFvDeUr/Tryi/+s4rUl209Yv2zbQW55bjFd2sbz4q3Zdfaq+cVX+5MUH809r62gssr3ZtF9RaVc9/RC9hWV8vwt2bx021kkxkdz/TMLWbdLY/ZL4/gS9F2AbdXeb/cuO4WZdQcygA+qLY4zsxwz+8zMrqhlvynebXIKCgp8LF3EP2KiInjsuuGc06sDP351Oe+s3AnAmh2F3PDMQjq0iWX65LNIrqGXz8natY7hV18bwPLth3j+k80+ff6h4nImPbOI7QeKmXbTCIZ1a0fntvG8dNtIYqIimPTMQjbt1Zj90nD+fjL2GuBV
51z1Z9e7e/+cuBZ40Mx6nbyTc+5J51yWcy4rJSXFzyWJ1C0uOpKnbshiWLd2fP/lpTz38Sauf2YhrWOjmD55JGlJcT4fa/wZnbmgbwr3v7uO7QeKT7ttUWkFNz67iA17ivjH9VmM7Pllj53uHTxj9ldVOa576rM6jyVSG1+CPh/oWu19undZTa7hpGYb51y+99+NwDxgWL2rFGkGrWOjePbmEfTvlMh9b63BzJg+eSRd29ev94uZ8dsrBgHwy/+sqnV4hKNlldz63GJW5h/i79cO4/w+p17k9O6YwAu3ZlNUWsF1Ty9kd6HG7G8KlVWO15Zs54Zpi3jx082UVfhvmIyWwJegXwxkmlmGmcXgCfNTes+YWT+gHfBptWXtzCzW+zoZOBeo8SauSEuQGBfNC7dkc+PZ3XnptpH0TPG5H8EJ0tu14q4xfZm7roC3Vuw8ZX1pRSX/888lLNq8n7998wzGDkyr9VgDOyfx/C3Z7D1cyiRvO774h3OOWat2MvbB+dz17+Ws2XGIX765mq/8dR6vLdler/ssLVmdQe+cqwBuB2YDa4FXnHOrzWyqmY2vtuk1wAx34uVLfyDHzJYDc4E/1tZbR6SlaNc6hl9PGESf1IS6Nz6Nm87pwRnpSUx9azUHi7+cUKW8soo7XlrK/NwC/njVYCYMrfGW1wmGdWvHMzeNYNsBz5j9h45qzP7GcM4xP7eA8Y98zLf/+TnOOR6/bjiLfn4xz9+STdtW0dz17+WMfXA+s1btDPpB6/TAlEgTWrOjkK89soCrhnXhL1efQWWV485XlvHmsh3c+7UB3HxuRr2O92FuAZOfX8ygLkm8eOtI2sTq4fb6ytm8n7/MXsfCTfvp0jaeH13ShyuGdiYq8svrXs+V/i7uf3cdGwqOMLhLEneP7ct5mcn1HjKjuZyue6WCXqSJ/WnWFzw+bwPTJ4/kreU7mLF4Gz8e25fvXdi7QcebvXoX353+OSN6tOO5m7Mb9RRuOFmVf4i/vruOuesKSG4Ty/cv6s23RnQlNqr2/34VlVX8Z9kOHpiTS/7Bo2RntOcnY/uS1aN9M1buGwW9SACVlFcy7sH57DxUQmlFFbdf2Ju7x/Zt1DHfXJbPD/+1jNGZKUwc0bXuHYAeya3r/RCXP1RUVrF8+0EGdEo65Yni5rCxoIi/zvlyyIxvn9+LG8/pTqsY3/8aKq2o5F+Lt/Hw+3nsLSrlwr4pXDU8nSgfnt+IiTScm9QAAAdzSURBVIrg3N7JTf4LWUEvEmCfbNjL9c8s4saze/DLy/v75c//GYu2cs/rK+u1z7iBadw1pg+Zjbz/4IuqKsfbK3fytzm5bNp7hI4JsdxxUSbfyupKTFTTz3mUf/AoD72Xy6tLthMXHekZBG90T5LiGz6UdHFZBc9/soUnPtxQr/sknZLi+P5FmXzjzHSiI5vm3BX0Ii3AoeJyv49Xv+PgUZ8mU3cO3l29m6c+2siRsgquHNqFH17cp0kGTnPO8cEXe7j/3VzW7iykb2oC15/dnTeX5bN48wG6to/nRxf3YcLQLo2e/KUmBYdLeXRuHi8t3ArApLO6890Le/n0wJuvjpRWsM3H5xp2Hirh4ffXs3TrQXp0aMWPLunD14Z0bvDT3LVR0IsIAAeOlPHEhxt47pPNVFY5rsnuyh1fySQ10fcHwk7n0w37+MvsL/h860G6d2jFnZf04fIhnYmMMJxzzMst4P7Z61i9o5DMjm24a0xfxg5M9ctfOIeKy3nyow1MW7CZssqqek1U09SO/fL7y+x1fLHrMP3SErh7TF8u6t/Rbzd3FfQicoLdhSX8/YP1zFi0jcgI46ZzevDt83vRrnVMg463fNtB7n93HR+t30taoqeZ4uqsmpspqqoc/121i7/OWcfGgiMMSU/ix2P7Mqp3w3q0FJdV8OzHm/nHhxsoLKnga2d05s4GTj3Z1KqqHG+t8Nzc3byv2DOs9ti+nNOr7hFX66KgF5Eabd1XzIPv5fLGsnza
xEQxeXRPbh2d4XO3zdzdh/nru+uYvXo37VpF870LezPprO4+3XisqKzi9aX5PPTeevIPHuWsnu358di+nNndtx4tpRWVvLxwK4/M3cDeolIu7t+ROy/py4DOzX/Dub6OTZTz0Hvr2VVYwrm9O3D3mL4M69auwcdU0IvIaVUP7MS4KJ+achywoaDo+C+IW0b1ICGu/vcgSisqmbFoG3//wNOjJSO5tU+9WfYfKWPfkTLvL4h+nNm94SEZKCXllUxfuJVH5+ax/0gZXx3ciUeuHdagv2wU9CLik+XbDvLiZ1soLqvwafueyW24dVRGg5t8qisuq+CFT7ewYvvBujcGoiMjuPrMrpzbu0OLfYjJV0WlFTy7YBMlFZX8eGy/Bh1DQS8iEuIaPZWgiIgELwW9iEiIU9CLiIQ4Bb2ISIhT0IuIhDgFvYhIiFPQi4iEOAW9iEiIa3EPTJlZAbDlpMXJwN4AlNOUQu2cQu18IPTOKdTOB0LvnBpzPt2dcyk1rWhxQV8TM8up7YmvYBVq5xRq5wOhd06hdj4QeufUVOejphsRkRCnoBcRCXHBEvRPBrqAJhBq5xRq5wOhd06hdj4QeufUJOcTFG30IiLScMFyRS8iIg2koBcRCXEtPujNbJyZrTOzPDO7J9D1NJaZbTazlWa2zMyCcoYVM5tmZnvMbFW1Ze3NbI6Zrff+GzTzutVyPveZWb73e1pmZpcFssb6MrOuZjbXzNaY2Woz+4F3eVB+T6c5n6D9nswszswWmdly7zn92rs8w8wWejPvX2bW6Om7WnQbvZlFArnAJcB2YDEw0Tm3JqCFNYKZbQaynHNB+5CHmZ0HFAEvOOcGeZf9GdjvnPuj9xdyO+fcTwNZp69qOZ/7gCLn3P2BrK2hzKwT0Mk597mZJQBLgCuAmwjC7+k05/NNgvR7Ms/8h62dc0VmFg0sAH4A3Am87pybYWZPAMudc4835rNa+hV9NpDnnNvonCsDZgATAlxT2HPOzQf2n7R4AvC89/XzeP4nDAq1nE9Qc87tdM597n19GFgLdCFIv6fTnE/Qch5F3rfR3h8HfAV41bvcL99RSw/6LsC2au+3E+RfLp4v8l0zW2JmUwJdjB+lOud2el/vAlIDWYyf3G5mK7xNO0HRxFETM+sBDAMWEgLf00nnA0H8PZlZpJktA/YAc4ANwEHn3LHZ2f2SeS096EPRKOfccOBS4HveZoOQ4jztgS23TdA3jwO9gKHATuCvgS2nYcysDfAa8EPnXGH1dcH4PdVwPkH9PTnnKp1zQ4F0PC0Y/Zric1p60OcDXau9T/cuC1rOuXzvv3uAN/B8uaFgt7cd9Vh76p4A19Mozrnd3v8Jq4CnCMLvydvu+xow3Tn3undx0H5PNZ1PKHxPAM65g8Bc4GygrZlFeVf5JfNaetAvBjK9d6FjgGuAmQGuqcHMrLX3RhJm1hoYA6w6/V5BYyZwo/f1jcCbAayl0Y6FodeVBNn35L3R9wyw1jn3t2qrgvJ7qu18gvl7MrMUM2vrfR2Pp9PJWjyB/w3vZn75jlp0rxsAb3epB4FIYJpz7ncBLqnBzKwnnqt4gCjgpWA8HzN7GbgAz5Cqu4F7gf8ArwDd8Awz/U3nXFDc4KzlfC7A0xzggM3A/1Rr227xzGwU8BGwEqjyLv45nnbtoPueTnM+EwnS78nMhuC52RqJ56L7FefcVG9OzADaA0uBSc650kZ9VksPehERaZyW3nQjIiKNpKAXEQlxCnoRkRCnoBcRCXEKehGREKegFxEJcQp6EZEQ9/8yP0XyhqdyoAAAAABJRU5ErkJggg==\n","text/plain":["<Figure size 432x288 with 1 
Axes>"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"cell_type":"markdown","metadata":{"id":"OiCG6rViqnhs"},"source":["**Comment your result. Why shouldn't you test with training data?**"]},{"cell_type":"markdown","metadata":{"id":"fRHI31UDqp_2"},"source":["One might interpret this as k = 1 being the best. But because the test data is also in the training data, one of the closest neighbours of every point is always the point itself. That is what makes the estimate biased, and that's why k = 1 gives 100% accuracy. Accuracy measured on the training data therefore tells us nothing about how the model performs on new, unseen data."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":732},"id":"ehg2UssczQkQ","executionInfo":{"status":"ok","timestamp":1607103352794,"user_tz":-120,"elapsed":13690,"user":{"displayName":"Elias Ervelä","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhcVQbqAobpSX3NE6w5d6aZPU_VzlnsvBC9GkyMtw=s64","userId":"11858975235946053692"}},"outputId":"058939ed-2979-4842-e906-2e601a88eb45"},"source":["!wget -nc https://raw.githubusercontent.com/brpy/colab-pdf/master/colab_pdf.py\n","from colab_pdf import colab_pdf\n","colab_pdf('DAKD2020_ex3_Elias_Ervela.ipynb', notebookpath = '/content/drive/My Drive/Colab Notebooks/DAKD/')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["File ‘colab_pdf.py’ already there; not retrieving.\n","\n","Ign:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease\n","Hit:2 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease\n","Ign:3 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease\n","Hit:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Release\n","Hit:5 http://security.ubuntu.com/ubuntu bionic-security InRelease\n","Hit:6 http://archive.ubuntu.com/ubuntu bionic InRelease\n","Hit:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release\n","Hit:8 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu bionic InRelease\n","Hit:9 
http://archive.ubuntu.com/ubuntu bionic-updates InRelease\n","Hit:10 http://archive.ubuntu.com/ubuntu bionic-backports InRelease\n","Hit:11 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease\n","Reading package lists... Done\n","Building dependency tree       \n","Reading state information... Done\n","68 packages can be upgraded. Run 'apt list --upgradable' to see them.\n","Reading package lists... Done\n","Building dependency tree       \n","Reading state information... Done\n","texlive-fonts-recommended is already the newest version (2017.20180305-1).\n","texlive-generic-recommended is already the newest version (2017.20180305-1).\n","texlive-xetex is already the newest version (2017.20180305-1).\n","0 upgraded, 0 newly installed, 0 to remove and 68 not upgraded.\n","[NbConvertApp] Converting notebook /content/drive/My Drive/Colab Notebooks/DAKD/DAKD2020_ex3_Elias_Ervela.ipynb to pdf\n","[NbConvertApp] Support files will be in DAKD2020_ex3_Elias_Ervela_files/\n","[NbConvertApp] Making directory ./DAKD2020_ex3_Elias_Ervela_files\n","[NbConvertApp] Making directory ./DAKD2020_ex3_Elias_Ervela_files\n","[NbConvertApp] Making directory ./DAKD2020_ex3_Elias_Ervela_files\n","[NbConvertApp] Making directory ./DAKD2020_ex3_Elias_Ervela_files\n","[NbConvertApp] Making directory ./DAKD2020_ex3_Elias_Ervela_files\n","[NbConvertApp] Making directory ./DAKD2020_ex3_Elias_Ervela_files\n","[NbConvertApp] Making directory ./DAKD2020_ex3_Elias_Ervela_files\n","[NbConvertApp] Making directory ./DAKD2020_ex3_Elias_Ervela_files\n","[NbConvertApp] Writing 108970 bytes to ./notebook.tex\n","[NbConvertApp] Building PDF\n","[NbConvertApp] Running xelatex 3 times: [u'xelatex', u'./notebook.tex', '-quiet']\n","[NbConvertApp] Running bibtex 1 time: [u'bibtex', u'./notebook']\n","[NbConvertApp] WARNING | bibtex had problems, most likely because there were no citations\n","[NbConvertApp] PDF successfully created\n","[NbConvertApp] Writing 150344 bytes to /content/drive/My 
Drive/DAKD2020_ex3_Elias_Ervela.pdf\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/javascript":["\n","    async function download(id, filename, size) {\n","      if (!google.colab.kernel.accessAllowed) {\n","        return;\n","      }\n","      const div = document.createElement('div');\n","      const label = document.createElement('label');\n","      label.textContent = `Downloading \"${filename}\": `;\n","      div.appendChild(label);\n","      const progress = document.createElement('progress');\n","      progress.max = size;\n","      div.appendChild(progress);\n","      document.body.appendChild(div);\n","\n","      const buffers = [];\n","      let downloaded = 0;\n","\n","      const channel = await google.colab.kernel.comms.open(id);\n","      // Send a message to notify the kernel that we're ready.\n","      channel.send({})\n","\n","      for await (const message of channel.messages) {\n","        // Send a message to notify the kernel that we're ready.\n","        channel.send({})\n","        if (message.buffers) {\n","          for (const buffer of message.buffers) {\n","            buffers.push(buffer);\n","            downloaded += buffer.byteLength;\n","            progress.value = downloaded;\n","          }\n","        }\n","      }\n","      const blob = new Blob(buffers, {type: 'application/binary'});\n","      const a = document.createElement('a');\n","      a.href = window.URL.createObjectURL(blob);\n","      a.download = filename;\n","      div.appendChild(a);\n","      a.click();\n","      div.remove();\n","    }\n","  "],"text/plain":["<IPython.core.display.Javascript object>"]},"metadata":{"tags":[]}},{"output_type":"display_data","data":{"application/javascript":["download(\"download_44a7d4d4-3052-4e39-8884-f5c6e17f4abd\", \"DAKD2020_ex3_Elias_Ervela.pdf\", 150344)"],"text/plain":["<IPython.core.display.Javascript 
object>"]},"metadata":{"tags":[]}},{"output_type":"execute_result","data":{"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"},"text/plain":["'File ready to be Downloaded and Saved to Drive'"]},"metadata":{"tags":[]},"execution_count":46}]}]}
\ No newline at end of file