diff --git a/exercise_1.ipynb b/exercise_1.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e9cd366a516cee08666e7ddcb16a3d283b9dfd1c
--- /dev/null
+++ b/exercise_1.ipynb
@@ -0,0 +1,1881 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Exercise 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 603,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import csv as C\n",
+    "import numpy as N\n",
+    "import pandas as P"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1.1 Data structures"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 604,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0    5\n",
+      "1    8\n",
+      "2    7\n",
+      "3    6\n",
+      "4    8\n",
+      "5    4\n",
+      "Name: A, dtype: int64\n",
+      "0    1.3\n",
+      "1    2.1\n",
+      "2    1.8\n",
+      "3    1.2\n",
+      "4    1.4\n",
+      "5    2.3\n",
+      "Name: B, dtype: float64\n",
+      "0    y\n",
+      "1    y\n",
+      "2    n\n",
+      "3    y\n",
+      "4    n\n",
+      "5    n\n",
+      "Name: C, dtype: object\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Three named Series of equal length; the stored output of this cell\n",
+    "# shows their dtypes as int64, float64 and object.\n",
+    "# NOTE(review): binding `C` here rebinds the `csv` alias `C` created by\n",
+    "# `import csv as C` in the import cell.\n",
+    "A = P.Series([5, 8, 7, 6, 8, 4], name=\"A\")\n",
+    "B = P.Series([1.3, 2.1, 1.8, 1.2, 1.4, 2.3], name=\"B\")\n",
+    "C = P.Series(list(\"yynynn\"), name=\"C\")\n",
+    "\n",
+    "# Emits the same text as printing A, B and C one after another.\n",
+    "print(A, B, C, sep='\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 605,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>A</th>\n",
+       "      <th>B</th>\n",
+       "      <th>C</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>5</td>\n",
+       "      <td>1.3</td>\n",
+       "      <td>y</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>8</td>\n",
+       "      <td>2.1</td>\n",
+       "      <td>y</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>7</td>\n",
+       "      <td>1.8</td>\n",
+       "      <td>n</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>6</td>\n",
+       "      <td>1.2</td>\n",
+       "      <td>y</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>8</td>\n",
+       "      <td>1.4</td>\n",
+       "      <td>n</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>4</td>\n",
+       "      <td>2.3</td>\n",
+       "      <td>n</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   A    B  C\n",
+       "0  5  1.3  y\n",
+       "1  8  2.1  y\n",
+       "2  7  1.8  n\n",
+       "3  6  1.2  y\n",
+       "4  8  1.4  n\n",
+       "5  4  2.3  n"
+      ]
+     },
+     "execution_count": 605,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df1 = P.concat((A, B, C), axis=\"columns\")  # one column per Series, aligned on their index\n",
+    "df1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 606,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.8"
+      ]
+     },
+     "execution_count": 606,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df1.iloc[2,1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 607,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "A      6\n",
+       "B    1.2\n",
+       "C      y\n",
+       "Name: 3, dtype: object"
+      ]
+     },
+     "execution_count": 607,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df1.iloc[3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 608,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>B</th>\n",
+       "      <th>C</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2.1</td>\n",
+       "      <td>y</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1.8</td>\n",
+       "      <td>n</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1.2</td>\n",
+       "      <td>y</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1.4</td>\n",
+       "      <td>n</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     B  C\n",
+       "1  2.1  y\n",
+       "2  1.8  n\n",
+       "3  1.2  y\n",
+       "4  1.4  n"
+      ]
+     },
+     "execution_count": 608,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "subset = df1.loc[1:4, [\"B\", \"C\"]]  # rows 1-4 (label slices include the end), columns B and C\n",
+    "subset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 609,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A</th>\n",
+       "      <td>5</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B</th>\n",
+       "      <td>1.3</td>\n",
+       "      <td>2.1</td>\n",
+       "      <td>1.8</td>\n",
+       "      <td>1.2</td>\n",
+       "      <td>1.4</td>\n",
+       "      <td>2.3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C</th>\n",
+       "      <td>y</td>\n",
+       "      <td>y</td>\n",
+       "      <td>n</td>\n",
+       "      <td>y</td>\n",
+       "      <td>n</td>\n",
+       "      <td>n</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     0    1    2    3    4    5\n",
+       "A    5    8    7    6    8    4\n",
+       "B  1.3  2.1  1.8  1.2  1.4  2.3\n",
+       "C    y    y    n    y    n    n"
+      ]
+     },
+     "execution_count": 609,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df1.transpose()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1.2 Thyroid Disease"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 610,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df2 = P.read_csv(filepath_or_buffer='allbp.data', header=None)  # header=None: no header row, columns get integer labels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 611,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "      <th>9</th>\n",
+       "      <th>...</th>\n",
+       "      <th>20</th>\n",
+       "      <th>21</th>\n",
+       "      <th>22</th>\n",
+       "      <th>23</th>\n",
+       "      <th>24</th>\n",
+       "      <th>25</th>\n",
+       "      <th>26</th>\n",
+       "      <th>27</th>\n",
+       "      <th>28</th>\n",
+       "      <th>29</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>41</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>125</td>\n",
+       "      <td>t</td>\n",
+       "      <td>1.14</td>\n",
+       "      <td>t</td>\n",
+       "      <td>109</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>SVHC</td>\n",
+       "      <td>negative.|3733</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>23</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>102</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|1442</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>46</td>\n",
+       "      <td>M</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>109</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.91</td>\n",
+       "      <td>t</td>\n",
+       "      <td>120</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|2965</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>70</td>\n",
+       "      <td>F</td>\n",
+       "      <td>t</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>175</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|806</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>70</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>61</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>t</td>\n",
+       "      <td>70</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>SVI</td>\n",
+       "      <td>negative.|2807</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 30 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   0  1  2  3  4  5  6  7  8  9   ... 20   21 22    23 24   25 26 27     28  \\\n",
+       "0  41  F  f  f  f  f  f  f  f  f  ...  t  125  t  1.14  t  109  f  ?   SVHC   \n",
+       "1  23  F  f  f  f  f  f  f  f  f  ...  t  102  f     ?  f    ?  f  ?  other   \n",
+       "2  46  M  f  f  f  f  f  f  f  f  ...  t  109  t  0.91  t  120  f  ?  other   \n",
+       "3  70  F  t  f  f  f  f  f  f  f  ...  t  175  f     ?  f    ?  f  ?  other   \n",
+       "4  70  F  f  f  f  f  f  f  f  f  ...  t   61  t  0.87  t   70  f  ?    SVI   \n",
+       "\n",
+       "               29  \n",
+       "0  negative.|3733  \n",
+       "1  negative.|1442  \n",
+       "2  negative.|2965  \n",
+       "3   negative.|806  \n",
+       "4  negative.|2807  \n",
+       "\n",
+       "[5 rows x 30 columns]"
+      ]
+     },
+     "execution_count": 611,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 612,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 2800 entries, 0 to 2799\n",
+      "Data columns (total 30 columns):\n",
+      " #   Column  Non-Null Count  Dtype \n",
+      "---  ------  --------------  ----- \n",
+      " 0   0       2800 non-null   object\n",
+      " 1   1       2800 non-null   object\n",
+      " 2   2       2800 non-null   object\n",
+      " 3   3       2800 non-null   object\n",
+      " 4   4       2800 non-null   object\n",
+      " 5   5       2800 non-null   object\n",
+      " 6   6       2800 non-null   object\n",
+      " 7   7       2800 non-null   object\n",
+      " 8   8       2800 non-null   object\n",
+      " 9   9       2800 non-null   object\n",
+      " 10  10      2800 non-null   object\n",
+      " 11  11      2800 non-null   object\n",
+      " 12  12      2800 non-null   object\n",
+      " 13  13      2800 non-null   object\n",
+      " 14  14      2800 non-null   object\n",
+      " 15  15      2800 non-null   object\n",
+      " 16  16      2800 non-null   object\n",
+      " 17  17      2800 non-null   object\n",
+      " 18  18      2800 non-null   object\n",
+      " 19  19      2800 non-null   object\n",
+      " 20  20      2800 non-null   object\n",
+      " 21  21      2800 non-null   object\n",
+      " 22  22      2800 non-null   object\n",
+      " 23  23      2800 non-null   object\n",
+      " 24  24      2800 non-null   object\n",
+      " 25  25      2800 non-null   object\n",
+      " 26  26      2800 non-null   object\n",
+      " 27  27      2800 non-null   object\n",
+      " 28  28      2800 non-null   object\n",
+      " 29  29      2800 non-null   object\n",
+      "dtypes: object(30)\n",
+      "memory usage: 656.4+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df2.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 613,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "      <th>9</th>\n",
+       "      <th>...</th>\n",
+       "      <th>20</th>\n",
+       "      <th>21</th>\n",
+       "      <th>22</th>\n",
+       "      <th>23</th>\n",
+       "      <th>24</th>\n",
+       "      <th>25</th>\n",
+       "      <th>26</th>\n",
+       "      <th>27</th>\n",
+       "      <th>28</th>\n",
+       "      <th>29</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>unique</th>\n",
+       "      <td>94</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>218</td>\n",
+       "      <td>2</td>\n",
+       "      <td>139</td>\n",
+       "      <td>2</td>\n",
+       "      <td>210</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>2800</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>top</th>\n",
+       "      <td>59</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>?</td>\n",
+       "      <td>t</td>\n",
+       "      <td>?</td>\n",
+       "      <td>t</td>\n",
+       "      <td>?</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|3733</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>freq</th>\n",
+       "      <td>75</td>\n",
+       "      <td>1830</td>\n",
+       "      <td>2470</td>\n",
+       "      <td>2760</td>\n",
+       "      <td>2766</td>\n",
+       "      <td>2690</td>\n",
+       "      <td>2759</td>\n",
+       "      <td>2761</td>\n",
+       "      <td>2752</td>\n",
+       "      <td>2637</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2616</td>\n",
+       "      <td>184</td>\n",
+       "      <td>2503</td>\n",
+       "      <td>297</td>\n",
+       "      <td>2505</td>\n",
+       "      <td>295</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>2800</td>\n",
+       "      <td>1632</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>4 rows × 30 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          0     1     2     3     4     5     6     7     8     9   ...    20  \\\n",
+       "count   2800  2800  2800  2800  2800  2800  2800  2800  2800  2800  ...  2800   \n",
+       "unique    94     3     2     2     2     2     2     2     2     2  ...     2   \n",
+       "top       59     F     f     f     f     f     f     f     f     f  ...     t   \n",
+       "freq      75  1830  2470  2760  2766  2690  2759  2761  2752  2637  ...  2616   \n",
+       "\n",
+       "          21    22    23    24    25    26    27     28              29  \n",
+       "count   2800  2800  2800  2800  2800  2800  2800   2800            2800  \n",
+       "unique   218     2   139     2   210     1     1      5            2800  \n",
+       "top        ?     t     ?     t     ?     f     ?  other  negative.|3733  \n",
+       "freq     184  2503   297  2505   295  2800  2800   1632               1  \n",
+       "\n",
+       "[4 rows x 30 columns]"
+      ]
+     },
+     "execution_count": 613,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 614,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Replace the positional labels 0-29 with descriptive names.\n",
+    "# The list order must follow the column order of the loaded file; it has\n",
+    "# exactly 30 entries, one per column.\n",
+    "df2.columns = [\n",
+    "    # Columns 0-15, one name per line.\n",
+    "    \"age\",                        # 0\n",
+    "    \"sex\",                        # 1\n",
+    "    \"on thyroxine\",               # 2\n",
+    "    \"query on thyroxine\",         # 3\n",
+    "    \"on antithyroid medication\",  # 4\n",
+    "    \"sick\",                       # 5\n",
+    "    \"pregnant\",                   # 6\n",
+    "    \"thyroid surgery\",            # 7\n",
+    "    \"I131 treatment\",             # 8\n",
+    "    \"query hypothyroid\",          # 9\n",
+    "    \"query hyperthyroid\",         # 10\n",
+    "    \"lithium\",                    # 11\n",
+    "    \"goitre\",                     # 12\n",
+    "    \"tumor\",                      # 13\n",
+    "    \"hypopituitary\",              # 14\n",
+    "    \"psych\",                      # 15\n",
+    "    # Columns 16-27: each \"<test> measured\" column is followed by its \"<test>\" column.\n",
+    "    \"TSH measured\", \"TSH\",\n",
+    "    \"T3 measured\", \"T3\",\n",
+    "    \"TT4 measured\", \"TT4\",\n",
+    "    \"T4U measured\", \"T4U\",\n",
+    "    \"FTI measured\", \"FTI\",\n",
+    "    \"TBG measured\", \"TBG\",\n",
+    "    # Columns 28-29.\n",
+    "    \"referral source\",\n",
+    "    \"classes\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 615,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>sex</th>\n",
+       "      <th>on thyroxine</th>\n",
+       "      <th>query on thyroxine</th>\n",
+       "      <th>on antithyroid medication</th>\n",
+       "      <th>sick</th>\n",
+       "      <th>pregnant</th>\n",
+       "      <th>thyroid surgery</th>\n",
+       "      <th>I131 treatment</th>\n",
+       "      <th>query hypothyroid</th>\n",
+       "      <th>...</th>\n",
+       "      <th>TT4 measured</th>\n",
+       "      <th>TT4</th>\n",
+       "      <th>T4U measured</th>\n",
+       "      <th>T4U</th>\n",
+       "      <th>FTI measured</th>\n",
+       "      <th>FTI</th>\n",
+       "      <th>TBG measured</th>\n",
+       "      <th>TBG</th>\n",
+       "      <th>referral source</th>\n",
+       "      <th>classes</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>41</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>125</td>\n",
+       "      <td>t</td>\n",
+       "      <td>1.14</td>\n",
+       "      <td>t</td>\n",
+       "      <td>109</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>SVHC</td>\n",
+       "      <td>negative.|3733</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>23</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>102</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|1442</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>46</td>\n",
+       "      <td>M</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>109</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.91</td>\n",
+       "      <td>t</td>\n",
+       "      <td>120</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|2965</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>70</td>\n",
+       "      <td>F</td>\n",
+       "      <td>t</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>175</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|806</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>70</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>61</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>t</td>\n",
+       "      <td>70</td>\n",
+       "      <td>f</td>\n",
+       "      <td>?</td>\n",
+       "      <td>SVI</td>\n",
+       "      <td>negative.|2807</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 30 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  age sex on thyroxine query on thyroxine on antithyroid medication sick  \\\n",
+       "0  41   F            f                  f                         f    f   \n",
+       "1  23   F            f                  f                         f    f   \n",
+       "2  46   M            f                  f                         f    f   \n",
+       "3  70   F            t                  f                         f    f   \n",
+       "4  70   F            f                  f                         f    f   \n",
+       "\n",
+       "  pregnant thyroid surgery I131 treatment query hypothyroid  ... TT4 measured  \\\n",
+       "0        f               f              f                 f  ...            t   \n",
+       "1        f               f              f                 f  ...            t   \n",
+       "2        f               f              f                 f  ...            t   \n",
+       "3        f               f              f                 f  ...            t   \n",
+       "4        f               f              f                 f  ...            t   \n",
+       "\n",
+       "   TT4 T4U measured   T4U FTI measured  FTI TBG measured TBG referral source  \\\n",
+       "0  125            t  1.14            t  109            f   ?            SVHC   \n",
+       "1  102            f     ?            f    ?            f   ?           other   \n",
+       "2  109            t  0.91            t  120            f   ?           other   \n",
+       "3  175            f     ?            f    ?            f   ?           other   \n",
+       "4   61            t  0.87            t   70            f   ?             SVI   \n",
+       "\n",
+       "          classes  \n",
+       "0  negative.|3733  \n",
+       "1  negative.|1442  \n",
+       "2  negative.|2965  \n",
+       "3   negative.|806  \n",
+       "4  negative.|2807  \n",
+       "\n",
+       "[5 rows x 30 columns]"
+      ]
+     },
+     "execution_count": 615,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 616,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(2800, 30)"
+      ]
+     },
+     "execution_count": 616,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- How many observations and how many variables are there in the data? <br>\n",
+    "**Answer:** the data set has 2800 observations (rows) and 30 variables (columns), as given by `df2.shape`.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 617,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/fp/cf7b8z110lj8yjpy9rj8f5fr0000gn/T/ipykernel_5773/4070518099.py:1: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
+      "  df2.replace(['?', 'nan', 'missing'], N.nan, inplace=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Normalise the placeholder tokens to real missing values. Opting in to the\n",
+    "# future no-silent-downcasting behaviour avoids the FutureWarning raised by\n",
+    "# replace(); infer_objects() then makes the dtype inference explicit, so a\n",
+    "# column that ends up entirely NaN (TBG) is still converted to float64.\n",
+    "with P.option_context(\"future.no_silent_downcasting\", True):\n",
+    "    df2 = df2.replace(['?', 'nan', 'missing'], N.nan).infer_objects()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 618,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "age                             1\n",
+       "sex                           110\n",
+       "on thyroxine                    0\n",
+       "query on thyroxine              0\n",
+       "on antithyroid medication       0\n",
+       "sick                            0\n",
+       "pregnant                        0\n",
+       "thyroid surgery                 0\n",
+       "I131 treatment                  0\n",
+       "query hypothyroid               0\n",
+       "query hyperthyroid              0\n",
+       "lithium                         0\n",
+       "goitre                          0\n",
+       "tumor                           0\n",
+       "hypopituitary                   0\n",
+       "psych                           0\n",
+       "TSH measured                    0\n",
+       "TSH                           284\n",
+       "T3 measured                     0\n",
+       "T3                            585\n",
+       "TT4 measured                    0\n",
+       "TT4                           184\n",
+       "T4U measured                    0\n",
+       "T4U                           297\n",
+       "FTI measured                    0\n",
+       "FTI                           295\n",
+       "TBG measured                    0\n",
+       "TBG                          2800\n",
+       "referral source                 0\n",
+       "classes                         0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 618,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 619,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "age                           object\n",
+       "sex                           object\n",
+       "on thyroxine                  object\n",
+       "query on thyroxine            object\n",
+       "on antithyroid medication     object\n",
+       "sick                          object\n",
+       "pregnant                      object\n",
+       "thyroid surgery               object\n",
+       "I131 treatment                object\n",
+       "query hypothyroid             object\n",
+       "query hyperthyroid            object\n",
+       "lithium                       object\n",
+       "goitre                        object\n",
+       "tumor                         object\n",
+       "hypopituitary                 object\n",
+       "psych                         object\n",
+       "TSH measured                  object\n",
+       "TSH                           object\n",
+       "T3 measured                   object\n",
+       "T3                            object\n",
+       "TT4 measured                  object\n",
+       "TT4                           object\n",
+       "T4U measured                  object\n",
+       "T4U                           object\n",
+       "FTI measured                  object\n",
+       "FTI                           object\n",
+       "TBG measured                  object\n",
+       "TBG                          float64\n",
+       "referral source               object\n",
+       "classes                       object\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 619,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 620,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Columns that hold numeric measurements but are still stored as object dtype.\n",
+    "columns_to_change = [\n",
+    "    \"age\",\n",
+    "    \"TSH\",\n",
+    "    \"T3\",\n",
+    "    \"TT4\",\n",
+    "    \"T4U\",\n",
+    "    \"FTI\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 621,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cast every listed column to float with a single astype() call.\n",
+    "df2 = df2.astype({name: float for name in columns_to_change})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 622,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>sex</th>\n",
+       "      <th>on thyroxine</th>\n",
+       "      <th>query on thyroxine</th>\n",
+       "      <th>on antithyroid medication</th>\n",
+       "      <th>sick</th>\n",
+       "      <th>pregnant</th>\n",
+       "      <th>thyroid surgery</th>\n",
+       "      <th>I131 treatment</th>\n",
+       "      <th>query hypothyroid</th>\n",
+       "      <th>...</th>\n",
+       "      <th>TT4 measured</th>\n",
+       "      <th>TT4</th>\n",
+       "      <th>T4U measured</th>\n",
+       "      <th>T4U</th>\n",
+       "      <th>FTI measured</th>\n",
+       "      <th>FTI</th>\n",
+       "      <th>TBG measured</th>\n",
+       "      <th>TBG</th>\n",
+       "      <th>referral source</th>\n",
+       "      <th>classes</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>41.0</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>125.0</td>\n",
+       "      <td>t</td>\n",
+       "      <td>1.14</td>\n",
+       "      <td>t</td>\n",
+       "      <td>109.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>SVHC</td>\n",
+       "      <td>negative.|3733</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>23.0</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>102.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|1442</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>46.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>109.0</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.91</td>\n",
+       "      <td>t</td>\n",
+       "      <td>120.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|2965</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>70.0</td>\n",
+       "      <td>F</td>\n",
+       "      <td>t</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>175.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|806</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>70.0</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>61.0</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.87</td>\n",
+       "      <td>t</td>\n",
+       "      <td>70.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>SVI</td>\n",
+       "      <td>negative.|2807</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2795</th>\n",
+       "      <td>70.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>155.0</td>\n",
+       "      <td>t</td>\n",
+       "      <td>1.05</td>\n",
+       "      <td>t</td>\n",
+       "      <td>148.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>SVI</td>\n",
+       "      <td>negative.|3689</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2796</th>\n",
+       "      <td>73.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>f</td>\n",
+       "      <td>t</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>63.0</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.88</td>\n",
+       "      <td>t</td>\n",
+       "      <td>72.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|3652</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2797</th>\n",
+       "      <td>75.0</td>\n",
+       "      <td>M</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>147.0</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.80</td>\n",
+       "      <td>t</td>\n",
+       "      <td>183.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|1287</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2798</th>\n",
+       "      <td>60.0</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.83</td>\n",
+       "      <td>t</td>\n",
+       "      <td>121.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>other</td>\n",
+       "      <td>negative.|3496</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2799</th>\n",
+       "      <td>81.0</td>\n",
+       "      <td>F</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>f</td>\n",
+       "      <td>...</td>\n",
+       "      <td>t</td>\n",
+       "      <td>114.0</td>\n",
+       "      <td>t</td>\n",
+       "      <td>0.99</td>\n",
+       "      <td>t</td>\n",
+       "      <td>115.0</td>\n",
+       "      <td>f</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>SVI</td>\n",
+       "      <td>negative.|724</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2800 rows × 30 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       age sex on thyroxine query on thyroxine on antithyroid medication sick  \\\n",
+       "0     41.0   F            f                  f                         f    f   \n",
+       "1     23.0   F            f                  f                         f    f   \n",
+       "2     46.0   M            f                  f                         f    f   \n",
+       "3     70.0   F            t                  f                         f    f   \n",
+       "4     70.0   F            f                  f                         f    f   \n",
+       "...    ...  ..          ...                ...                       ...  ...   \n",
+       "2795  70.0   M            f                  f                         f    f   \n",
+       "2796  73.0   M            f                  t                         f    f   \n",
+       "2797  75.0   M            f                  f                         f    f   \n",
+       "2798  60.0   F            f                  f                         f    f   \n",
+       "2799  81.0   F            f                  f                         f    f   \n",
+       "\n",
+       "     pregnant thyroid surgery I131 treatment query hypothyroid  ...  \\\n",
+       "0           f               f              f                 f  ...   \n",
+       "1           f               f              f                 f  ...   \n",
+       "2           f               f              f                 f  ...   \n",
+       "3           f               f              f                 f  ...   \n",
+       "4           f               f              f                 f  ...   \n",
+       "...       ...             ...            ...               ...  ...   \n",
+       "2795        f               f              f                 f  ...   \n",
+       "2796        f               f              f                 f  ...   \n",
+       "2797        f               f              f                 f  ...   \n",
+       "2798        f               f              f                 f  ...   \n",
+       "2799        f               f              f                 f  ...   \n",
+       "\n",
+       "     TT4 measured    TT4 T4U measured   T4U FTI measured    FTI TBG measured  \\\n",
+       "0               t  125.0            t  1.14            t  109.0            f   \n",
+       "1               t  102.0            f   NaN            f    NaN            f   \n",
+       "2               t  109.0            t  0.91            t  120.0            f   \n",
+       "3               t  175.0            f   NaN            f    NaN            f   \n",
+       "4               t   61.0            t  0.87            t   70.0            f   \n",
+       "...           ...    ...          ...   ...          ...    ...          ...   \n",
+       "2795            t  155.0            t  1.05            t  148.0            f   \n",
+       "2796            t   63.0            t  0.88            t   72.0            f   \n",
+       "2797            t  147.0            t  0.80            t  183.0            f   \n",
+       "2798            t  100.0            t  0.83            t  121.0            f   \n",
+       "2799            t  114.0            t  0.99            t  115.0            f   \n",
+       "\n",
+       "      TBG referral source         classes  \n",
+       "0     NaN            SVHC  negative.|3733  \n",
+       "1     NaN           other  negative.|1442  \n",
+       "2     NaN           other  negative.|2965  \n",
+       "3     NaN           other   negative.|806  \n",
+       "4     NaN             SVI  negative.|2807  \n",
+       "...   ...             ...             ...  \n",
+       "2795  NaN             SVI  negative.|3689  \n",
+       "2796  NaN           other  negative.|3652  \n",
+       "2797  NaN           other  negative.|1287  \n",
+       "2798  NaN           other  negative.|3496  \n",
+       "2799  NaN             SVI   negative.|724  \n",
+       "\n",
+       "[2800 rows x 30 columns]"
+      ]
+     },
+     "execution_count": 622,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 623,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "age                          float64\n",
+       "sex                           object\n",
+       "on thyroxine                  object\n",
+       "query on thyroxine            object\n",
+       "on antithyroid medication     object\n",
+       "sick                          object\n",
+       "pregnant                      object\n",
+       "thyroid surgery               object\n",
+       "I131 treatment                object\n",
+       "query hypothyroid             object\n",
+       "query hyperthyroid            object\n",
+       "lithium                       object\n",
+       "goitre                        object\n",
+       "tumor                         object\n",
+       "hypopituitary                 object\n",
+       "psych                         object\n",
+       "TSH measured                  object\n",
+       "TSH                          float64\n",
+       "T3 measured                   object\n",
+       "T3                           float64\n",
+       "TT4 measured                  object\n",
+       "TT4                          float64\n",
+       "T4U measured                  object\n",
+       "T4U                          float64\n",
+       "FTI measured                  object\n",
+       "FTI                          float64\n",
+       "TBG measured                  object\n",
+       "TBG                          float64\n",
+       "referral source               object\n",
+       "classes                       object\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 623,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.dtypes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1.3 Thyroid disease (continued)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 624,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "yes_no_columns = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 26]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 625,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Column: on thyroxine %Yes: 11.79%\n",
+      "\n",
+      "Column: query on thyroxine %Yes: 1.43%\n",
+      "\n",
+      "Column: on antithyroid medication %Yes: 1.21%\n",
+      "\n",
+      "Column: sick %Yes: 3.93%\n",
+      "\n",
+      "Column: pregnant %Yes: 1.46%\n",
+      "\n",
+      "Column: thyroid surgery %Yes: 1.39%\n",
+      "\n",
+      "Column: I131 treatment %Yes: 1.71%\n",
+      "\n",
+      "Column: query hypothyroid %Yes: 5.82%\n",
+      "\n",
+      "Column: query hyperthyroid %Yes: 6.18%\n",
+      "\n",
+      "Column: lithium %Yes: 0.50%\n",
+      "\n",
+      "Column: goitre %Yes: 0.89%\n",
+      "\n",
+      "Column: tumor %Yes: 2.54%\n",
+      "\n",
+      "Column: hypopituitary %Yes: 0.04%\n",
+      "\n",
+      "Column: psych %Yes: 4.82%\n",
+      "\n",
+      "Column: TSH measured %Yes: 89.86%\n",
+      "\n",
+      "Column: T3 measured %Yes: 79.11%\n",
+      "\n",
+      "Column: TT4 measured %Yes: 93.43%\n",
+      "\n",
+      "Column: T4U measured %Yes: 89.39%\n",
+      "\n",
+      "Column: FTI measured %Yes: 89.46%\n",
+      "\n",
+      "Column: TBG measured %Yes: 0.00%\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "for column in yes_no_columns:\n",
+    "    answers = df2.iloc[:, column]\n",
+    "    # The mean of the boolean mask is the share of rows answered \"t\"; unlike a\n",
+    "    # literal row count it stays correct if the number of rows changes.\n",
+    "    yes_share = (answers == \"t\").mean()\n",
+    "    print(f\"Column: {df2.columns[column]} %Yes: {yes_share:.2%}\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 626,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Measurement columns to run the calculations on.\n",
+    "calculate_columns = [\n",
+    "    \"TSH\",\n",
+    "    \"T3\",\n",
+    "    \"TT4\",\n",
+    "    \"T4U\",\n",
+    "    \"FTI\",\n",
+    "    \"TBG\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 627,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "481.7251481915739"
+      ]
+     },
+     "execution_count": 627,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Sum of squared TSH values divided by the number of non-missing TSH entries.\n",
+    "tsh = df2[\"TSH\"]\n",
+    "(tsh ** 2).sum() / tsh.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 628,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0       1.212020e+06\n",
+      "1       1.212020e+06\n",
+      "2       1.212020e+06\n",
+      "3       1.212020e+06\n",
+      "4       1.212020e+06\n",
+      "            ...     \n",
+      "2795    1.212020e+06\n",
+      "2796             inf\n",
+      "2797             inf\n",
+      "2798    1.212020e+06\n",
+      "2799    1.212020e+06\n",
+      "Name: TSH, Length: 2800, dtype: float64\n",
+      "0       10588.025\n",
+      "1       10588.025\n",
+      "2             inf\n",
+      "3       10588.025\n",
+      "4       10588.025\n",
+      "          ...    \n",
+      "2795          inf\n",
+      "2796    10588.025\n",
+      "2797          inf\n",
+      "2798          inf\n",
+      "2799    10588.025\n",
+      "Name: T3, Length: 2800, dtype: float64\n",
+      "0       34397613.32\n",
+      "1       34397613.32\n",
+      "2       34397613.32\n",
+      "3       34397613.32\n",
+      "4       34397613.32\n",
+      "           ...     \n",
+      "2795    34397613.32\n",
+      "2796    34397613.32\n",
+      "2797    34397613.32\n",
+      "2798    34397613.32\n",
+      "2799    34397613.32\n",
+      "Name: TT4, Length: 2800, dtype: float64\n",
+      "0       2587.103636\n",
+      "1               inf\n",
+      "2       2587.103636\n",
+      "3               inf\n",
+      "4       2587.103636\n",
+      "           ...     \n",
+      "2795    2587.103636\n",
+      "2796    2587.103636\n",
+      "2797    2587.103636\n",
+      "2798    2587.103636\n",
+      "2799    2587.103636\n",
+      "Name: T4U, Length: 2800, dtype: float64\n",
+      "0       33454030.13\n",
+      "1               inf\n",
+      "2       33454030.13\n",
+      "3               inf\n",
+      "4       33454030.13\n",
+      "           ...     \n",
+      "2795    33454030.13\n",
+      "2796    33454030.13\n",
+      "2797    33454030.13\n",
+      "2798    33454030.13\n",
+      "2799    33454030.13\n",
+      "Name: FTI, Length: 2800, dtype: float64\n",
+      "0      NaN\n",
+      "1      NaN\n",
+      "2      NaN\n",
+      "3      NaN\n",
+      "4      NaN\n",
+      "        ..\n",
+      "2795   NaN\n",
+      "2796   NaN\n",
+      "2797   NaN\n",
+      "2798   NaN\n",
+      "2799   NaN\n",
+      "Name: TBG, Length: 2800, dtype: float64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Mean of the squared values of each measurement column. DataFrame.mean()\n",
+    "# skips NaN, so each column's sum of squares is divided by its own number of\n",
+    "# non-missing values; a column with no values at all yields NaN.\n",
+    "(df2[calculate_columns] ** 2).mean()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}