diff --git a/exercise_1.ipynb b/exercise_1.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e9cd366a516cee08666e7ddcb16a3d283b9dfd1c --- /dev/null +++ b/exercise_1.ipynb @@ -0,0 +1,1881 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise 1" + ] + }, + { + "cell_type": "code", + "execution_count": 603, + "metadata": {}, + "outputs": [], + "source": [ + "import csv as C\n", + "import numpy as N\n", + "import pandas as P" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.1 Data structures" + ] + }, + { + "cell_type": "code", + "execution_count": 604, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 5\n", + "1 8\n", + "2 7\n", + "3 6\n", + "4 8\n", + "5 4\n", + "Name: A, dtype: int64\n", + "0 1.3\n", + "1 2.1\n", + "2 1.8\n", + "3 1.2\n", + "4 1.4\n", + "5 2.3\n", + "Name: B, dtype: float64\n", + "0 y\n", + "1 y\n", + "2 n\n", + "3 y\n", + "4 n\n", + "5 n\n", + "Name: C, dtype: object\n" + ] + } + ], + "source": [ + "A = P.Series([5,8,7,6,8,4],\n", + " name = \"A\")\n", + "B = P.Series([1.3, 2.1, 1.8, 1.2, 1.4, 2.3],\n", + " name = \"B\")\n", + "C = P.Series([\"y\",\"y\",\"n\",\"y\",\"n\",\"n\"],\n", + " name = \"C\")\n", + "\n", + "print(A)\n", + "print(B)\n", + "print(C)" + ] + }, + { + "cell_type": "code", + "execution_count": 605, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>A</th>\n", + " <th>B</th>\n", + " <th>C</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " 
<tr>\n", + " <th>0</th>\n", + " <td>5</td>\n", + " <td>1.3</td>\n", + " <td>y</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>8</td>\n", + " <td>2.1</td>\n", + " <td>y</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>7</td>\n", + " <td>1.8</td>\n", + " <td>n</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>6</td>\n", + " <td>1.2</td>\n", + " <td>y</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>8</td>\n", + " <td>1.4</td>\n", + " <td>n</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>4</td>\n", + " <td>2.3</td>\n", + " <td>n</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " A B C\n", + "0 5 1.3 y\n", + "1 8 2.1 y\n", + "2 7 1.8 n\n", + "3 6 1.2 y\n", + "4 8 1.4 n\n", + "5 4 2.3 n" + ] + }, + "execution_count": 605, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1 = P.concat([A,B,C], axis=1)\n", + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 606, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.8" + ] + }, + "execution_count": 606, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.iloc[2,1]" + ] + }, + { + "cell_type": "code", + "execution_count": 607, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "A 6\n", + "B 1.2\n", + "C y\n", + "Name: 3, dtype: object" + ] + }, + "execution_count": 607, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.iloc[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 608, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" 
class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>B</th>\n", + " <th>C</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>2.1</td>\n", + " <td>y</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>1.8</td>\n", + " <td>n</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1.2</td>\n", + " <td>y</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1.4</td>\n", + " <td>n</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " B C\n", + "1 2.1 y\n", + "2 1.8 n\n", + "3 1.2 y\n", + "4 1.4 n" + ] + }, + "execution_count": 608, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "subset = df1.iloc[1:5,[1,2]]\n", + "subset" + ] + }, + { + "cell_type": "code", + "execution_count": 609, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>A</th>\n", + " <td>5</td>\n", + " <td>8</td>\n", + " <td>7</td>\n", + " <td>6</td>\n", + " <td>8</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>B</th>\n", + " <td>1.3</td>\n", + " <td>2.1</td>\n", + " <td>1.8</td>\n", + " <td>1.2</td>\n", + " <td>1.4</td>\n", + " <td>2.3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>C</th>\n", + " <td>y</td>\n", + " <td>y</td>\n", + " <td>n</td>\n", + " <td>y</td>\n", + " <td>n</td>\n", + " 
<td>n</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4 5\n", + "A 5 8 7 6 8 4\n", + "B 1.3 2.1 1.8 1.2 1.4 2.3\n", + "C y y n y n n" + ] + }, + "execution_count": 609, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.transpose()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.2 Thyroid Disease" + ] + }, + { + "cell_type": "code", + "execution_count": 610, + "metadata": {}, + "outputs": [], + "source": [ + "df2 = P.read_csv('allbp.data', header=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 611, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " <th>6</th>\n", + " <th>7</th>\n", + " <th>8</th>\n", + " <th>9</th>\n", + " <th>...</th>\n", + " <th>20</th>\n", + " <th>21</th>\n", + " <th>22</th>\n", + " <th>23</th>\n", + " <th>24</th>\n", + " <th>25</th>\n", + " <th>26</th>\n", + " <th>27</th>\n", + " <th>28</th>\n", + " <th>29</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>41</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>125</td>\n", + " <td>t</td>\n", + " <td>1.14</td>\n", + " <td>t</td>\n", + " <td>109</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " 
<td>SVHC</td>\n", + " <td>negative.|3733</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>23</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>102</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>other</td>\n", + " <td>negative.|1442</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>46</td>\n", + " <td>M</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>109</td>\n", + " <td>t</td>\n", + " <td>0.91</td>\n", + " <td>t</td>\n", + " <td>120</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>other</td>\n", + " <td>negative.|2965</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>70</td>\n", + " <td>F</td>\n", + " <td>t</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>175</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>other</td>\n", + " <td>negative.|806</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>70</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>61</td>\n", + " <td>t</td>\n", + " <td>0.87</td>\n", + " <td>t</td>\n", + " <td>70</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>SVI</td>\n", + " <td>negative.|2807</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 30 
columns</p>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 20 21 22 23 24 25 26 27 28 \\\n", + "0 41 F f f f f f f f f ... t 125 t 1.14 t 109 f ? SVHC \n", + "1 23 F f f f f f f f f ... t 102 f ? f ? f ? other \n", + "2 46 M f f f f f f f f ... t 109 t 0.91 t 120 f ? other \n", + "3 70 F t f f f f f f f ... t 175 f ? f ? f ? other \n", + "4 70 F f f f f f f f f ... t 61 t 0.87 t 70 f ? SVI \n", + "\n", + " 29 \n", + "0 negative.|3733 \n", + "1 negative.|1442 \n", + "2 negative.|2965 \n", + "3 negative.|806 \n", + "4 negative.|2807 \n", + "\n", + "[5 rows x 30 columns]" + ] + }, + "execution_count": 611, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 612, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 2800 entries, 0 to 2799\n", + "Data columns (total 30 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 0 2800 non-null object\n", + " 1 1 2800 non-null object\n", + " 2 2 2800 non-null object\n", + " 3 3 2800 non-null object\n", + " 4 4 2800 non-null object\n", + " 5 5 2800 non-null object\n", + " 6 6 2800 non-null object\n", + " 7 7 2800 non-null object\n", + " 8 8 2800 non-null object\n", + " 9 9 2800 non-null object\n", + " 10 10 2800 non-null object\n", + " 11 11 2800 non-null object\n", + " 12 12 2800 non-null object\n", + " 13 13 2800 non-null object\n", + " 14 14 2800 non-null object\n", + " 15 15 2800 non-null object\n", + " 16 16 2800 non-null object\n", + " 17 17 2800 non-null object\n", + " 18 18 2800 non-null object\n", + " 19 19 2800 non-null object\n", + " 20 20 2800 non-null object\n", + " 21 21 2800 non-null object\n", + " 22 22 2800 non-null object\n", + " 23 23 2800 non-null object\n", + " 24 24 2800 non-null object\n", + " 25 25 2800 non-null object\n", + " 26 26 2800 non-null 
object\n", + " 27 27 2800 non-null object\n", + " 28 28 2800 non-null object\n", + " 29 29 2800 non-null object\n", + "dtypes: object(30)\n", + "memory usage: 656.4+ KB\n" + ] + } + ], + "source": [ + "df2.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 613, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " <th>6</th>\n", + " <th>7</th>\n", + " <th>8</th>\n", + " <th>9</th>\n", + " <th>...</th>\n", + " <th>20</th>\n", + " <th>21</th>\n", + " <th>22</th>\n", + " <th>23</th>\n", + " <th>24</th>\n", + " <th>25</th>\n", + " <th>26</th>\n", + " <th>27</th>\n", + " <th>28</th>\n", + " <th>29</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>count</th>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>...</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " </tr>\n", + " <tr>\n", + " <th>unique</th>\n", + " <td>94</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>...</td>\n", + " <td>2</td>\n", + 
" <td>218</td>\n", + " <td>2</td>\n", + " <td>139</td>\n", + " <td>2</td>\n", + " <td>210</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>2800</td>\n", + " </tr>\n", + " <tr>\n", + " <th>top</th>\n", + " <td>59</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>?</td>\n", + " <td>t</td>\n", + " <td>?</td>\n", + " <td>t</td>\n", + " <td>?</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>other</td>\n", + " <td>negative.|3733</td>\n", + " </tr>\n", + " <tr>\n", + " <th>freq</th>\n", + " <td>75</td>\n", + " <td>1830</td>\n", + " <td>2470</td>\n", + " <td>2760</td>\n", + " <td>2766</td>\n", + " <td>2690</td>\n", + " <td>2759</td>\n", + " <td>2761</td>\n", + " <td>2752</td>\n", + " <td>2637</td>\n", + " <td>...</td>\n", + " <td>2616</td>\n", + " <td>184</td>\n", + " <td>2503</td>\n", + " <td>297</td>\n", + " <td>2505</td>\n", + " <td>295</td>\n", + " <td>2800</td>\n", + " <td>2800</td>\n", + " <td>1632</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>4 rows × 30 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 20 \\\n", + "count 2800 2800 2800 2800 2800 2800 2800 2800 2800 2800 ... 2800 \n", + "unique 94 3 2 2 2 2 2 2 2 2 ... 2 \n", + "top 59 F f f f f f f f f ... t \n", + "freq 75 1830 2470 2760 2766 2690 2759 2761 2752 2637 ... 2616 \n", + "\n", + " 21 22 23 24 25 26 27 28 29 \n", + "count 2800 2800 2800 2800 2800 2800 2800 2800 2800 \n", + "unique 218 2 139 2 210 1 1 5 2800 \n", + "top ? t ? t ? f ? 
other negative.|3733 \n", + "freq 184 2503 297 2505 295 2800 2800 1632 1 \n", + "\n", + "[4 rows x 30 columns]" + ] + }, + "execution_count": 613, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 614, + "metadata": {}, + "outputs": [], + "source": [ + "df2.columns =[\n", + " \"age\",\n", + " \"sex\",\n", + " \"on thyroxine\",\n", + " \"query on thyroxine\",\n", + " \"on antithyroid medication\",\n", + " \"sick\",\n", + " \"pregnant\",\n", + " \"thyroid surgery\",\n", + " \"I131 treatment\",\n", + " \"query hypothyroid\",\n", + " \"query hyperthyroid\",\n", + " \"lithium\",\n", + " \"goitre\",\n", + " \"tumor\",\n", + " \"hypopituitary\",\n", + " \"psych\",\n", + " \"TSH measured\",\n", + " \"TSH\",\n", + " \"T3 measured\",\n", + " \"T3\",\n", + " \"TT4 measured\",\n", + " \"TT4\",\n", + " \"T4U measured\",\n", + " \"T4U\",\n", + " \"FTI measured\",\n", + " \"FTI\",\n", + " \"TBG measured\",\n", + " \"TBG\",\n", + " \"referral source\",\n", + " \"classes\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 615, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>age</th>\n", + " <th>sex</th>\n", + " <th>on thyroxine</th>\n", + " <th>query on thyroxine</th>\n", + " <th>on antithyroid medication</th>\n", + " <th>sick</th>\n", + " <th>pregnant</th>\n", + " <th>thyroid surgery</th>\n", + " <th>I131 treatment</th>\n", + " <th>query hypothyroid</th>\n", + " <th>...</th>\n", + " <th>TT4 measured</th>\n", + " <th>TT4</th>\n", 
+ " <th>T4U measured</th>\n", + " <th>T4U</th>\n", + " <th>FTI measured</th>\n", + " <th>FTI</th>\n", + " <th>TBG measured</th>\n", + " <th>TBG</th>\n", + " <th>referral source</th>\n", + " <th>classes</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>41</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>125</td>\n", + " <td>t</td>\n", + " <td>1.14</td>\n", + " <td>t</td>\n", + " <td>109</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>SVHC</td>\n", + " <td>negative.|3733</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>23</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>102</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>other</td>\n", + " <td>negative.|1442</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>46</td>\n", + " <td>M</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>109</td>\n", + " <td>t</td>\n", + " <td>0.91</td>\n", + " <td>t</td>\n", + " <td>120</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>other</td>\n", + " <td>negative.|2965</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>70</td>\n", + " <td>F</td>\n", + " <td>t</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>175</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " 
<td>f</td>\n", + " <td>?</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>other</td>\n", + " <td>negative.|806</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>70</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>61</td>\n", + " <td>t</td>\n", + " <td>0.87</td>\n", + " <td>t</td>\n", + " <td>70</td>\n", + " <td>f</td>\n", + " <td>?</td>\n", + " <td>SVI</td>\n", + " <td>negative.|2807</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 30 columns</p>\n", + "</div>" + ], + "text/plain": [ + " age sex on thyroxine query on thyroxine on antithyroid medication sick \\\n", + "0 41 F f f f f \n", + "1 23 F f f f f \n", + "2 46 M f f f f \n", + "3 70 F t f f f \n", + "4 70 F f f f f \n", + "\n", + " pregnant thyroid surgery I131 treatment query hypothyroid ... TT4 measured \\\n", + "0 f f f f ... t \n", + "1 f f f f ... t \n", + "2 f f f f ... t \n", + "3 f f f f ... t \n", + "4 f f f f ... t \n", + "\n", + " TT4 T4U measured T4U FTI measured FTI TBG measured TBG referral source \\\n", + "0 125 t 1.14 t 109 f ? SVHC \n", + "1 102 f ? f ? f ? other \n", + "2 109 t 0.91 t 120 f ? other \n", + "3 175 f ? f ? f ? other \n", + "4 61 t 0.87 t 70 f ? 
SVI \n", + "\n", + " classes \n", + "0 negative.|3733 \n", + "1 negative.|1442 \n", + "2 negative.|2965 \n", + "3 negative.|806 \n", + "4 negative.|2807 \n", + "\n", + "[5 rows x 30 columns]" + ] + }, + "execution_count": 615, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 616, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2800, 30)" + ] + }, + "execution_count": 616, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- How many observations and how many variables are there in the data? <br>\n", + "2800 observations, 30 variables\n" + ] + }, + { + "cell_type": "code", + "execution_count": 617, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/fp/cf7b8z110lj8yjpy9rj8f5fr0000gn/T/ipykernel_5773/4070518099.py:1: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " df2.replace(['?', 'nan', 'missing'], N.nan, inplace=True)\n" + ] + } + ], + "source": [ + "df2 = df2.replace(['?', 'nan', 'missing'], N.nan).infer_objects()" + ] + }, + { + "cell_type": "code", + "execution_count": 618, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "age 1\n", + "sex 110\n", + "on thyroxine 0\n", + "query on thyroxine 0\n", + "on antithyroid medication 0\n", + "sick 0\n", + "pregnant 0\n", + "thyroid surgery 0\n", + "I131 treatment 0\n", + "query hypothyroid 0\n", + "query hyperthyroid 0\n", + "lithium 0\n", + "goitre 0\n", + "tumor 0\n", + "hypopituitary 0\n", + "psych 0\n", + "TSH measured 0\n", + "TSH 284\n", + "T3 measured 0\n", + "T3 585\n", + "TT4 measured 0\n", + "TT4 184\n", + "T4U measured 0\n", + "T4U 297\n", + "FTI measured 0\n", + "FTI 295\n", + "TBG measured 0\n", + "TBG 2800\n", + "referral source 0\n", + "classes 0\n", + "dtype: int64" + ] + }, + "execution_count": 618, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 619, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "age object\n", + "sex object\n", + "on thyroxine object\n", + "query on thyroxine object\n", + "on antithyroid medication object\n", + "sick object\n", + "pregnant object\n", + "thyroid surgery object\n", + "I131 treatment object\n", + "query hypothyroid object\n", + "query hyperthyroid object\n", + "lithium object\n", + "goitre object\n", + "tumor object\n", + "hypopituitary object\n", + "psych object\n", + "TSH measured object\n", + "TSH object\n", + "T3 measured object\n", + "T3 object\n", + "TT4 measured object\n", + "TT4 object\n", + "T4U measured object\n", + "T4U object\n", + "FTI measured object\n", + "FTI object\n", + "TBG measured object\n", + "TBG float64\n", + "referral source object\n", + "classes object\n", + "dtype: 
object" + ] + }, + "execution_count": 619, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 620, + "metadata": {}, + "outputs": [], + "source": [ + "columns_to_change = [\"age\",\"TSH\", \"T3\", \"TT4\", \"T4U\", \"FTI\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 621, + "metadata": {}, + "outputs": [], + "source": [ + "numeric_cols = dict.fromkeys(columns_to_change, float)\n", + "df2 = df2.astype(numeric_cols)" + ] + }, + { + "cell_type": "code", + "execution_count": 622, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>age</th>\n", + " <th>sex</th>\n", + " <th>on thyroxine</th>\n", + " <th>query on thyroxine</th>\n", + " <th>on antithyroid medication</th>\n", + " <th>sick</th>\n", + " <th>pregnant</th>\n", + " <th>thyroid surgery</th>\n", + " <th>I131 treatment</th>\n", + " <th>query hypothyroid</th>\n", + " <th>...</th>\n", + " <th>TT4 measured</th>\n", + " <th>TT4</th>\n", + " <th>T4U measured</th>\n", + " <th>T4U</th>\n", + " <th>FTI measured</th>\n", + " <th>FTI</th>\n", + " <th>TBG measured</th>\n", + " <th>TBG</th>\n", + " <th>referral source</th>\n", + " <th>classes</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>41.0</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>125.0</td>\n", + " <td>t</td>\n", 
+ " <td>1.14</td>\n", + " <td>t</td>\n", + " <td>109.0</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>SVHC</td>\n", + " <td>negative.|3733</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>23.0</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>102.0</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>other</td>\n", + " <td>negative.|1442</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>46.0</td>\n", + " <td>M</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>109.0</td>\n", + " <td>t</td>\n", + " <td>0.91</td>\n", + " <td>t</td>\n", + " <td>120.0</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>other</td>\n", + " <td>negative.|2965</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>70.0</td>\n", + " <td>F</td>\n", + " <td>t</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>175.0</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>other</td>\n", + " <td>negative.|806</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>70.0</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>61.0</td>\n", + " <td>t</td>\n", + " <td>0.87</td>\n", + " <td>t</td>\n", + " <td>70.0</td>\n", + " <td>f</td>\n", + " 
<td>NaN</td>\n", + " <td>SVI</td>\n", + " <td>negative.|2807</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2795</th>\n", + " <td>70.0</td>\n", + " <td>M</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>155.0</td>\n", + " <td>t</td>\n", + " <td>1.05</td>\n", + " <td>t</td>\n", + " <td>148.0</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>SVI</td>\n", + " <td>negative.|3689</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2796</th>\n", + " <td>73.0</td>\n", + " <td>M</td>\n", + " <td>f</td>\n", + " <td>t</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>63.0</td>\n", + " <td>t</td>\n", + " <td>0.88</td>\n", + " <td>t</td>\n", + " <td>72.0</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>other</td>\n", + " <td>negative.|3652</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2797</th>\n", + " <td>75.0</td>\n", + " <td>M</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>147.0</td>\n", + " <td>t</td>\n", + " <td>0.80</td>\n", + " <td>t</td>\n", + " <td>183.0</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>other</td>\n", + " 
<td>negative.|1287</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2798</th>\n", + " <td>60.0</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>100.0</td>\n", + " <td>t</td>\n", + " <td>0.83</td>\n", + " <td>t</td>\n", + " <td>121.0</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>other</td>\n", + " <td>negative.|3496</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2799</th>\n", + " <td>81.0</td>\n", + " <td>F</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>f</td>\n", + " <td>...</td>\n", + " <td>t</td>\n", + " <td>114.0</td>\n", + " <td>t</td>\n", + " <td>0.99</td>\n", + " <td>t</td>\n", + " <td>115.0</td>\n", + " <td>f</td>\n", + " <td>NaN</td>\n", + " <td>SVI</td>\n", + " <td>negative.|724</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>2800 rows × 30 columns</p>\n", + "</div>" + ], + "text/plain": [ + " age sex on thyroxine query on thyroxine on antithyroid medication sick \\\n", + "0 41.0 F f f f f \n", + "1 23.0 F f f f f \n", + "2 46.0 M f f f f \n", + "3 70.0 F t f f f \n", + "4 70.0 F f f f f \n", + "... ... .. ... ... ... ... \n", + "2795 70.0 M f f f f \n", + "2796 73.0 M f t f f \n", + "2797 75.0 M f f f f \n", + "2798 60.0 F f f f f \n", + "2799 81.0 F f f f f \n", + "\n", + " pregnant thyroid surgery I131 treatment query hypothyroid ... \\\n", + "0 f f f f ... \n", + "1 f f f f ... \n", + "2 f f f f ... \n", + "3 f f f f ... \n", + "4 f f f f ... \n", + "... ... ... ... ... ... \n", + "2795 f f f f ... \n", + "2796 f f f f ... \n", + "2797 f f f f ... \n", + "2798 f f f f ... \n", + "2799 f f f f ... 
\n", + "\n", + " TT4 measured TT4 T4U measured T4U FTI measured FTI TBG measured \\\n", + "0 t 125.0 t 1.14 t 109.0 f \n", + "1 t 102.0 f NaN f NaN f \n", + "2 t 109.0 t 0.91 t 120.0 f \n", + "3 t 175.0 f NaN f NaN f \n", + "4 t 61.0 t 0.87 t 70.0 f \n", + "... ... ... ... ... ... ... ... \n", + "2795 t 155.0 t 1.05 t 148.0 f \n", + "2796 t 63.0 t 0.88 t 72.0 f \n", + "2797 t 147.0 t 0.80 t 183.0 f \n", + "2798 t 100.0 t 0.83 t 121.0 f \n", + "2799 t 114.0 t 0.99 t 115.0 f \n", + "\n", + " TBG referral source classes \n", + "0 NaN SVHC negative.|3733 \n", + "1 NaN other negative.|1442 \n", + "2 NaN other negative.|2965 \n", + "3 NaN other negative.|806 \n", + "4 NaN SVI negative.|2807 \n", + "... ... ... ... \n", + "2795 NaN SVI negative.|3689 \n", + "2796 NaN other negative.|3652 \n", + "2797 NaN other negative.|1287 \n", + "2798 NaN other negative.|3496 \n", + "2799 NaN SVI negative.|724 \n", + "\n", + "[2800 rows x 30 columns]" + ] + }, + "execution_count": 622, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 623, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "age float64\n", + "sex object\n", + "on thyroxine object\n", + "query on thyroxine object\n", + "on antithyroid medication object\n", + "sick object\n", + "pregnant object\n", + "thyroid surgery object\n", + "I131 treatment object\n", + "query hypothyroid object\n", + "query hyperthyroid object\n", + "lithium object\n", + "goitre object\n", + "tumor object\n", + "hypopituitary object\n", + "psych object\n", + "TSH measured object\n", + "TSH float64\n", + "T3 measured object\n", + "T3 float64\n", + "TT4 measured object\n", + "TT4 float64\n", + "T4U measured object\n", + "T4U float64\n", + "FTI measured object\n", + "FTI float64\n", + "TBG measured object\n", + "TBG float64\n", + "referral source object\n", + "classes object\n", + "dtype: object" + ] + }, + "execution_count": 623, + "metadata": 
# Cell: show the dtype pandas inferred for every column of df2.
df2.dtypes

# ## 1.3 Thyroid disease (continued)

# Cell: positional indices (for .iloc) of the "t"/"f" flag columns of df2:
# positions 2-15 are the patient-history flags, and 16, 18, ..., 26 are the
# "<name> measured" indicator columns.
yes_no_columns = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 26]

# Cell: for each flag column, print the share of rows whose value is "t".
for column in yes_no_columns:
    # The mean of the boolean mask is (rows equal to "t") / (all rows), so the
    # denominator always follows the frame's actual length instead of a
    # hard-coded 2800. Missing values compare as False and still count as rows.
    yes_share = (df2.iloc[:, column] == "t").mean()
    print(f"Column: {df2.columns[column]} %Yes: {yes_share:.2%}\n")

# Cell: names of the float64 measurement columns used by the
# mean-of-squares cells that follow.
calculate_columns = ["TSH", "T3", "TT4", "T4U", "FTI", "TBG"]
# Cell: mean of the squared TSH values, taken over the non-missing entries.
tsh = df2["TSH"]
# .sum() skips NaN, and .count() is the number of non-NaN entries, so missing
# measurements affect neither the numerator nor the denominator.
tsh.pow(2).sum() / tsh.count()
# Cell: mean of squares of every measurement column, ignoring missing values.
for column in calculate_columns:
    values = df2[column]
    # Number of non-missing entries. The denominator must be this scalar count:
    # dividing by the boolean mask `values.notna()` itself broadcasts the sum
    # over every row and yields a Series of one repeated constant and inf.
    observed = values.count()
    # A column with no observations at all has nothing to average; report NaN
    # explicitly rather than evaluating 0 / 0.
    mean_square = (values ** 2).sum() / observed if observed else float("nan")
    print(f"{column}: {mean_square}")