Files
Data_ScienceUse_Cases/Classification/Breast_Cancer_Wisconsin.ipynb
2023-12-01 12:52:36 +07:00

2803 lines
106 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "bd00ebdc",
"metadata": {},
"source": [
"# Load Basic Libraries\n",
"Load the libraries needed to read and display the data."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "375f1a0c",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b8760991",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>diagnosis</th>\n",
" <th>radius_mean</th>\n",
" <th>texture_mean</th>\n",
" <th>perimeter_mean</th>\n",
" <th>area_mean</th>\n",
" <th>smoothness_mean</th>\n",
" <th>compactness_mean</th>\n",
" <th>concavity_mean</th>\n",
" <th>concave points_mean</th>\n",
" <th>...</th>\n",
" <th>texture_worst</th>\n",
" <th>perimeter_worst</th>\n",
" <th>area_worst</th>\n",
" <th>smoothness_worst</th>\n",
" <th>compactness_worst</th>\n",
" <th>concavity_worst</th>\n",
" <th>concave points_worst</th>\n",
" <th>symmetry_worst</th>\n",
" <th>fractal_dimension_worst</th>\n",
" <th>Unnamed: 32</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>842302</td>\n",
" <td>M</td>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>...</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>842517</td>\n",
" <td>M</td>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>...</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>84300903</td>\n",
" <td>M</td>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>...</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>84348301</td>\n",
" <td>M</td>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>...</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>84358402</td>\n",
" <td>M</td>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>...</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 33 columns</p>\n",
"</div>"
],
"text/plain": [
" id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
"0 842302 M 17.99 10.38 122.80 1001.0 \n",
"1 842517 M 20.57 17.77 132.90 1326.0 \n",
"2 84300903 M 19.69 21.25 130.00 1203.0 \n",
"3 84348301 M 11.42 20.38 77.58 386.1 \n",
"4 84358402 M 20.29 14.34 135.10 1297.0 \n",
"\n",
" smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
"0 0.11840 0.27760 0.3001 0.14710 \n",
"1 0.08474 0.07864 0.0869 0.07017 \n",
"2 0.10960 0.15990 0.1974 0.12790 \n",
"3 0.14250 0.28390 0.2414 0.10520 \n",
"4 0.10030 0.13280 0.1980 0.10430 \n",
"\n",
" ... texture_worst perimeter_worst area_worst smoothness_worst \\\n",
"0 ... 17.33 184.60 2019.0 0.1622 \n",
"1 ... 23.41 158.80 1956.0 0.1238 \n",
"2 ... 25.53 152.50 1709.0 0.1444 \n",
"3 ... 26.50 98.87 567.7 0.2098 \n",
"4 ... 16.67 152.20 1575.0 0.1374 \n",
"\n",
" compactness_worst concavity_worst concave points_worst symmetry_worst \\\n",
"0 0.6656 0.7119 0.2654 0.4601 \n",
"1 0.1866 0.2416 0.1860 0.2750 \n",
"2 0.4245 0.4504 0.2430 0.3613 \n",
"3 0.8663 0.6869 0.2575 0.6638 \n",
"4 0.2050 0.4000 0.1625 0.2364 \n",
"\n",
" fractal_dimension_worst Unnamed: 32 \n",
"0 0.11890 NaN \n",
"1 0.08902 NaN \n",
"2 0.08758 NaN \n",
"3 0.17300 NaN \n",
"4 0.07678 NaN \n",
"\n",
"[5 rows x 33 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the Breast Cancer Wisconsin CSV from its raw GitHub URL and preview the first rows\n",
"DATA_URL = \"https://raw.githubusercontent.com/youronlydimwit/Data_ScienceUse_Cases/main/Classification/Data/Breast_Cancer_Wisconsin.csv\"\n",
"df = pd.read_csv(DATA_URL)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "6eb25bbe",
"metadata": {},
"source": [
"# Preliminary check for missing values\n",
"Create a function that outputs a DataFrame with the following columns:\n",
"- Column Name\n",
"- The Data type\n",
"- Count of missing data (Nulls)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "72bb3dcc",
"metadata": {},
"outputs": [],
"source": [
"def completeness_check(input_df):\n",
"    \"\"\"Summarise the name, dtype and null count of every column.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    input_df : pandas.DataFrame\n",
"        Frame to inspect; it is only read, never modified.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        One row per column of ``input_df`` with the fields\n",
"        ``Column_Name``, ``Data_Type`` and ``Missing_Data``.\n",
"    \"\"\"\n",
"    # All three arrays follow the column order of input_df, so they line up row by row\n",
"    report = {\n",
"        'Column_Name': input_df.columns,\n",
"        'Data_Type': input_df.dtypes.values,\n",
"        'Missing_Data': input_df.isnull().sum().values,\n",
"    }\n",
"    return pd.DataFrame(report)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "dd4b5d0f",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Column_Name</th>\n",
" <th>Data_Type</th>\n",
" <th>Missing_Data</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>id</td>\n",
" <td>int64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>diagnosis</td>\n",
" <td>object</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>radius_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>texture_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>perimeter_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>area_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>smoothness_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>compactness_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>concavity_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>concave points_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>symmetry_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>fractal_dimension_mean</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>radius_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>texture_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>perimeter_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>area_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>smoothness_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>compactness_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>concavity_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>concave points_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>symmetry_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>fractal_dimension_se</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>radius_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>texture_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>perimeter_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>area_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>smoothness_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>compactness_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>concavity_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>concave points_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>symmetry_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>fractal_dimension_worst</td>\n",
" <td>float64</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>Unnamed: 32</td>\n",
" <td>float64</td>\n",
" <td>569</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Column_Name Data_Type Missing_Data\n",
"0 id int64 0\n",
"1 diagnosis object 0\n",
"2 radius_mean float64 0\n",
"3 texture_mean float64 0\n",
"4 perimeter_mean float64 0\n",
"5 area_mean float64 0\n",
"6 smoothness_mean float64 0\n",
"7 compactness_mean float64 0\n",
"8 concavity_mean float64 0\n",
"9 concave points_mean float64 0\n",
"10 symmetry_mean float64 0\n",
"11 fractal_dimension_mean float64 0\n",
"12 radius_se float64 0\n",
"13 texture_se float64 0\n",
"14 perimeter_se float64 0\n",
"15 area_se float64 0\n",
"16 smoothness_se float64 0\n",
"17 compactness_se float64 0\n",
"18 concavity_se float64 0\n",
"19 concave points_se float64 0\n",
"20 symmetry_se float64 0\n",
"21 fractal_dimension_se float64 0\n",
"22 radius_worst float64 0\n",
"23 texture_worst float64 0\n",
"24 perimeter_worst float64 0\n",
"25 area_worst float64 0\n",
"26 smoothness_worst float64 0\n",
"27 compactness_worst float64 0\n",
"28 concavity_worst float64 0\n",
"29 concave points_worst float64 0\n",
"30 symmetry_worst float64 0\n",
"31 fractal_dimension_worst float64 0\n",
"32 Unnamed: 32 float64 569"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"completeness_check(df)"
]
},
{
"cell_type": "markdown",
"id": "d91cd816",
"metadata": {},
"source": [
"Drop the columns that are not needed in this context: **'id'** (a record identifier) and **'Unnamed: 32'** (all 569 values are missing)."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6ce4e125",
"metadata": {},
"outputs": [],
"source": [
"# 'id' and 'Unnamed: 32' are not needed for the analysis, so remove both\n",
"df = df.drop(columns=['id', 'Unnamed: 32'])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "35e3462f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>diagnosis</th>\n",
" <th>radius_mean</th>\n",
" <th>texture_mean</th>\n",
" <th>perimeter_mean</th>\n",
" <th>area_mean</th>\n",
" <th>smoothness_mean</th>\n",
" <th>compactness_mean</th>\n",
" <th>concavity_mean</th>\n",
" <th>concave points_mean</th>\n",
" <th>symmetry_mean</th>\n",
" <th>...</th>\n",
" <th>radius_worst</th>\n",
" <th>texture_worst</th>\n",
" <th>perimeter_worst</th>\n",
" <th>area_worst</th>\n",
" <th>smoothness_worst</th>\n",
" <th>compactness_worst</th>\n",
" <th>concavity_worst</th>\n",
" <th>concave points_worst</th>\n",
" <th>symmetry_worst</th>\n",
" <th>fractal_dimension_worst</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M</td>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>...</td>\n",
" <td>25.38</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M</td>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>...</td>\n",
" <td>24.99</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M</td>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>...</td>\n",
" <td>23.57</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M</td>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>...</td>\n",
" <td>14.91</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M</td>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>...</td>\n",
" <td>22.54</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
"0 M 17.99 10.38 122.80 1001.0 \n",
"1 M 20.57 17.77 132.90 1326.0 \n",
"2 M 19.69 21.25 130.00 1203.0 \n",
"3 M 11.42 20.38 77.58 386.1 \n",
"4 M 20.29 14.34 135.10 1297.0 \n",
"\n",
" smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
"0 0.11840 0.27760 0.3001 0.14710 \n",
"1 0.08474 0.07864 0.0869 0.07017 \n",
"2 0.10960 0.15990 0.1974 0.12790 \n",
"3 0.14250 0.28390 0.2414 0.10520 \n",
"4 0.10030 0.13280 0.1980 0.10430 \n",
"\n",
" symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n",
"0 0.2419 ... 25.38 17.33 184.60 \n",
"1 0.1812 ... 24.99 23.41 158.80 \n",
"2 0.2069 ... 23.57 25.53 152.50 \n",
"3 0.2597 ... 14.91 26.50 98.87 \n",
"4 0.1809 ... 22.54 16.67 152.20 \n",
"\n",
" area_worst smoothness_worst compactness_worst concavity_worst \\\n",
"0 2019.0 0.1622 0.6656 0.7119 \n",
"1 1956.0 0.1238 0.1866 0.2416 \n",
"2 1709.0 0.1444 0.4245 0.4504 \n",
"3 567.7 0.2098 0.8663 0.6869 \n",
"4 1575.0 0.1374 0.2050 0.4000 \n",
"\n",
" concave points_worst symmetry_worst fractal_dimension_worst \n",
"0 0.2654 0.4601 0.11890 \n",
"1 0.1860 0.2750 0.08902 \n",
"2 0.2430 0.3613 0.08758 \n",
"3 0.2575 0.6638 0.17300 \n",
"4 0.1625 0.2364 0.07678 \n",
"\n",
"[5 rows x 31 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "4b2e562f",
"metadata": {},
"source": [
"# Reformatting\n",
"According to the source, the data is assumed to be already cleaned. The _float_ columns in this dataset are then rounded to two decimal places (**#.##**) for easier reading. Note that this changes the stored values themselves, not just how they are displayed."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b7eca327",
"metadata": {},
"outputs": [],
"source": [
"def format_float_columns(df, decimals=2):\n",
"    \"\"\"Round every float64 column of ``df`` in place.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Frame whose float64 columns are rounded. It is modified in place,\n",
"        so the change is visible to the caller even if the return value\n",
"        is ignored.\n",
"    decimals : int, default 2\n",
"        Number of decimal places to keep.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The same ``df`` object that was passed in.\n",
"    \"\"\"\n",
"    # Only float64 columns are selected; every other dtype is left untouched\n",
"    float_columns = df.select_dtypes(include='float64').columns\n",
"\n",
"    # Rounding keeps the float64 dtype but overwrites the stored values\n",
"    for col in float_columns:\n",
"        df[col] = df[col].round(decimals)\n",
"\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "adc5f2f3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>diagnosis</th>\n",
" <th>radius_mean</th>\n",
" <th>texture_mean</th>\n",
" <th>perimeter_mean</th>\n",
" <th>area_mean</th>\n",
" <th>smoothness_mean</th>\n",
" <th>compactness_mean</th>\n",
" <th>concavity_mean</th>\n",
" <th>concave points_mean</th>\n",
" <th>symmetry_mean</th>\n",
" <th>...</th>\n",
" <th>radius_worst</th>\n",
" <th>texture_worst</th>\n",
" <th>perimeter_worst</th>\n",
" <th>area_worst</th>\n",
" <th>smoothness_worst</th>\n",
" <th>compactness_worst</th>\n",
" <th>concavity_worst</th>\n",
" <th>concave points_worst</th>\n",
" <th>symmetry_worst</th>\n",
" <th>fractal_dimension_worst</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M</td>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.12</td>\n",
" <td>0.28</td>\n",
" <td>0.30</td>\n",
" <td>0.15</td>\n",
" <td>0.24</td>\n",
" <td>...</td>\n",
" <td>25.38</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.16</td>\n",
" <td>0.67</td>\n",
" <td>0.71</td>\n",
" <td>0.27</td>\n",
" <td>0.46</td>\n",
" <td>0.12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M</td>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08</td>\n",
" <td>0.08</td>\n",
" <td>0.09</td>\n",
" <td>0.07</td>\n",
" <td>0.18</td>\n",
" <td>...</td>\n",
" <td>24.99</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.12</td>\n",
" <td>0.19</td>\n",
" <td>0.24</td>\n",
" <td>0.19</td>\n",
" <td>0.28</td>\n",
" <td>0.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M</td>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.11</td>\n",
" <td>0.16</td>\n",
" <td>0.20</td>\n",
" <td>0.13</td>\n",
" <td>0.21</td>\n",
" <td>...</td>\n",
" <td>23.57</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.14</td>\n",
" <td>0.42</td>\n",
" <td>0.45</td>\n",
" <td>0.24</td>\n",
" <td>0.36</td>\n",
" <td>0.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M</td>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14</td>\n",
" <td>0.28</td>\n",
" <td>0.24</td>\n",
" <td>0.11</td>\n",
" <td>0.26</td>\n",
" <td>...</td>\n",
" <td>14.91</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.21</td>\n",
" <td>0.87</td>\n",
" <td>0.69</td>\n",
" <td>0.26</td>\n",
" <td>0.66</td>\n",
" <td>0.17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M</td>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10</td>\n",
" <td>0.13</td>\n",
" <td>0.20</td>\n",
" <td>0.10</td>\n",
" <td>0.18</td>\n",
" <td>...</td>\n",
" <td>22.54</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.14</td>\n",
" <td>0.20</td>\n",
" <td>0.40</td>\n",
" <td>0.16</td>\n",
" <td>0.24</td>\n",
" <td>0.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>564</th>\n",
" <td>M</td>\n",
" <td>21.56</td>\n",
" <td>22.39</td>\n",
" <td>142.00</td>\n",
" <td>1479.0</td>\n",
" <td>0.11</td>\n",
" <td>0.12</td>\n",
" <td>0.24</td>\n",
" <td>0.14</td>\n",
" <td>0.17</td>\n",
" <td>...</td>\n",
" <td>25.45</td>\n",
" <td>26.40</td>\n",
" <td>166.10</td>\n",
" <td>2027.0</td>\n",
" <td>0.14</td>\n",
" <td>0.21</td>\n",
" <td>0.41</td>\n",
" <td>0.22</td>\n",
" <td>0.21</td>\n",
" <td>0.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565</th>\n",
" <td>M</td>\n",
" <td>20.13</td>\n",
" <td>28.25</td>\n",
" <td>131.20</td>\n",
" <td>1261.0</td>\n",
" <td>0.10</td>\n",
" <td>0.10</td>\n",
" <td>0.14</td>\n",
" <td>0.10</td>\n",
" <td>0.18</td>\n",
" <td>...</td>\n",
" <td>23.69</td>\n",
" <td>38.25</td>\n",
" <td>155.00</td>\n",
" <td>1731.0</td>\n",
" <td>0.12</td>\n",
" <td>0.19</td>\n",
" <td>0.32</td>\n",
" <td>0.16</td>\n",
" <td>0.26</td>\n",
" <td>0.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>566</th>\n",
" <td>M</td>\n",
" <td>16.60</td>\n",
" <td>28.08</td>\n",
" <td>108.30</td>\n",
" <td>858.1</td>\n",
" <td>0.08</td>\n",
" <td>0.10</td>\n",
" <td>0.09</td>\n",
" <td>0.05</td>\n",
" <td>0.16</td>\n",
" <td>...</td>\n",
" <td>18.98</td>\n",
" <td>34.12</td>\n",
" <td>126.70</td>\n",
" <td>1124.0</td>\n",
" <td>0.11</td>\n",
" <td>0.31</td>\n",
" <td>0.34</td>\n",
" <td>0.14</td>\n",
" <td>0.22</td>\n",
" <td>0.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>567</th>\n",
" <td>M</td>\n",
" <td>20.60</td>\n",
" <td>29.33</td>\n",
" <td>140.10</td>\n",
" <td>1265.0</td>\n",
" <td>0.12</td>\n",
" <td>0.28</td>\n",
" <td>0.35</td>\n",
" <td>0.15</td>\n",
" <td>0.24</td>\n",
" <td>...</td>\n",
" <td>25.74</td>\n",
" <td>39.42</td>\n",
" <td>184.60</td>\n",
" <td>1821.0</td>\n",
" <td>0.16</td>\n",
" <td>0.87</td>\n",
" <td>0.94</td>\n",
" <td>0.26</td>\n",
" <td>0.41</td>\n",
" <td>0.12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>568</th>\n",
" <td>B</td>\n",
" <td>7.76</td>\n",
" <td>24.54</td>\n",
" <td>47.92</td>\n",
" <td>181.0</td>\n",
" <td>0.05</td>\n",
" <td>0.04</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.16</td>\n",
" <td>...</td>\n",
" <td>9.46</td>\n",
" <td>30.37</td>\n",
" <td>59.16</td>\n",
" <td>268.6</td>\n",
" <td>0.09</td>\n",
" <td>0.06</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.29</td>\n",
" <td>0.07</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>569 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
"0 M 17.99 10.38 122.80 1001.0 \n",
"1 M 20.57 17.77 132.90 1326.0 \n",
"2 M 19.69 21.25 130.00 1203.0 \n",
"3 M 11.42 20.38 77.58 386.1 \n",
"4 M 20.29 14.34 135.10 1297.0 \n",
".. ... ... ... ... ... \n",
"564 M 21.56 22.39 142.00 1479.0 \n",
"565 M 20.13 28.25 131.20 1261.0 \n",
"566 M 16.60 28.08 108.30 858.1 \n",
"567 M 20.60 29.33 140.10 1265.0 \n",
"568 B 7.76 24.54 47.92 181.0 \n",
"\n",
" smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
"0 0.12 0.28 0.30 0.15 \n",
"1 0.08 0.08 0.09 0.07 \n",
"2 0.11 0.16 0.20 0.13 \n",
"3 0.14 0.28 0.24 0.11 \n",
"4 0.10 0.13 0.20 0.10 \n",
".. ... ... ... ... \n",
"564 0.11 0.12 0.24 0.14 \n",
"565 0.10 0.10 0.14 0.10 \n",
"566 0.08 0.10 0.09 0.05 \n",
"567 0.12 0.28 0.35 0.15 \n",
"568 0.05 0.04 0.00 0.00 \n",
"\n",
" symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n",
"0 0.24 ... 25.38 17.33 184.60 \n",
"1 0.18 ... 24.99 23.41 158.80 \n",
"2 0.21 ... 23.57 25.53 152.50 \n",
"3 0.26 ... 14.91 26.50 98.87 \n",
"4 0.18 ... 22.54 16.67 152.20 \n",
".. ... ... ... ... ... \n",
"564 0.17 ... 25.45 26.40 166.10 \n",
"565 0.18 ... 23.69 38.25 155.00 \n",
"566 0.16 ... 18.98 34.12 126.70 \n",
"567 0.24 ... 25.74 39.42 184.60 \n",
"568 0.16 ... 9.46 30.37 59.16 \n",
"\n",
" area_worst smoothness_worst compactness_worst concavity_worst \\\n",
"0 2019.0 0.16 0.67 0.71 \n",
"1 1956.0 0.12 0.19 0.24 \n",
"2 1709.0 0.14 0.42 0.45 \n",
"3 567.7 0.21 0.87 0.69 \n",
"4 1575.0 0.14 0.20 0.40 \n",
".. ... ... ... ... \n",
"564 2027.0 0.14 0.21 0.41 \n",
"565 1731.0 0.12 0.19 0.32 \n",
"566 1124.0 0.11 0.31 0.34 \n",
"567 1821.0 0.16 0.87 0.94 \n",
"568 268.6 0.09 0.06 0.00 \n",
"\n",
" concave points_worst symmetry_worst fractal_dimension_worst \n",
"0 0.27 0.46 0.12 \n",
"1 0.19 0.28 0.09 \n",
"2 0.24 0.36 0.09 \n",
"3 0.26 0.66 0.17 \n",
"4 0.16 0.24 0.08 \n",
".. ... ... ... \n",
"564 0.22 0.21 0.07 \n",
"565 0.16 0.26 0.07 \n",
"566 0.14 0.22 0.08 \n",
"567 0.26 0.41 0.12 \n",
"568 0.00 0.29 0.07 \n",
"\n",
"[569 rows x 31 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"format_float_columns(df)"
]
},
{
"cell_type": "markdown",
"id": "c9aaef3a",
"metadata": {},
"source": [
"# Encoding\n",
"Since the label to be predicted only has two values, **M for Malignant** and **B for Benign**, we can encode them as 0 and 1 respectively.\n",
"The type is also changed to float, so that all columns share the same data type."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "60870d9a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>diagnosis</th>\n",
" <th>radius_mean</th>\n",
" <th>texture_mean</th>\n",
" <th>perimeter_mean</th>\n",
" <th>area_mean</th>\n",
" <th>smoothness_mean</th>\n",
" <th>compactness_mean</th>\n",
" <th>concavity_mean</th>\n",
" <th>concave points_mean</th>\n",
" <th>symmetry_mean</th>\n",
" <th>...</th>\n",
" <th>radius_worst</th>\n",
" <th>texture_worst</th>\n",
" <th>perimeter_worst</th>\n",
" <th>area_worst</th>\n",
" <th>smoothness_worst</th>\n",
" <th>compactness_worst</th>\n",
" <th>concavity_worst</th>\n",
" <th>concave points_worst</th>\n",
" <th>symmetry_worst</th>\n",
" <th>fractal_dimension_worst</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.8</td>\n",
" <td>1001.0</td>\n",
" <td>0.12</td>\n",
" <td>0.28</td>\n",
" <td>0.30</td>\n",
" <td>0.15</td>\n",
" <td>0.24</td>\n",
" <td>...</td>\n",
" <td>25.38</td>\n",
" <td>17.33</td>\n",
" <td>184.6</td>\n",
" <td>2019.0</td>\n",
" <td>0.16</td>\n",
" <td>0.67</td>\n",
" <td>0.71</td>\n",
" <td>0.27</td>\n",
" <td>0.46</td>\n",
" <td>0.12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.9</td>\n",
" <td>1326.0</td>\n",
" <td>0.08</td>\n",
" <td>0.08</td>\n",
" <td>0.09</td>\n",
" <td>0.07</td>\n",
" <td>0.18</td>\n",
" <td>...</td>\n",
" <td>24.99</td>\n",
" <td>23.41</td>\n",
" <td>158.8</td>\n",
" <td>1956.0</td>\n",
" <td>0.12</td>\n",
" <td>0.19</td>\n",
" <td>0.24</td>\n",
" <td>0.19</td>\n",
" <td>0.28</td>\n",
" <td>0.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.0</td>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.0</td>\n",
" <td>1203.0</td>\n",
" <td>0.11</td>\n",
" <td>0.16</td>\n",
" <td>0.20</td>\n",
" <td>0.13</td>\n",
" <td>0.21</td>\n",
" <td>...</td>\n",
" <td>23.57</td>\n",
" <td>25.53</td>\n",
" <td>152.5</td>\n",
" <td>1709.0</td>\n",
" <td>0.14</td>\n",
" <td>0.42</td>\n",
" <td>0.45</td>\n",
" <td>0.24</td>\n",
" <td>0.36</td>\n",
" <td>0.09</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
"0 0.0 17.99 10.38 122.8 1001.0 \n",
"1 0.0 20.57 17.77 132.9 1326.0 \n",
"2 0.0 19.69 21.25 130.0 1203.0 \n",
"\n",
" smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
"0 0.12 0.28 0.30 0.15 \n",
"1 0.08 0.08 0.09 0.07 \n",
"2 0.11 0.16 0.20 0.13 \n",
"\n",
" symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n",
"0 0.24 ... 25.38 17.33 184.6 \n",
"1 0.18 ... 24.99 23.41 158.8 \n",
"2 0.21 ... 23.57 25.53 152.5 \n",
"\n",
" area_worst smoothness_worst compactness_worst concavity_worst \\\n",
"0 2019.0 0.16 0.67 0.71 \n",
"1 1956.0 0.12 0.19 0.24 \n",
"2 1709.0 0.14 0.42 0.45 \n",
"\n",
" concave points_worst symmetry_worst fractal_dimension_worst \n",
"0 0.27 0.46 0.12 \n",
"1 0.19 0.28 0.09 \n",
"2 0.24 0.36 0.09 \n",
"\n",
"[3 rows x 31 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Encode the 'diagnosis' labels as numbers ('M' -> 0, 'B' -> 1) and store them as float\n",
"df['diagnosis'] = df['diagnosis'].replace({'M': 0, 'B': 1}).astype(float)\n",
"df.head(3)"
]
},
{
"cell_type": "markdown",
"id": "ecbe4b22",
"metadata": {},
"source": [
"Confirm that the **'diagnosis'** column now has the correct data type."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7e437274",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('float64')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['diagnosis'].dtypes"
]
},
{
"cell_type": "markdown",
"id": "5b18b919",
"metadata": {},
"source": [
"Run a correlation matrix to get a glimpse of which columns are most strongly correlated with one another."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2c3b26b6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>diagnosis</th>\n",
" <th>radius_mean</th>\n",
" <th>texture_mean</th>\n",
" <th>perimeter_mean</th>\n",
" <th>area_mean</th>\n",
" <th>smoothness_mean</th>\n",
" <th>compactness_mean</th>\n",
" <th>concavity_mean</th>\n",
" <th>concave points_mean</th>\n",
" <th>symmetry_mean</th>\n",
" <th>...</th>\n",
" <th>radius_worst</th>\n",
" <th>texture_worst</th>\n",
" <th>perimeter_worst</th>\n",
" <th>area_worst</th>\n",
" <th>smoothness_worst</th>\n",
" <th>compactness_worst</th>\n",
" <th>concavity_worst</th>\n",
" <th>concave points_worst</th>\n",
" <th>symmetry_worst</th>\n",
" <th>fractal_dimension_worst</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>diagnosis</th>\n",
" <td>1.000000</td>\n",
" <td>-0.730032</td>\n",
" <td>-0.415185</td>\n",
" <td>-0.742636</td>\n",
" <td>-0.708984</td>\n",
" <td>-0.330624</td>\n",
" <td>-0.597576</td>\n",
" <td>-0.695750</td>\n",
" <td>-0.774410</td>\n",
" <td>-0.333477</td>\n",
" <td>...</td>\n",
" <td>-0.776453</td>\n",
" <td>-0.456903</td>\n",
" <td>-0.782914</td>\n",
" <td>-0.733825</td>\n",
" <td>-0.419661</td>\n",
" <td>-0.590477</td>\n",
" <td>-0.659345</td>\n",
" <td>-0.791976</td>\n",
" <td>-0.417123</td>\n",
" <td>-0.322046</td>\n",
" </tr>\n",
" <tr>\n",
" <th>radius_mean</th>\n",
" <td>-0.730032</td>\n",
" <td>1.000000</td>\n",
" <td>0.323777</td>\n",
" <td>0.997856</td>\n",
" <td>0.987361</td>\n",
" <td>0.147104</td>\n",
" <td>0.505622</td>\n",
" <td>0.677041</td>\n",
" <td>0.821646</td>\n",
" <td>0.151195</td>\n",
" <td>...</td>\n",
" <td>0.969541</td>\n",
" <td>0.297000</td>\n",
" <td>0.965139</td>\n",
" <td>0.941087</td>\n",
" <td>0.119031</td>\n",
" <td>0.413065</td>\n",
" <td>0.527379</td>\n",
" <td>0.741494</td>\n",
" <td>0.164409</td>\n",
" <td>0.009169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>texture_mean</th>\n",
" <td>-0.415185</td>\n",
" <td>0.323777</td>\n",
" <td>1.000000</td>\n",
" <td>0.329533</td>\n",
" <td>0.321086</td>\n",
" <td>-0.040419</td>\n",
" <td>0.237019</td>\n",
" <td>0.300536</td>\n",
" <td>0.287618</td>\n",
" <td>0.067193</td>\n",
" <td>...</td>\n",
" <td>0.352578</td>\n",
" <td>0.912045</td>\n",
" <td>0.358040</td>\n",
" <td>0.343546</td>\n",
" <td>0.075885</td>\n",
" <td>0.278112</td>\n",
" <td>0.300041</td>\n",
" <td>0.293490</td>\n",
" <td>0.105824</td>\n",
" <td>0.113425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>perimeter_mean</th>\n",
" <td>-0.742636</td>\n",
" <td>0.997856</td>\n",
" <td>0.329533</td>\n",
" <td>1.000000</td>\n",
" <td>0.986507</td>\n",
" <td>0.183356</td>\n",
" <td>0.556485</td>\n",
" <td>0.716448</td>\n",
" <td>0.849875</td>\n",
" <td>0.186306</td>\n",
" <td>...</td>\n",
" <td>0.969476</td>\n",
" <td>0.303038</td>\n",
" <td>0.970387</td>\n",
" <td>0.941550</td>\n",
" <td>0.149530</td>\n",
" <td>0.455370</td>\n",
" <td>0.564315</td>\n",
" <td>0.768429</td>\n",
" <td>0.189385</td>\n",
" <td>0.052794</td>\n",
" </tr>\n",
" <tr>\n",
" <th>area_mean</th>\n",
" <td>-0.708984</td>\n",
" <td>0.987361</td>\n",
" <td>0.321086</td>\n",
" <td>0.986507</td>\n",
" <td>1.000000</td>\n",
" <td>0.151671</td>\n",
" <td>0.498038</td>\n",
" <td>0.686308</td>\n",
" <td>0.822500</td>\n",
" <td>0.154603</td>\n",
" <td>...</td>\n",
" <td>0.962745</td>\n",
" <td>0.287489</td>\n",
" <td>0.959120</td>\n",
" <td>0.959213</td>\n",
" <td>0.122984</td>\n",
" <td>0.389874</td>\n",
" <td>0.512943</td>\n",
" <td>0.719419</td>\n",
" <td>0.143762</td>\n",
" <td>0.006854</td>\n",
" </tr>\n",
" <tr>\n",
" <th>smoothness_mean</th>\n",
" <td>-0.330624</td>\n",
" <td>0.147104</td>\n",
" <td>-0.040419</td>\n",
" <td>0.183356</td>\n",
" <td>0.151671</td>\n",
" <td>1.000000</td>\n",
" <td>0.640292</td>\n",
" <td>0.501956</td>\n",
" <td>0.528881</td>\n",
" <td>0.544531</td>\n",
" <td>...</td>\n",
" <td>0.189984</td>\n",
" <td>0.022669</td>\n",
" <td>0.215574</td>\n",
" <td>0.182905</td>\n",
" <td>0.777084</td>\n",
" <td>0.461971</td>\n",
" <td>0.420549</td>\n",
" <td>0.483773</td>\n",
" <td>0.391210</td>\n",
" <td>0.498783</td>\n",
" </tr>\n",
" <tr>\n",
" <th>compactness_mean</th>\n",
" <td>-0.597576</td>\n",
" <td>0.505622</td>\n",
" <td>0.237019</td>\n",
" <td>0.556485</td>\n",
" <td>0.498038</td>\n",
" <td>0.640292</td>\n",
" <td>1.000000</td>\n",
" <td>0.883729</td>\n",
" <td>0.829022</td>\n",
" <td>0.602728</td>\n",
" <td>...</td>\n",
" <td>0.534939</td>\n",
" <td>0.247564</td>\n",
" <td>0.589944</td>\n",
" <td>0.509449</td>\n",
" <td>0.559318</td>\n",
" <td>0.864209</td>\n",
" <td>0.815163</td>\n",
" <td>0.813038</td>\n",
" <td>0.508330</td>\n",
" <td>0.681343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>concavity_mean</th>\n",
" <td>-0.695750</td>\n",
" <td>0.677041</td>\n",
" <td>0.300536</td>\n",
" <td>0.716448</td>\n",
" <td>0.686308</td>\n",
" <td>0.501956</td>\n",
" <td>0.883729</td>\n",
" <td>1.000000</td>\n",
" <td>0.918687</td>\n",
" <td>0.500739</td>\n",
" <td>...</td>\n",
" <td>0.688703</td>\n",
" <td>0.298983</td>\n",
" <td>0.729963</td>\n",
" <td>0.676656</td>\n",
" <td>0.446247</td>\n",
" <td>0.755167</td>\n",
" <td>0.883088</td>\n",
" <td>0.859451</td>\n",
" <td>0.409733</td>\n",
" <td>0.513497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>concave points_mean</th>\n",
" <td>-0.774410</td>\n",
" <td>0.821646</td>\n",
" <td>0.287618</td>\n",
" <td>0.849875</td>\n",
" <td>0.822500</td>\n",
" <td>0.528881</td>\n",
" <td>0.829022</td>\n",
" <td>0.918687</td>\n",
" <td>1.000000</td>\n",
" <td>0.460745</td>\n",
" <td>...</td>\n",
" <td>0.828401</td>\n",
" <td>0.286828</td>\n",
" <td>0.853642</td>\n",
" <td>0.807978</td>\n",
" <td>0.446157</td>\n",
" <td>0.665741</td>\n",
" <td>0.748687</td>\n",
" <td>0.905058</td>\n",
" <td>0.370446</td>\n",
" <td>0.366058</td>\n",
" </tr>\n",
" <tr>\n",
" <th>symmetry_mean</th>\n",
" <td>-0.333477</td>\n",
" <td>0.151195</td>\n",
" <td>0.067193</td>\n",
" <td>0.186306</td>\n",
" <td>0.154603</td>\n",
" <td>0.544531</td>\n",
" <td>0.602728</td>\n",
" <td>0.500739</td>\n",
" <td>0.460745</td>\n",
" <td>1.000000</td>\n",
" <td>...</td>\n",
" <td>0.189763</td>\n",
" <td>0.084924</td>\n",
" <td>0.222908</td>\n",
" <td>0.181731</td>\n",
" <td>0.424135</td>\n",
" <td>0.474981</td>\n",
" <td>0.432004</td>\n",
" <td>0.428627</td>\n",
" <td>0.698086</td>\n",
" <td>0.427079</td>\n",
" </tr>\n",
" <tr>\n",
" <th>fractal_dimension_mean</th>\n",
" <td>0.027269</td>\n",
" <td>-0.290001</td>\n",
" <td>-0.080337</td>\n",
" <td>-0.245096</td>\n",
" <td>-0.260436</td>\n",
" <td>0.556840</td>\n",
" <td>0.506156</td>\n",
" <td>0.301176</td>\n",
" <td>0.142307</td>\n",
" <td>0.411393</td>\n",
" <td>...</td>\n",
" <td>-0.242125</td>\n",
" <td>-0.051211</td>\n",
" <td>-0.199454</td>\n",
" <td>-0.218566</td>\n",
" <td>0.451897</td>\n",
" <td>0.410627</td>\n",
" <td>0.305228</td>\n",
" <td>0.148275</td>\n",
" <td>0.283061</td>\n",
" <td>0.701592</td>\n",
" </tr>\n",
" <tr>\n",
" <th>radius_se</th>\n",
" <td>-0.567328</td>\n",
" <td>0.678590</td>\n",
" <td>0.275973</td>\n",
" <td>0.691268</td>\n",
" <td>0.732125</td>\n",
" <td>0.277600</td>\n",
" <td>0.497381</td>\n",
" <td>0.631258</td>\n",
" <td>0.699764</td>\n",
" <td>0.304353</td>\n",
" <td>...</td>\n",
" <td>0.714872</td>\n",
" <td>0.195092</td>\n",
" <td>0.719492</td>\n",
" <td>0.751431</td>\n",
" <td>0.140187</td>\n",
" <td>0.286572</td>\n",
" <td>0.380110</td>\n",
" <td>0.528842</td>\n",
" <td>0.094549</td>\n",
" <td>0.049351</td>\n",
" </tr>\n",
" <tr>\n",
" <th>texture_se</th>\n",
" <td>0.008299</td>\n",
" <td>-0.097230</td>\n",
" <td>0.386443</td>\n",
" <td>-0.086669</td>\n",
" <td>-0.066119</td>\n",
" <td>0.059149</td>\n",
" <td>0.044875</td>\n",
" <td>0.075893</td>\n",
" <td>0.025260</td>\n",
" <td>0.126154</td>\n",
" <td>...</td>\n",
" <td>-0.111592</td>\n",
" <td>0.409071</td>\n",
" <td>-0.102161</td>\n",
" <td>-0.083078</td>\n",
" <td>-0.073042</td>\n",
" <td>-0.092846</td>\n",
" <td>-0.070533</td>\n",
" <td>-0.120938</td>\n",
" <td>-0.127607</td>\n",
" <td>-0.044761</td>\n",
" </tr>\n",
" <tr>\n",
" <th>perimeter_se</th>\n",
" <td>-0.556055</td>\n",
" <td>0.674109</td>\n",
" <td>0.281590</td>\n",
" <td>0.693069</td>\n",
" <td>0.726564</td>\n",
" <td>0.272183</td>\n",
" <td>0.548665</td>\n",
" <td>0.659685</td>\n",
" <td>0.712066</td>\n",
" <td>0.313482</td>\n",
" <td>...</td>\n",
" <td>0.697133</td>\n",
" <td>0.200270</td>\n",
" <td>0.720966</td>\n",
" <td>0.730647</td>\n",
" <td>0.125791</td>\n",
" <td>0.340951</td>\n",
" <td>0.418269</td>\n",
" <td>0.552873</td>\n",
" <td>0.108877</td>\n",
" <td>0.083512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>area_se</th>\n",
" <td>-0.548236</td>\n",
" <td>0.735868</td>\n",
" <td>0.259844</td>\n",
" <td>0.744983</td>\n",
" <td>0.800086</td>\n",
" <td>0.222595</td>\n",
" <td>0.456053</td>\n",
" <td>0.617299</td>\n",
" <td>0.690642</td>\n",
" <td>0.226088</td>\n",
" <td>...</td>\n",
" <td>0.757372</td>\n",
" <td>0.196496</td>\n",
" <td>0.761213</td>\n",
" <td>0.811408</td>\n",
" <td>0.124392</td>\n",
" <td>0.282682</td>\n",
" <td>0.384820</td>\n",
" <td>0.535417</td>\n",
" <td>0.072572</td>\n",
" <td>0.016228</td>\n",
" </tr>\n",
" <tr>\n",
" <th>smoothness_se</th>\n",
" <td>0.012117</td>\n",
" <td>-0.167456</td>\n",
" <td>0.019777</td>\n",
" <td>-0.149510</td>\n",
" <td>-0.130192</td>\n",
" <td>0.268150</td>\n",
" <td>0.133392</td>\n",
" <td>0.110590</td>\n",
" <td>0.051658</td>\n",
" <td>0.153937</td>\n",
" <td>...</td>\n",
" <td>-0.152307</td>\n",
" <td>-0.008034</td>\n",
" <td>-0.136532</td>\n",
" <td>-0.115951</td>\n",
" <td>0.309111</td>\n",
" <td>0.033085</td>\n",
" <td>0.018581</td>\n",
" <td>-0.027878</td>\n",
" <td>-0.032829</td>\n",
" <td>0.119089</td>\n",
" </tr>\n",
" <tr>\n",
" <th>compactness_se</th>\n",
" <td>-0.288713</td>\n",
" <td>0.204341</td>\n",
" <td>0.192029</td>\n",
" <td>0.248683</td>\n",
" <td>0.211126</td>\n",
" <td>0.313043</td>\n",
" <td>0.729164</td>\n",
" <td>0.665217</td>\n",
" <td>0.490719</td>\n",
" <td>0.412566</td>\n",
" <td>...</td>\n",
" <td>0.202027</td>\n",
" <td>0.146121</td>\n",
" <td>0.256901</td>\n",
" <td>0.196258</td>\n",
" <td>0.221708</td>\n",
" <td>0.672119</td>\n",
" <td>0.633328</td>\n",
" <td>0.477739</td>\n",
" <td>0.271770</td>\n",
" <td>0.574867</td>\n",
" </tr>\n",
" <tr>\n",
" <th>concavity_se</th>\n",
" <td>-0.255041</td>\n",
" <td>0.194151</td>\n",
" <td>0.145536</td>\n",
" <td>0.228160</td>\n",
" <td>0.206659</td>\n",
" <td>0.249132</td>\n",
" <td>0.571996</td>\n",
" <td>0.688883</td>\n",
" <td>0.441037</td>\n",
" <td>0.337796</td>\n",
" <td>...</td>\n",
" <td>0.187371</td>\n",
" <td>0.102586</td>\n",
" <td>0.227007</td>\n",
" <td>0.187836</td>\n",
" <td>0.172332</td>\n",
" <td>0.491365</td>\n",
" <td>0.662437</td>\n",
" <td>0.443167</td>\n",
" <td>0.199593</td>\n",
" <td>0.445392</td>\n",
" </tr>\n",
" <tr>\n",
" <th>concave points_se</th>\n",
" <td>-0.329287</td>\n",
" <td>0.325042</td>\n",
" <td>0.153373</td>\n",
" <td>0.353151</td>\n",
" <td>0.322610</td>\n",
" <td>0.282948</td>\n",
" <td>0.558232</td>\n",
" <td>0.582869</td>\n",
" <td>0.522044</td>\n",
" <td>0.312608</td>\n",
" <td>...</td>\n",
" <td>0.307288</td>\n",
" <td>0.079999</td>\n",
" <td>0.342227</td>\n",
" <td>0.297057</td>\n",
" <td>0.144157</td>\n",
" <td>0.389143</td>\n",
" <td>0.463453</td>\n",
" <td>0.502092</td>\n",
" <td>0.126508</td>\n",
" <td>0.237510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>symmetry_se</th>\n",
" <td>0.025127</td>\n",
" <td>-0.117235</td>\n",
" <td>-0.017239</td>\n",
" <td>-0.093748</td>\n",
" <td>-0.084347</td>\n",
" <td>0.216286</td>\n",
" <td>0.236474</td>\n",
" <td>0.178725</td>\n",
" <td>0.098510</td>\n",
" <td>0.446116</td>\n",
" <td>...</td>\n",
" <td>-0.142188</td>\n",
" <td>-0.094773</td>\n",
" <td>-0.117920</td>\n",
" <td>-0.123926</td>\n",
" <td>-0.007047</td>\n",
" <td>0.070591</td>\n",
" <td>0.035520</td>\n",
" <td>-0.032924</td>\n",
" <td>0.382952</td>\n",
" <td>0.084610</td>\n",
" </tr>\n",
" <tr>\n",
" <th>fractal_dimension_se</th>\n",
" <td>-0.091669</td>\n",
" <td>-0.009516</td>\n",
" <td>0.079210</td>\n",
" <td>0.027147</td>\n",
" <td>0.002046</td>\n",
" <td>0.278176</td>\n",
" <td>0.496677</td>\n",
" <td>0.427309</td>\n",
" <td>0.260200</td>\n",
" <td>0.333318</td>\n",
" <td>...</td>\n",
" <td>-0.005759</td>\n",
" <td>0.008842</td>\n",
" <td>0.033505</td>\n",
" <td>-0.000367</td>\n",
" <td>0.167714</td>\n",
" <td>0.381239</td>\n",
" <td>0.363578</td>\n",
" <td>0.203468</td>\n",
" <td>0.098792</td>\n",
" <td>0.522940</td>\n",
" </tr>\n",
" <tr>\n",
" <th>radius_worst</th>\n",
" <td>-0.776453</td>\n",
" <td>0.969541</td>\n",
" <td>0.352578</td>\n",
" <td>0.969476</td>\n",
" <td>0.962745</td>\n",
" <td>0.189984</td>\n",
" <td>0.534939</td>\n",
" <td>0.688703</td>\n",
" <td>0.828401</td>\n",
" <td>0.189763</td>\n",
" <td>...</td>\n",
" <td>1.000000</td>\n",
" <td>0.359925</td>\n",
" <td>0.993707</td>\n",
" <td>0.984014</td>\n",
" <td>0.215895</td>\n",
" <td>0.475348</td>\n",
" <td>0.574562</td>\n",
" <td>0.784946</td>\n",
" <td>0.244034</td>\n",
" <td>0.092952</td>\n",
" </tr>\n",
" <tr>\n",
" <th>texture_worst</th>\n",
" <td>-0.456903</td>\n",
" <td>0.297000</td>\n",
" <td>0.912045</td>\n",
" <td>0.303038</td>\n",
" <td>0.287489</td>\n",
" <td>0.022669</td>\n",
" <td>0.247564</td>\n",
" <td>0.298983</td>\n",
" <td>0.286828</td>\n",
" <td>0.084924</td>\n",
" <td>...</td>\n",
" <td>0.359925</td>\n",
" <td>1.000000</td>\n",
" <td>0.365098</td>\n",
" <td>0.345842</td>\n",
" <td>0.225808</td>\n",
" <td>0.361123</td>\n",
" <td>0.367625</td>\n",
" <td>0.358467</td>\n",
" <td>0.234337</td>\n",
" <td>0.214237</td>\n",
" </tr>\n",
" <tr>\n",
" <th>perimeter_worst</th>\n",
" <td>-0.782914</td>\n",
" <td>0.965139</td>\n",
" <td>0.358040</td>\n",
" <td>0.970387</td>\n",
" <td>0.959120</td>\n",
" <td>0.215574</td>\n",
" <td>0.589944</td>\n",
" <td>0.729963</td>\n",
" <td>0.853642</td>\n",
" <td>0.222908</td>\n",
" <td>...</td>\n",
" <td>0.993707</td>\n",
" <td>0.365098</td>\n",
" <td>1.000000</td>\n",
" <td>0.977578</td>\n",
" <td>0.235168</td>\n",
" <td>0.528876</td>\n",
" <td>0.618906</td>\n",
" <td>0.813826</td>\n",
" <td>0.269788</td>\n",
" <td>0.137973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>area_worst</th>\n",
" <td>-0.733825</td>\n",
" <td>0.941087</td>\n",
" <td>0.343546</td>\n",
" <td>0.941550</td>\n",
" <td>0.959213</td>\n",
" <td>0.182905</td>\n",
" <td>0.509449</td>\n",
" <td>0.676656</td>\n",
" <td>0.807978</td>\n",
" <td>0.181731</td>\n",
" <td>...</td>\n",
" <td>0.984014</td>\n",
" <td>0.345842</td>\n",
" <td>0.977578</td>\n",
" <td>1.000000</td>\n",
" <td>0.209064</td>\n",
" <td>0.437727</td>\n",
" <td>0.543774</td>\n",
" <td>0.745090</td>\n",
" <td>0.209443</td>\n",
" <td>0.079535</td>\n",
" </tr>\n",
" <tr>\n",
" <th>smoothness_worst</th>\n",
" <td>-0.419661</td>\n",
" <td>0.119031</td>\n",
" <td>0.075885</td>\n",
" <td>0.149530</td>\n",
" <td>0.122984</td>\n",
" <td>0.777084</td>\n",
" <td>0.559318</td>\n",
" <td>0.446247</td>\n",
" <td>0.446157</td>\n",
" <td>0.424135</td>\n",
" <td>...</td>\n",
" <td>0.215895</td>\n",
" <td>0.225808</td>\n",
" <td>0.235168</td>\n",
" <td>0.209064</td>\n",
" <td>1.000000</td>\n",
" <td>0.563176</td>\n",
" <td>0.513958</td>\n",
" <td>0.542461</td>\n",
" <td>0.486361</td>\n",
" <td>0.608113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>compactness_worst</th>\n",
" <td>-0.590477</td>\n",
" <td>0.413065</td>\n",
" <td>0.278112</td>\n",
" <td>0.455370</td>\n",
" <td>0.389874</td>\n",
" <td>0.461971</td>\n",
" <td>0.864209</td>\n",
" <td>0.755167</td>\n",
" <td>0.665741</td>\n",
" <td>0.474981</td>\n",
" <td>...</td>\n",
" <td>0.475348</td>\n",
" <td>0.361123</td>\n",
" <td>0.528876</td>\n",
" <td>0.437727</td>\n",
" <td>0.563176</td>\n",
" <td>1.000000</td>\n",
" <td>0.892859</td>\n",
" <td>0.800307</td>\n",
" <td>0.614717</td>\n",
" <td>0.800823</td>\n",
" </tr>\n",
" <tr>\n",
" <th>concavity_worst</th>\n",
" <td>-0.659345</td>\n",
" <td>0.527379</td>\n",
" <td>0.300041</td>\n",
" <td>0.564315</td>\n",
" <td>0.512943</td>\n",
" <td>0.420549</td>\n",
" <td>0.815163</td>\n",
" <td>0.883088</td>\n",
" <td>0.748687</td>\n",
" <td>0.432004</td>\n",
" <td>...</td>\n",
" <td>0.574562</td>\n",
" <td>0.367625</td>\n",
" <td>0.618906</td>\n",
" <td>0.543774</td>\n",
" <td>0.513958</td>\n",
" <td>0.892859</td>\n",
" <td>1.000000</td>\n",
" <td>0.855035</td>\n",
" <td>0.530509</td>\n",
" <td>0.682274</td>\n",
" </tr>\n",
" <tr>\n",
" <th>concave points_worst</th>\n",
" <td>-0.791976</td>\n",
" <td>0.741494</td>\n",
" <td>0.293490</td>\n",
" <td>0.768429</td>\n",
" <td>0.719419</td>\n",
" <td>0.483773</td>\n",
" <td>0.813038</td>\n",
" <td>0.859451</td>\n",
" <td>0.905058</td>\n",
" <td>0.428627</td>\n",
" <td>...</td>\n",
" <td>0.784946</td>\n",
" <td>0.358467</td>\n",
" <td>0.813826</td>\n",
" <td>0.745090</td>\n",
" <td>0.542461</td>\n",
" <td>0.800307</td>\n",
" <td>0.855035</td>\n",
" <td>1.000000</td>\n",
" <td>0.502487</td>\n",
" <td>0.510454</td>\n",
" </tr>\n",
" <tr>\n",
" <th>symmetry_worst</th>\n",
" <td>-0.417123</td>\n",
" <td>0.164409</td>\n",
" <td>0.105824</td>\n",
" <td>0.189385</td>\n",
" <td>0.143762</td>\n",
" <td>0.391210</td>\n",
" <td>0.508330</td>\n",
" <td>0.409733</td>\n",
" <td>0.370446</td>\n",
" <td>0.698086</td>\n",
" <td>...</td>\n",
" <td>0.244034</td>\n",
" <td>0.234337</td>\n",
" <td>0.269788</td>\n",
" <td>0.209443</td>\n",
" <td>0.486361</td>\n",
" <td>0.614717</td>\n",
" <td>0.530509</td>\n",
" <td>0.502487</td>\n",
" <td>1.000000</td>\n",
" <td>0.527177</td>\n",
" </tr>\n",
" <tr>\n",
" <th>fractal_dimension_worst</th>\n",
" <td>-0.322046</td>\n",
" <td>0.009169</td>\n",
" <td>0.113425</td>\n",
" <td>0.052794</td>\n",
" <td>0.006854</td>\n",
" <td>0.498783</td>\n",
" <td>0.681343</td>\n",
" <td>0.513497</td>\n",
" <td>0.366058</td>\n",
" <td>0.427079</td>\n",
" <td>...</td>\n",
" <td>0.092952</td>\n",
" <td>0.214237</td>\n",
" <td>0.137973</td>\n",
" <td>0.079535</td>\n",
" <td>0.608113</td>\n",
" <td>0.800823</td>\n",
" <td>0.682274</td>\n",
" <td>0.510454</td>\n",
" <td>0.527177</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>31 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" diagnosis radius_mean texture_mean perimeter_mean \\\n",
"diagnosis 1.000000 -0.730032 -0.415185 -0.742636 \n",
"radius_mean -0.730032 1.000000 0.323777 0.997856 \n",
"texture_mean -0.415185 0.323777 1.000000 0.329533 \n",
"perimeter_mean -0.742636 0.997856 0.329533 1.000000 \n",
"area_mean -0.708984 0.987361 0.321086 0.986507 \n",
"smoothness_mean -0.330624 0.147104 -0.040419 0.183356 \n",
"compactness_mean -0.597576 0.505622 0.237019 0.556485 \n",
"concavity_mean -0.695750 0.677041 0.300536 0.716448 \n",
"concave points_mean -0.774410 0.821646 0.287618 0.849875 \n",
"symmetry_mean -0.333477 0.151195 0.067193 0.186306 \n",
"fractal_dimension_mean 0.027269 -0.290001 -0.080337 -0.245096 \n",
"radius_se -0.567328 0.678590 0.275973 0.691268 \n",
"texture_se 0.008299 -0.097230 0.386443 -0.086669 \n",
"perimeter_se -0.556055 0.674109 0.281590 0.693069 \n",
"area_se -0.548236 0.735868 0.259844 0.744983 \n",
"smoothness_se 0.012117 -0.167456 0.019777 -0.149510 \n",
"compactness_se -0.288713 0.204341 0.192029 0.248683 \n",
"concavity_se -0.255041 0.194151 0.145536 0.228160 \n",
"concave points_se -0.329287 0.325042 0.153373 0.353151 \n",
"symmetry_se 0.025127 -0.117235 -0.017239 -0.093748 \n",
"fractal_dimension_se -0.091669 -0.009516 0.079210 0.027147 \n",
"radius_worst -0.776453 0.969541 0.352578 0.969476 \n",
"texture_worst -0.456903 0.297000 0.912045 0.303038 \n",
"perimeter_worst -0.782914 0.965139 0.358040 0.970387 \n",
"area_worst -0.733825 0.941087 0.343546 0.941550 \n",
"smoothness_worst -0.419661 0.119031 0.075885 0.149530 \n",
"compactness_worst -0.590477 0.413065 0.278112 0.455370 \n",
"concavity_worst -0.659345 0.527379 0.300041 0.564315 \n",
"concave points_worst -0.791976 0.741494 0.293490 0.768429 \n",
"symmetry_worst -0.417123 0.164409 0.105824 0.189385 \n",
"fractal_dimension_worst -0.322046 0.009169 0.113425 0.052794 \n",
"\n",
" area_mean smoothness_mean compactness_mean \\\n",
"diagnosis -0.708984 -0.330624 -0.597576 \n",
"radius_mean 0.987361 0.147104 0.505622 \n",
"texture_mean 0.321086 -0.040419 0.237019 \n",
"perimeter_mean 0.986507 0.183356 0.556485 \n",
"area_mean 1.000000 0.151671 0.498038 \n",
"smoothness_mean 0.151671 1.000000 0.640292 \n",
"compactness_mean 0.498038 0.640292 1.000000 \n",
"concavity_mean 0.686308 0.501956 0.883729 \n",
"concave points_mean 0.822500 0.528881 0.829022 \n",
"symmetry_mean 0.154603 0.544531 0.602728 \n",
"fractal_dimension_mean -0.260436 0.556840 0.506156 \n",
"radius_se 0.732125 0.277600 0.497381 \n",
"texture_se -0.066119 0.059149 0.044875 \n",
"perimeter_se 0.726564 0.272183 0.548665 \n",
"area_se 0.800086 0.222595 0.456053 \n",
"smoothness_se -0.130192 0.268150 0.133392 \n",
"compactness_se 0.211126 0.313043 0.729164 \n",
"concavity_se 0.206659 0.249132 0.571996 \n",
"concave points_se 0.322610 0.282948 0.558232 \n",
"symmetry_se -0.084347 0.216286 0.236474 \n",
"fractal_dimension_se 0.002046 0.278176 0.496677 \n",
"radius_worst 0.962745 0.189984 0.534939 \n",
"texture_worst 0.287489 0.022669 0.247564 \n",
"perimeter_worst 0.959120 0.215574 0.589944 \n",
"area_worst 0.959213 0.182905 0.509449 \n",
"smoothness_worst 0.122984 0.777084 0.559318 \n",
"compactness_worst 0.389874 0.461971 0.864209 \n",
"concavity_worst 0.512943 0.420549 0.815163 \n",
"concave points_worst 0.719419 0.483773 0.813038 \n",
"symmetry_worst 0.143762 0.391210 0.508330 \n",
"fractal_dimension_worst 0.006854 0.498783 0.681343 \n",
"\n",
" concavity_mean concave points_mean symmetry_mean \\\n",
"diagnosis -0.695750 -0.774410 -0.333477 \n",
"radius_mean 0.677041 0.821646 0.151195 \n",
"texture_mean 0.300536 0.287618 0.067193 \n",
"perimeter_mean 0.716448 0.849875 0.186306 \n",
"area_mean 0.686308 0.822500 0.154603 \n",
"smoothness_mean 0.501956 0.528881 0.544531 \n",
"compactness_mean 0.883729 0.829022 0.602728 \n",
"concavity_mean 1.000000 0.918687 0.500739 \n",
"concave points_mean 0.918687 1.000000 0.460745 \n",
"symmetry_mean 0.500739 0.460745 1.000000 \n",
"fractal_dimension_mean 0.301176 0.142307 0.411393 \n",
"radius_se 0.631258 0.699764 0.304353 \n",
"texture_se 0.075893 0.025260 0.126154 \n",
"perimeter_se 0.659685 0.712066 0.313482 \n",
"area_se 0.617299 0.690642 0.226088 \n",
"smoothness_se 0.110590 0.051658 0.153937 \n",
"compactness_se 0.665217 0.490719 0.412566 \n",
"concavity_se 0.688883 0.441037 0.337796 \n",
"concave points_se 0.582869 0.522044 0.312608 \n",
"symmetry_se 0.178725 0.098510 0.446116 \n",
"fractal_dimension_se 0.427309 0.260200 0.333318 \n",
"radius_worst 0.688703 0.828401 0.189763 \n",
"texture_worst 0.298983 0.286828 0.084924 \n",
"perimeter_worst 0.729963 0.853642 0.222908 \n",
"area_worst 0.676656 0.807978 0.181731 \n",
"smoothness_worst 0.446247 0.446157 0.424135 \n",
"compactness_worst 0.755167 0.665741 0.474981 \n",
"concavity_worst 0.883088 0.748687 0.432004 \n",
"concave points_worst 0.859451 0.905058 0.428627 \n",
"symmetry_worst 0.409733 0.370446 0.698086 \n",
"fractal_dimension_worst 0.513497 0.366058 0.427079 \n",
"\n",
" ... radius_worst texture_worst perimeter_worst \\\n",
"diagnosis ... -0.776453 -0.456903 -0.782914 \n",
"radius_mean ... 0.969541 0.297000 0.965139 \n",
"texture_mean ... 0.352578 0.912045 0.358040 \n",
"perimeter_mean ... 0.969476 0.303038 0.970387 \n",
"area_mean ... 0.962745 0.287489 0.959120 \n",
"smoothness_mean ... 0.189984 0.022669 0.215574 \n",
"compactness_mean ... 0.534939 0.247564 0.589944 \n",
"concavity_mean ... 0.688703 0.298983 0.729963 \n",
"concave points_mean ... 0.828401 0.286828 0.853642 \n",
"symmetry_mean ... 0.189763 0.084924 0.222908 \n",
"fractal_dimension_mean ... -0.242125 -0.051211 -0.199454 \n",
"radius_se ... 0.714872 0.195092 0.719492 \n",
"texture_se ... -0.111592 0.409071 -0.102161 \n",
"perimeter_se ... 0.697133 0.200270 0.720966 \n",
"area_se ... 0.757372 0.196496 0.761213 \n",
"smoothness_se ... -0.152307 -0.008034 -0.136532 \n",
"compactness_se ... 0.202027 0.146121 0.256901 \n",
"concavity_se ... 0.187371 0.102586 0.227007 \n",
"concave points_se ... 0.307288 0.079999 0.342227 \n",
"symmetry_se ... -0.142188 -0.094773 -0.117920 \n",
"fractal_dimension_se ... -0.005759 0.008842 0.033505 \n",
"radius_worst ... 1.000000 0.359925 0.993707 \n",
"texture_worst ... 0.359925 1.000000 0.365098 \n",
"perimeter_worst ... 0.993707 0.365098 1.000000 \n",
"area_worst ... 0.984014 0.345842 0.977578 \n",
"smoothness_worst ... 0.215895 0.225808 0.235168 \n",
"compactness_worst ... 0.475348 0.361123 0.528876 \n",
"concavity_worst ... 0.574562 0.367625 0.618906 \n",
"concave points_worst ... 0.784946 0.358467 0.813826 \n",
"symmetry_worst ... 0.244034 0.234337 0.269788 \n",
"fractal_dimension_worst ... 0.092952 0.214237 0.137973 \n",
"\n",
" area_worst smoothness_worst compactness_worst \\\n",
"diagnosis -0.733825 -0.419661 -0.590477 \n",
"radius_mean 0.941087 0.119031 0.413065 \n",
"texture_mean 0.343546 0.075885 0.278112 \n",
"perimeter_mean 0.941550 0.149530 0.455370 \n",
"area_mean 0.959213 0.122984 0.389874 \n",
"smoothness_mean 0.182905 0.777084 0.461971 \n",
"compactness_mean 0.509449 0.559318 0.864209 \n",
"concavity_mean 0.676656 0.446247 0.755167 \n",
"concave points_mean 0.807978 0.446157 0.665741 \n",
"symmetry_mean 0.181731 0.424135 0.474981 \n",
"fractal_dimension_mean -0.218566 0.451897 0.410627 \n",
"radius_se 0.751431 0.140187 0.286572 \n",
"texture_se -0.083078 -0.073042 -0.092846 \n",
"perimeter_se 0.730647 0.125791 0.340951 \n",
"area_se 0.811408 0.124392 0.282682 \n",
"smoothness_se -0.115951 0.309111 0.033085 \n",
"compactness_se 0.196258 0.221708 0.672119 \n",
"concavity_se 0.187836 0.172332 0.491365 \n",
"concave points_se 0.297057 0.144157 0.389143 \n",
"symmetry_se -0.123926 -0.007047 0.070591 \n",
"fractal_dimension_se -0.000367 0.167714 0.381239 \n",
"radius_worst 0.984014 0.215895 0.475348 \n",
"texture_worst 0.345842 0.225808 0.361123 \n",
"perimeter_worst 0.977578 0.235168 0.528876 \n",
"area_worst 1.000000 0.209064 0.437727 \n",
"smoothness_worst 0.209064 1.000000 0.563176 \n",
"compactness_worst 0.437727 0.563176 1.000000 \n",
"concavity_worst 0.543774 0.513958 0.892859 \n",
"concave points_worst 0.745090 0.542461 0.800307 \n",
"symmetry_worst 0.209443 0.486361 0.614717 \n",
"fractal_dimension_worst 0.079535 0.608113 0.800823 \n",
"\n",
" concavity_worst concave points_worst \\\n",
"diagnosis -0.659345 -0.791976 \n",
"radius_mean 0.527379 0.741494 \n",
"texture_mean 0.300041 0.293490 \n",
"perimeter_mean 0.564315 0.768429 \n",
"area_mean 0.512943 0.719419 \n",
"smoothness_mean 0.420549 0.483773 \n",
"compactness_mean 0.815163 0.813038 \n",
"concavity_mean 0.883088 0.859451 \n",
"concave points_mean 0.748687 0.905058 \n",
"symmetry_mean 0.432004 0.428627 \n",
"fractal_dimension_mean 0.305228 0.148275 \n",
"radius_se 0.380110 0.528842 \n",
"texture_se -0.070533 -0.120938 \n",
"perimeter_se 0.418269 0.552873 \n",
"area_se 0.384820 0.535417 \n",
"smoothness_se 0.018581 -0.027878 \n",
"compactness_se 0.633328 0.477739 \n",
"concavity_se 0.662437 0.443167 \n",
"concave points_se 0.463453 0.502092 \n",
"symmetry_se 0.035520 -0.032924 \n",
"fractal_dimension_se 0.363578 0.203468 \n",
"radius_worst 0.574562 0.784946 \n",
"texture_worst 0.367625 0.358467 \n",
"perimeter_worst 0.618906 0.813826 \n",
"area_worst 0.543774 0.745090 \n",
"smoothness_worst 0.513958 0.542461 \n",
"compactness_worst 0.892859 0.800307 \n",
"concavity_worst 1.000000 0.855035 \n",
"concave points_worst 0.855035 1.000000 \n",
"symmetry_worst 0.530509 0.502487 \n",
"fractal_dimension_worst 0.682274 0.510454 \n",
"\n",
" symmetry_worst fractal_dimension_worst \n",
"diagnosis -0.417123 -0.322046 \n",
"radius_mean 0.164409 0.009169 \n",
"texture_mean 0.105824 0.113425 \n",
"perimeter_mean 0.189385 0.052794 \n",
"area_mean 0.143762 0.006854 \n",
"smoothness_mean 0.391210 0.498783 \n",
"compactness_mean 0.508330 0.681343 \n",
"concavity_mean 0.409733 0.513497 \n",
"concave points_mean 0.370446 0.366058 \n",
"symmetry_mean 0.698086 0.427079 \n",
"fractal_dimension_mean 0.283061 0.701592 \n",
"radius_se 0.094549 0.049351 \n",
"texture_se -0.127607 -0.044761 \n",
"perimeter_se 0.108877 0.083512 \n",
"area_se 0.072572 0.016228 \n",
"smoothness_se -0.032829 0.119089 \n",
"compactness_se 0.271770 0.574867 \n",
"concavity_se 0.199593 0.445392 \n",
"concave points_se 0.126508 0.237510 \n",
"symmetry_se 0.382952 0.084610 \n",
"fractal_dimension_se 0.098792 0.522940 \n",
"radius_worst 0.244034 0.092952 \n",
"texture_worst 0.234337 0.214237 \n",
"perimeter_worst 0.269788 0.137973 \n",
"area_worst 0.209443 0.079535 \n",
"smoothness_worst 0.486361 0.608113 \n",
"compactness_worst 0.614717 0.800823 \n",
"concavity_worst 0.530509 0.682274 \n",
"concave points_worst 0.502487 0.510454 \n",
"symmetry_worst 1.000000 0.527177 \n",
"fractal_dimension_worst 0.527177 1.000000 \n",
"\n",
"[31 rows x 31 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.corr()"
]
},
{
"cell_type": "markdown",
"id": "ee2f0278",
"metadata": {},
"source": [
"# Model Loading\n",
"In this classification, we used scikitlearn's algorithms for predicting the labels of M and B (now 0's and 1's).\n",
" \n",
"Having multiple models on their libary, we can run many models and compare it later."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4b48f208",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d7b5908a",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.svm import SVC\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"from sklearn.ensemble import GradientBoostingClassifier"
]
},
{
"cell_type": "markdown",
"id": "3d50df8d",
"metadata": {},
"source": [
"Splitting the dataset into Training and Test Sets"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "e32ec855",
"metadata": {},
"outputs": [],
"source": [
"# Split the data into features (X) and labels (y)\n",
"X = df.drop(columns=['diagnosis'])\n",
"y = df['diagnosis']\n",
"\n",
"# Split the data into training and testing sets\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"# Define a dictionary to store results\n",
"results = {'Model': [], 'F1_score': [], 'Accuracy': [], 'Precision': [], 'Recall': []}"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "28ebe0cf",
"metadata": {},
"outputs": [],
"source": [
"models = {\n",
" 'Random Forest': RandomForestClassifier(),\n",
" 'Support Vector Machine': SVC(),\n",
" 'K-Nearest Neighbors': KNeighborsClassifier(),\n",
" 'Logistic Regression': LogisticRegression(),\n",
" 'Decision Tree': DecisionTreeClassifier(),\n",
" 'Naive Bayes': GaussianNB(),\n",
" 'AdaBoost': AdaBoostClassifier(),\n",
" 'Gradient Boosting': GradientBoostingClassifier()\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "17781adc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\sang.yogi\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
}
],
"source": [
"for model_name, model in models.items():\n",
" # Train the model\n",
" model.fit(X_train, y_train)\n",
"\n",
" # Make predictions\n",
" y_pred = model.predict(X_test)\n",
"\n",
" # Evaluate the model\n",
" f1 = f1_score(y_test, y_pred)\n",
" accuracy = accuracy_score(y_test, y_pred)\n",
" precision = precision_score(y_test, y_pred)\n",
" recall = recall_score(y_test, y_pred)\n",
"\n",
" # Store results in the dictionary\n",
" results['Model'].append(model_name)\n",
" results['F1_score'].append(f1)\n",
" results['Accuracy'].append(accuracy)\n",
" results['Precision'].append(precision)\n",
" results['Recall'].append(recall)\n",
"\n",
"# Create a DataFrame from the results dictionary\n",
"results_df = pd.DataFrame(results)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "c98e51c8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Model</th>\n",
" <th>F1_score</th>\n",
" <th>Accuracy</th>\n",
" <th>Precision</th>\n",
" <th>Recall</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Random Forest</td>\n",
" <td>0.972222</td>\n",
" <td>0.964912</td>\n",
" <td>0.958904</td>\n",
" <td>0.985915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Support Vector Machine</td>\n",
" <td>0.959459</td>\n",
" <td>0.947368</td>\n",
" <td>0.922078</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>K-Nearest Neighbors</td>\n",
" <td>0.965986</td>\n",
" <td>0.956140</td>\n",
" <td>0.934211</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Random Forest</td>\n",
" <td>0.972222</td>\n",
" <td>0.964912</td>\n",
" <td>0.958904</td>\n",
" <td>0.985915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Support Vector Machine</td>\n",
" <td>0.959459</td>\n",
" <td>0.947368</td>\n",
" <td>0.922078</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>K-Nearest Neighbors</td>\n",
" <td>0.965986</td>\n",
" <td>0.956140</td>\n",
" <td>0.934211</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Logistic Regression</td>\n",
" <td>0.972222</td>\n",
" <td>0.964912</td>\n",
" <td>0.958904</td>\n",
" <td>0.985915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Decision Tree</td>\n",
" <td>0.951049</td>\n",
" <td>0.938596</td>\n",
" <td>0.944444</td>\n",
" <td>0.957746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Naive Bayes</td>\n",
" <td>0.979310</td>\n",
" <td>0.973684</td>\n",
" <td>0.959459</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>AdaBoost</td>\n",
" <td>0.957746</td>\n",
" <td>0.947368</td>\n",
" <td>0.957746</td>\n",
" <td>0.957746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Gradient Boosting</td>\n",
" <td>0.972222</td>\n",
" <td>0.964912</td>\n",
" <td>0.958904</td>\n",
" <td>0.985915</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Model F1_score Accuracy Precision Recall\n",
"0 Random Forest 0.972222 0.964912 0.958904 0.985915\n",
"1 Support Vector Machine 0.959459 0.947368 0.922078 1.000000\n",
"2 K-Nearest Neighbors 0.965986 0.956140 0.934211 1.000000\n",
"3 Random Forest 0.972222 0.964912 0.958904 0.985915\n",
"4 Support Vector Machine 0.959459 0.947368 0.922078 1.000000\n",
"5 K-Nearest Neighbors 0.965986 0.956140 0.934211 1.000000\n",
"6 Logistic Regression 0.972222 0.964912 0.958904 0.985915\n",
"7 Decision Tree 0.951049 0.938596 0.944444 0.957746\n",
"8 Naive Bayes 0.979310 0.973684 0.959459 1.000000\n",
"9 AdaBoost 0.957746 0.947368 0.957746 0.957746\n",
"10 Gradient Boosting 0.972222 0.964912 0.958904 0.985915"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "212e9b94",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}