diff --git a/Classification/Breast_Cancer_Wisconsin.ipynb b/Classification/Breast_Cancer_Wisconsin.ipynb
new file mode 100644
index 0000000..6f7b4db
--- /dev/null
+++ b/Classification/Breast_Cancer_Wisconsin.ipynb
@@ -0,0 +1,2802 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "bd00ebdc",
+ "metadata": {},
+ "source": [
+ "# Load Basic Libraries\n",
+ " Load some libraries to read and display the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "375f1a0c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "b8760991",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " diagnosis | \n",
+ " radius_mean | \n",
+ " texture_mean | \n",
+ " perimeter_mean | \n",
+ " area_mean | \n",
+ " smoothness_mean | \n",
+ " compactness_mean | \n",
+ " concavity_mean | \n",
+ " concave points_mean | \n",
+ " ... | \n",
+ " texture_worst | \n",
+ " perimeter_worst | \n",
+ " area_worst | \n",
+ " smoothness_worst | \n",
+ " compactness_worst | \n",
+ " concavity_worst | \n",
+ " concave points_worst | \n",
+ " symmetry_worst | \n",
+ " fractal_dimension_worst | \n",
+ " Unnamed: 32 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 842302 | \n",
+ " M | \n",
+ " 17.99 | \n",
+ " 10.38 | \n",
+ " 122.80 | \n",
+ " 1001.0 | \n",
+ " 0.11840 | \n",
+ " 0.27760 | \n",
+ " 0.3001 | \n",
+ " 0.14710 | \n",
+ " ... | \n",
+ " 17.33 | \n",
+ " 184.60 | \n",
+ " 2019.0 | \n",
+ " 0.1622 | \n",
+ " 0.6656 | \n",
+ " 0.7119 | \n",
+ " 0.2654 | \n",
+ " 0.4601 | \n",
+ " 0.11890 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 842517 | \n",
+ " M | \n",
+ " 20.57 | \n",
+ " 17.77 | \n",
+ " 132.90 | \n",
+ " 1326.0 | \n",
+ " 0.08474 | \n",
+ " 0.07864 | \n",
+ " 0.0869 | \n",
+ " 0.07017 | \n",
+ " ... | \n",
+ " 23.41 | \n",
+ " 158.80 | \n",
+ " 1956.0 | \n",
+ " 0.1238 | \n",
+ " 0.1866 | \n",
+ " 0.2416 | \n",
+ " 0.1860 | \n",
+ " 0.2750 | \n",
+ " 0.08902 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 84300903 | \n",
+ " M | \n",
+ " 19.69 | \n",
+ " 21.25 | \n",
+ " 130.00 | \n",
+ " 1203.0 | \n",
+ " 0.10960 | \n",
+ " 0.15990 | \n",
+ " 0.1974 | \n",
+ " 0.12790 | \n",
+ " ... | \n",
+ " 25.53 | \n",
+ " 152.50 | \n",
+ " 1709.0 | \n",
+ " 0.1444 | \n",
+ " 0.4245 | \n",
+ " 0.4504 | \n",
+ " 0.2430 | \n",
+ " 0.3613 | \n",
+ " 0.08758 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 84348301 | \n",
+ " M | \n",
+ " 11.42 | \n",
+ " 20.38 | \n",
+ " 77.58 | \n",
+ " 386.1 | \n",
+ " 0.14250 | \n",
+ " 0.28390 | \n",
+ " 0.2414 | \n",
+ " 0.10520 | \n",
+ " ... | \n",
+ " 26.50 | \n",
+ " 98.87 | \n",
+ " 567.7 | \n",
+ " 0.2098 | \n",
+ " 0.8663 | \n",
+ " 0.6869 | \n",
+ " 0.2575 | \n",
+ " 0.6638 | \n",
+ " 0.17300 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 84358402 | \n",
+ " M | \n",
+ " 20.29 | \n",
+ " 14.34 | \n",
+ " 135.10 | \n",
+ " 1297.0 | \n",
+ " 0.10030 | \n",
+ " 0.13280 | \n",
+ " 0.1980 | \n",
+ " 0.10430 | \n",
+ " ... | \n",
+ " 16.67 | \n",
+ " 152.20 | \n",
+ " 1575.0 | \n",
+ " 0.1374 | \n",
+ " 0.2050 | \n",
+ " 0.4000 | \n",
+ " 0.1625 | \n",
+ " 0.2364 | \n",
+ " 0.07678 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 33 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
+ "0 842302 M 17.99 10.38 122.80 1001.0 \n",
+ "1 842517 M 20.57 17.77 132.90 1326.0 \n",
+ "2 84300903 M 19.69 21.25 130.00 1203.0 \n",
+ "3 84348301 M 11.42 20.38 77.58 386.1 \n",
+ "4 84358402 M 20.29 14.34 135.10 1297.0 \n",
+ "\n",
+ " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
+ "0 0.11840 0.27760 0.3001 0.14710 \n",
+ "1 0.08474 0.07864 0.0869 0.07017 \n",
+ "2 0.10960 0.15990 0.1974 0.12790 \n",
+ "3 0.14250 0.28390 0.2414 0.10520 \n",
+ "4 0.10030 0.13280 0.1980 0.10430 \n",
+ "\n",
+ " ... texture_worst perimeter_worst area_worst smoothness_worst \\\n",
+ "0 ... 17.33 184.60 2019.0 0.1622 \n",
+ "1 ... 23.41 158.80 1956.0 0.1238 \n",
+ "2 ... 25.53 152.50 1709.0 0.1444 \n",
+ "3 ... 26.50 98.87 567.7 0.2098 \n",
+ "4 ... 16.67 152.20 1575.0 0.1374 \n",
+ "\n",
+ " compactness_worst concavity_worst concave points_worst symmetry_worst \\\n",
+ "0 0.6656 0.7119 0.2654 0.4601 \n",
+ "1 0.1866 0.2416 0.1860 0.2750 \n",
+ "2 0.4245 0.4504 0.2430 0.3613 \n",
+ "3 0.8663 0.6869 0.2575 0.6638 \n",
+ "4 0.2050 0.4000 0.1625 0.2364 \n",
+ "\n",
+ " fractal_dimension_worst Unnamed: 32 \n",
+ "0 0.11890 NaN \n",
+ "1 0.08902 NaN \n",
+ "2 0.08758 NaN \n",
+ "3 0.17300 NaN \n",
+ "4 0.07678 NaN \n",
+ "\n",
+ "[5 rows x 33 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Location of the Breast Cancer Wisconsin CSV (raw file served from GitHub)\n",
+ "DATA_URL = \"https://raw.githubusercontent.com/youronlydimwit/Data_ScienceUse_Cases/main/Classification/Data/Breast_Cancer_Wisconsin.csv\"\n",
+ "\n",
+ "# Read the CSV into a DataFrame and preview the first rows\n",
+ "df = pd.read_csv(DATA_URL)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6eb25bbe",
+ "metadata": {},
+ "source": [
+ "# Preliminary check for missing values\n",
+ "Created a function that outputs a dataframe with columns:\n",
+ "- Column Name\n",
+ "- The Data type\n",
+ "- Count of missing data (Nulls)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "72bb3dcc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Helper: per-column completeness report\n",
+ "def completeness_check(input_df):\n",
+ "    \"\"\"Summarise each column of ``input_df``: its name, dtype and null count.\n",
+ "\n",
+ "    Returns a new DataFrame with one row per column of ``input_df`` and the\n",
+ "    columns 'Column_Name', 'Data_Type' and 'Missing_Data'.\n",
+ "    \"\"\"\n",
+ "    null_counts = input_df.isnull().sum()\n",
+ "\n",
+ "    # Build the report in a single step; all three arrays follow the column order\n",
+ "    return pd.DataFrame({\n",
+ "        'Column_Name': input_df.columns,\n",
+ "        'Data_Type': input_df.dtypes.values,\n",
+ "        'Missing_Data': null_counts.values,\n",
+ "    })"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "dd4b5d0f",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Column_Name | \n",
+ " Data_Type | \n",
+ " Missing_Data | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " id | \n",
+ " int64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " diagnosis | \n",
+ " object | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " radius_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " texture_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " perimeter_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " area_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " smoothness_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " compactness_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " concavity_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " concave points_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " symmetry_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " fractal_dimension_mean | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " radius_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " texture_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " perimeter_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " area_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " smoothness_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " compactness_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " concavity_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " concave points_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " symmetry_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " fractal_dimension_se | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " radius_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " texture_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " perimeter_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " area_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " smoothness_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " compactness_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " concavity_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " concave points_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " symmetry_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " fractal_dimension_worst | \n",
+ " float64 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " Unnamed: 32 | \n",
+ " float64 | \n",
+ " 569 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Column_Name Data_Type Missing_Data\n",
+ "0 id int64 0\n",
+ "1 diagnosis object 0\n",
+ "2 radius_mean float64 0\n",
+ "3 texture_mean float64 0\n",
+ "4 perimeter_mean float64 0\n",
+ "5 area_mean float64 0\n",
+ "6 smoothness_mean float64 0\n",
+ "7 compactness_mean float64 0\n",
+ "8 concavity_mean float64 0\n",
+ "9 concave points_mean float64 0\n",
+ "10 symmetry_mean float64 0\n",
+ "11 fractal_dimension_mean float64 0\n",
+ "12 radius_se float64 0\n",
+ "13 texture_se float64 0\n",
+ "14 perimeter_se float64 0\n",
+ "15 area_se float64 0\n",
+ "16 smoothness_se float64 0\n",
+ "17 compactness_se float64 0\n",
+ "18 concavity_se float64 0\n",
+ "19 concave points_se float64 0\n",
+ "20 symmetry_se float64 0\n",
+ "21 fractal_dimension_se float64 0\n",
+ "22 radius_worst float64 0\n",
+ "23 texture_worst float64 0\n",
+ "24 perimeter_worst float64 0\n",
+ "25 area_worst float64 0\n",
+ "26 smoothness_worst float64 0\n",
+ "27 compactness_worst float64 0\n",
+ "28 concavity_worst float64 0\n",
+ "29 concave points_worst float64 0\n",
+ "30 symmetry_worst float64 0\n",
+ "31 fractal_dimension_worst float64 0\n",
+ "32 Unnamed: 32 float64 569"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "completeness_check(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d91cd816",
+ "metadata": {},
+ "source": [
+ "Dropping the columns that are not needed in this context: **'id'** and **'Unnamed: 32'**."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "6ce4e125",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We don't need these 2 columns: 'Unnamed: 32' has no non-null values (see the\n",
+ "# completeness check above) and 'id' is not used as a feature.\n",
+ "# errors='ignore' keeps this cell re-runnable: `df` is overwritten here, so a\n",
+ "# second run would otherwise raise KeyError because the columns are already gone.\n",
+ "df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "35e3462f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " diagnosis | \n",
+ " radius_mean | \n",
+ " texture_mean | \n",
+ " perimeter_mean | \n",
+ " area_mean | \n",
+ " smoothness_mean | \n",
+ " compactness_mean | \n",
+ " concavity_mean | \n",
+ " concave points_mean | \n",
+ " symmetry_mean | \n",
+ " ... | \n",
+ " radius_worst | \n",
+ " texture_worst | \n",
+ " perimeter_worst | \n",
+ " area_worst | \n",
+ " smoothness_worst | \n",
+ " compactness_worst | \n",
+ " concavity_worst | \n",
+ " concave points_worst | \n",
+ " symmetry_worst | \n",
+ " fractal_dimension_worst | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " M | \n",
+ " 17.99 | \n",
+ " 10.38 | \n",
+ " 122.80 | \n",
+ " 1001.0 | \n",
+ " 0.11840 | \n",
+ " 0.27760 | \n",
+ " 0.3001 | \n",
+ " 0.14710 | \n",
+ " 0.2419 | \n",
+ " ... | \n",
+ " 25.38 | \n",
+ " 17.33 | \n",
+ " 184.60 | \n",
+ " 2019.0 | \n",
+ " 0.1622 | \n",
+ " 0.6656 | \n",
+ " 0.7119 | \n",
+ " 0.2654 | \n",
+ " 0.4601 | \n",
+ " 0.11890 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " M | \n",
+ " 20.57 | \n",
+ " 17.77 | \n",
+ " 132.90 | \n",
+ " 1326.0 | \n",
+ " 0.08474 | \n",
+ " 0.07864 | \n",
+ " 0.0869 | \n",
+ " 0.07017 | \n",
+ " 0.1812 | \n",
+ " ... | \n",
+ " 24.99 | \n",
+ " 23.41 | \n",
+ " 158.80 | \n",
+ " 1956.0 | \n",
+ " 0.1238 | \n",
+ " 0.1866 | \n",
+ " 0.2416 | \n",
+ " 0.1860 | \n",
+ " 0.2750 | \n",
+ " 0.08902 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " M | \n",
+ " 19.69 | \n",
+ " 21.25 | \n",
+ " 130.00 | \n",
+ " 1203.0 | \n",
+ " 0.10960 | \n",
+ " 0.15990 | \n",
+ " 0.1974 | \n",
+ " 0.12790 | \n",
+ " 0.2069 | \n",
+ " ... | \n",
+ " 23.57 | \n",
+ " 25.53 | \n",
+ " 152.50 | \n",
+ " 1709.0 | \n",
+ " 0.1444 | \n",
+ " 0.4245 | \n",
+ " 0.4504 | \n",
+ " 0.2430 | \n",
+ " 0.3613 | \n",
+ " 0.08758 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " M | \n",
+ " 11.42 | \n",
+ " 20.38 | \n",
+ " 77.58 | \n",
+ " 386.1 | \n",
+ " 0.14250 | \n",
+ " 0.28390 | \n",
+ " 0.2414 | \n",
+ " 0.10520 | \n",
+ " 0.2597 | \n",
+ " ... | \n",
+ " 14.91 | \n",
+ " 26.50 | \n",
+ " 98.87 | \n",
+ " 567.7 | \n",
+ " 0.2098 | \n",
+ " 0.8663 | \n",
+ " 0.6869 | \n",
+ " 0.2575 | \n",
+ " 0.6638 | \n",
+ " 0.17300 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " M | \n",
+ " 20.29 | \n",
+ " 14.34 | \n",
+ " 135.10 | \n",
+ " 1297.0 | \n",
+ " 0.10030 | \n",
+ " 0.13280 | \n",
+ " 0.1980 | \n",
+ " 0.10430 | \n",
+ " 0.1809 | \n",
+ " ... | \n",
+ " 22.54 | \n",
+ " 16.67 | \n",
+ " 152.20 | \n",
+ " 1575.0 | \n",
+ " 0.1374 | \n",
+ " 0.2050 | \n",
+ " 0.4000 | \n",
+ " 0.1625 | \n",
+ " 0.2364 | \n",
+ " 0.07678 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 31 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
+ "0 M 17.99 10.38 122.80 1001.0 \n",
+ "1 M 20.57 17.77 132.90 1326.0 \n",
+ "2 M 19.69 21.25 130.00 1203.0 \n",
+ "3 M 11.42 20.38 77.58 386.1 \n",
+ "4 M 20.29 14.34 135.10 1297.0 \n",
+ "\n",
+ " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
+ "0 0.11840 0.27760 0.3001 0.14710 \n",
+ "1 0.08474 0.07864 0.0869 0.07017 \n",
+ "2 0.10960 0.15990 0.1974 0.12790 \n",
+ "3 0.14250 0.28390 0.2414 0.10520 \n",
+ "4 0.10030 0.13280 0.1980 0.10430 \n",
+ "\n",
+ " symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n",
+ "0 0.2419 ... 25.38 17.33 184.60 \n",
+ "1 0.1812 ... 24.99 23.41 158.80 \n",
+ "2 0.2069 ... 23.57 25.53 152.50 \n",
+ "3 0.2597 ... 14.91 26.50 98.87 \n",
+ "4 0.1809 ... 22.54 16.67 152.20 \n",
+ "\n",
+ " area_worst smoothness_worst compactness_worst concavity_worst \\\n",
+ "0 2019.0 0.1622 0.6656 0.7119 \n",
+ "1 1956.0 0.1238 0.1866 0.2416 \n",
+ "2 1709.0 0.1444 0.4245 0.4504 \n",
+ "3 567.7 0.2098 0.8663 0.6869 \n",
+ "4 1575.0 0.1374 0.2050 0.4000 \n",
+ "\n",
+ " concave points_worst symmetry_worst fractal_dimension_worst \n",
+ "0 0.2654 0.4601 0.11890 \n",
+ "1 0.1860 0.2750 0.08902 \n",
+ "2 0.2430 0.3613 0.08758 \n",
+ "3 0.2575 0.6638 0.17300 \n",
+ "4 0.1625 0.2364 0.07678 \n",
+ "\n",
+ "[5 rows x 31 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4b2e562f",
+ "metadata": {},
+ "source": [
+ "# Reformatting\n",
+ "According to the source, the data is assumed to be already cleaned. The _float_ columns in this dataset are then rounded to two decimal places (**#.##** format) for easier reading."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "b7eca327",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def format_float_columns(df, decimals=2):\n",
+ "    \"\"\"Round every float64 column of ``df`` to ``decimals`` places, in place.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    df : pandas.DataFrame\n",
+ "        Frame to round. It is modified in place; columns that are not float64\n",
+ "        are left untouched.\n",
+ "    decimals : int, default 2\n",
+ "        Number of decimal places to keep.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        The same ``df`` object that was passed in.\n",
+ "\n",
+ "    Notes\n",
+ "    -----\n",
+ "    This changes the stored values, not only how they are displayed.\n",
+ "    \"\"\"\n",
+ "    # Get columns with float64 dtype\n",
+ "    float_columns = df.select_dtypes(include='float64').columns\n",
+ "\n",
+ "    # Round each column and write it back onto the caller's frame\n",
+ "    for col in float_columns:\n",
+ "        df[col] = df[col].round(decimals)\n",
+ "\n",
+ "    return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "adc5f2f3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " diagnosis | \n",
+ " radius_mean | \n",
+ " texture_mean | \n",
+ " perimeter_mean | \n",
+ " area_mean | \n",
+ " smoothness_mean | \n",
+ " compactness_mean | \n",
+ " concavity_mean | \n",
+ " concave points_mean | \n",
+ " symmetry_mean | \n",
+ " ... | \n",
+ " radius_worst | \n",
+ " texture_worst | \n",
+ " perimeter_worst | \n",
+ " area_worst | \n",
+ " smoothness_worst | \n",
+ " compactness_worst | \n",
+ " concavity_worst | \n",
+ " concave points_worst | \n",
+ " symmetry_worst | \n",
+ " fractal_dimension_worst | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " M | \n",
+ " 17.99 | \n",
+ " 10.38 | \n",
+ " 122.80 | \n",
+ " 1001.0 | \n",
+ " 0.12 | \n",
+ " 0.28 | \n",
+ " 0.30 | \n",
+ " 0.15 | \n",
+ " 0.24 | \n",
+ " ... | \n",
+ " 25.38 | \n",
+ " 17.33 | \n",
+ " 184.60 | \n",
+ " 2019.0 | \n",
+ " 0.16 | \n",
+ " 0.67 | \n",
+ " 0.71 | \n",
+ " 0.27 | \n",
+ " 0.46 | \n",
+ " 0.12 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " M | \n",
+ " 20.57 | \n",
+ " 17.77 | \n",
+ " 132.90 | \n",
+ " 1326.0 | \n",
+ " 0.08 | \n",
+ " 0.08 | \n",
+ " 0.09 | \n",
+ " 0.07 | \n",
+ " 0.18 | \n",
+ " ... | \n",
+ " 24.99 | \n",
+ " 23.41 | \n",
+ " 158.80 | \n",
+ " 1956.0 | \n",
+ " 0.12 | \n",
+ " 0.19 | \n",
+ " 0.24 | \n",
+ " 0.19 | \n",
+ " 0.28 | \n",
+ " 0.09 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " M | \n",
+ " 19.69 | \n",
+ " 21.25 | \n",
+ " 130.00 | \n",
+ " 1203.0 | \n",
+ " 0.11 | \n",
+ " 0.16 | \n",
+ " 0.20 | \n",
+ " 0.13 | \n",
+ " 0.21 | \n",
+ " ... | \n",
+ " 23.57 | \n",
+ " 25.53 | \n",
+ " 152.50 | \n",
+ " 1709.0 | \n",
+ " 0.14 | \n",
+ " 0.42 | \n",
+ " 0.45 | \n",
+ " 0.24 | \n",
+ " 0.36 | \n",
+ " 0.09 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " M | \n",
+ " 11.42 | \n",
+ " 20.38 | \n",
+ " 77.58 | \n",
+ " 386.1 | \n",
+ " 0.14 | \n",
+ " 0.28 | \n",
+ " 0.24 | \n",
+ " 0.11 | \n",
+ " 0.26 | \n",
+ " ... | \n",
+ " 14.91 | \n",
+ " 26.50 | \n",
+ " 98.87 | \n",
+ " 567.7 | \n",
+ " 0.21 | \n",
+ " 0.87 | \n",
+ " 0.69 | \n",
+ " 0.26 | \n",
+ " 0.66 | \n",
+ " 0.17 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " M | \n",
+ " 20.29 | \n",
+ " 14.34 | \n",
+ " 135.10 | \n",
+ " 1297.0 | \n",
+ " 0.10 | \n",
+ " 0.13 | \n",
+ " 0.20 | \n",
+ " 0.10 | \n",
+ " 0.18 | \n",
+ " ... | \n",
+ " 22.54 | \n",
+ " 16.67 | \n",
+ " 152.20 | \n",
+ " 1575.0 | \n",
+ " 0.14 | \n",
+ " 0.20 | \n",
+ " 0.40 | \n",
+ " 0.16 | \n",
+ " 0.24 | \n",
+ " 0.08 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 564 | \n",
+ " M | \n",
+ " 21.56 | \n",
+ " 22.39 | \n",
+ " 142.00 | \n",
+ " 1479.0 | \n",
+ " 0.11 | \n",
+ " 0.12 | \n",
+ " 0.24 | \n",
+ " 0.14 | \n",
+ " 0.17 | \n",
+ " ... | \n",
+ " 25.45 | \n",
+ " 26.40 | \n",
+ " 166.10 | \n",
+ " 2027.0 | \n",
+ " 0.14 | \n",
+ " 0.21 | \n",
+ " 0.41 | \n",
+ " 0.22 | \n",
+ " 0.21 | \n",
+ " 0.07 | \n",
+ "
\n",
+ " \n",
+ " | 565 | \n",
+ " M | \n",
+ " 20.13 | \n",
+ " 28.25 | \n",
+ " 131.20 | \n",
+ " 1261.0 | \n",
+ " 0.10 | \n",
+ " 0.10 | \n",
+ " 0.14 | \n",
+ " 0.10 | \n",
+ " 0.18 | \n",
+ " ... | \n",
+ " 23.69 | \n",
+ " 38.25 | \n",
+ " 155.00 | \n",
+ " 1731.0 | \n",
+ " 0.12 | \n",
+ " 0.19 | \n",
+ " 0.32 | \n",
+ " 0.16 | \n",
+ " 0.26 | \n",
+ " 0.07 | \n",
+ "
\n",
+ " \n",
+ " | 566 | \n",
+ " M | \n",
+ " 16.60 | \n",
+ " 28.08 | \n",
+ " 108.30 | \n",
+ " 858.1 | \n",
+ " 0.08 | \n",
+ " 0.10 | \n",
+ " 0.09 | \n",
+ " 0.05 | \n",
+ " 0.16 | \n",
+ " ... | \n",
+ " 18.98 | \n",
+ " 34.12 | \n",
+ " 126.70 | \n",
+ " 1124.0 | \n",
+ " 0.11 | \n",
+ " 0.31 | \n",
+ " 0.34 | \n",
+ " 0.14 | \n",
+ " 0.22 | \n",
+ " 0.08 | \n",
+ "
\n",
+ " \n",
+ " | 567 | \n",
+ " M | \n",
+ " 20.60 | \n",
+ " 29.33 | \n",
+ " 140.10 | \n",
+ " 1265.0 | \n",
+ " 0.12 | \n",
+ " 0.28 | \n",
+ " 0.35 | \n",
+ " 0.15 | \n",
+ " 0.24 | \n",
+ " ... | \n",
+ " 25.74 | \n",
+ " 39.42 | \n",
+ " 184.60 | \n",
+ " 1821.0 | \n",
+ " 0.16 | \n",
+ " 0.87 | \n",
+ " 0.94 | \n",
+ " 0.26 | \n",
+ " 0.41 | \n",
+ " 0.12 | \n",
+ "
\n",
+ " \n",
+ " | 568 | \n",
+ " B | \n",
+ " 7.76 | \n",
+ " 24.54 | \n",
+ " 47.92 | \n",
+ " 181.0 | \n",
+ " 0.05 | \n",
+ " 0.04 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.16 | \n",
+ " ... | \n",
+ " 9.46 | \n",
+ " 30.37 | \n",
+ " 59.16 | \n",
+ " 268.6 | \n",
+ " 0.09 | \n",
+ " 0.06 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.29 | \n",
+ " 0.07 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
569 rows × 31 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
+ "0 M 17.99 10.38 122.80 1001.0 \n",
+ "1 M 20.57 17.77 132.90 1326.0 \n",
+ "2 M 19.69 21.25 130.00 1203.0 \n",
+ "3 M 11.42 20.38 77.58 386.1 \n",
+ "4 M 20.29 14.34 135.10 1297.0 \n",
+ ".. ... ... ... ... ... \n",
+ "564 M 21.56 22.39 142.00 1479.0 \n",
+ "565 M 20.13 28.25 131.20 1261.0 \n",
+ "566 M 16.60 28.08 108.30 858.1 \n",
+ "567 M 20.60 29.33 140.10 1265.0 \n",
+ "568 B 7.76 24.54 47.92 181.0 \n",
+ "\n",
+ " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
+ "0 0.12 0.28 0.30 0.15 \n",
+ "1 0.08 0.08 0.09 0.07 \n",
+ "2 0.11 0.16 0.20 0.13 \n",
+ "3 0.14 0.28 0.24 0.11 \n",
+ "4 0.10 0.13 0.20 0.10 \n",
+ ".. ... ... ... ... \n",
+ "564 0.11 0.12 0.24 0.14 \n",
+ "565 0.10 0.10 0.14 0.10 \n",
+ "566 0.08 0.10 0.09 0.05 \n",
+ "567 0.12 0.28 0.35 0.15 \n",
+ "568 0.05 0.04 0.00 0.00 \n",
+ "\n",
+ " symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n",
+ "0 0.24 ... 25.38 17.33 184.60 \n",
+ "1 0.18 ... 24.99 23.41 158.80 \n",
+ "2 0.21 ... 23.57 25.53 152.50 \n",
+ "3 0.26 ... 14.91 26.50 98.87 \n",
+ "4 0.18 ... 22.54 16.67 152.20 \n",
+ ".. ... ... ... ... ... \n",
+ "564 0.17 ... 25.45 26.40 166.10 \n",
+ "565 0.18 ... 23.69 38.25 155.00 \n",
+ "566 0.16 ... 18.98 34.12 126.70 \n",
+ "567 0.24 ... 25.74 39.42 184.60 \n",
+ "568 0.16 ... 9.46 30.37 59.16 \n",
+ "\n",
+ " area_worst smoothness_worst compactness_worst concavity_worst \\\n",
+ "0 2019.0 0.16 0.67 0.71 \n",
+ "1 1956.0 0.12 0.19 0.24 \n",
+ "2 1709.0 0.14 0.42 0.45 \n",
+ "3 567.7 0.21 0.87 0.69 \n",
+ "4 1575.0 0.14 0.20 0.40 \n",
+ ".. ... ... ... ... \n",
+ "564 2027.0 0.14 0.21 0.41 \n",
+ "565 1731.0 0.12 0.19 0.32 \n",
+ "566 1124.0 0.11 0.31 0.34 \n",
+ "567 1821.0 0.16 0.87 0.94 \n",
+ "568 268.6 0.09 0.06 0.00 \n",
+ "\n",
+ " concave points_worst symmetry_worst fractal_dimension_worst \n",
+ "0 0.27 0.46 0.12 \n",
+ "1 0.19 0.28 0.09 \n",
+ "2 0.24 0.36 0.09 \n",
+ "3 0.26 0.66 0.17 \n",
+ "4 0.16 0.24 0.08 \n",
+ ".. ... ... ... \n",
+ "564 0.22 0.21 0.07 \n",
+ "565 0.16 0.26 0.07 \n",
+ "566 0.14 0.22 0.08 \n",
+ "567 0.26 0.41 0.12 \n",
+ "568 0.00 0.29 0.07 \n",
+ "\n",
+ "[569 rows x 31 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# format_float_columns writes the rounded columns back onto `df` itself,\n",
+ "# so the returned frame does not need to be reassigned here.\n",
+ "format_float_columns(df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c9aaef3a",
+ "metadata": {},
+ "source": [
+ "# Encoding\n",
+ "Since the label to be predicted only has two values, **M for Malignant** and **B for Benign**, we can encode them as 0 and 1 respectively.\n",
+ "I also changed the type to float, so that all columns have the same data type."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "60870d9a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " diagnosis | \n",
+ " radius_mean | \n",
+ " texture_mean | \n",
+ " perimeter_mean | \n",
+ " area_mean | \n",
+ " smoothness_mean | \n",
+ " compactness_mean | \n",
+ " concavity_mean | \n",
+ " concave points_mean | \n",
+ " symmetry_mean | \n",
+ " ... | \n",
+ " radius_worst | \n",
+ " texture_worst | \n",
+ " perimeter_worst | \n",
+ " area_worst | \n",
+ " smoothness_worst | \n",
+ " compactness_worst | \n",
+ " concavity_worst | \n",
+ " concave points_worst | \n",
+ " symmetry_worst | \n",
+ " fractal_dimension_worst | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.0 | \n",
+ " 17.99 | \n",
+ " 10.38 | \n",
+ " 122.8 | \n",
+ " 1001.0 | \n",
+ " 0.12 | \n",
+ " 0.28 | \n",
+ " 0.30 | \n",
+ " 0.15 | \n",
+ " 0.24 | \n",
+ " ... | \n",
+ " 25.38 | \n",
+ " 17.33 | \n",
+ " 184.6 | \n",
+ " 2019.0 | \n",
+ " 0.16 | \n",
+ " 0.67 | \n",
+ " 0.71 | \n",
+ " 0.27 | \n",
+ " 0.46 | \n",
+ " 0.12 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.0 | \n",
+ " 20.57 | \n",
+ " 17.77 | \n",
+ " 132.9 | \n",
+ " 1326.0 | \n",
+ " 0.08 | \n",
+ " 0.08 | \n",
+ " 0.09 | \n",
+ " 0.07 | \n",
+ " 0.18 | \n",
+ " ... | \n",
+ " 24.99 | \n",
+ " 23.41 | \n",
+ " 158.8 | \n",
+ " 1956.0 | \n",
+ " 0.12 | \n",
+ " 0.19 | \n",
+ " 0.24 | \n",
+ " 0.19 | \n",
+ " 0.28 | \n",
+ " 0.09 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.0 | \n",
+ " 19.69 | \n",
+ " 21.25 | \n",
+ " 130.0 | \n",
+ " 1203.0 | \n",
+ " 0.11 | \n",
+ " 0.16 | \n",
+ " 0.20 | \n",
+ " 0.13 | \n",
+ " 0.21 | \n",
+ " ... | \n",
+ " 23.57 | \n",
+ " 25.53 | \n",
+ " 152.5 | \n",
+ " 1709.0 | \n",
+ " 0.14 | \n",
+ " 0.42 | \n",
+ " 0.45 | \n",
+ " 0.24 | \n",
+ " 0.36 | \n",
+ " 0.09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3 rows × 31 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
+ "0 0.0 17.99 10.38 122.8 1001.0 \n",
+ "1 0.0 20.57 17.77 132.9 1326.0 \n",
+ "2 0.0 19.69 21.25 130.0 1203.0 \n",
+ "\n",
+ " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
+ "0 0.12 0.28 0.30 0.15 \n",
+ "1 0.08 0.08 0.09 0.07 \n",
+ "2 0.11 0.16 0.20 0.13 \n",
+ "\n",
+ " symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n",
+ "0 0.24 ... 25.38 17.33 184.6 \n",
+ "1 0.18 ... 24.99 23.41 158.8 \n",
+ "2 0.21 ... 23.57 25.53 152.5 \n",
+ "\n",
+ " area_worst smoothness_worst compactness_worst concavity_worst \\\n",
+ "0 2019.0 0.16 0.67 0.71 \n",
+ "1 1956.0 0.12 0.19 0.24 \n",
+ "2 1709.0 0.14 0.42 0.45 \n",
+ "\n",
+ " concave points_worst symmetry_worst fractal_dimension_worst \n",
+ "0 0.27 0.46 0.12 \n",
+ "1 0.19 0.28 0.09 \n",
+ "2 0.24 0.36 0.09 \n",
+ "\n",
+ "[3 rows x 31 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Encode the 'diagnosis' label as floats: 'M' -> 0.0 and 'B' -> 1.0\n",
+ "diagnosis_codes = {'M': 0, 'B': 1}\n",
+ "df['diagnosis'] = df['diagnosis'].replace(diagnosis_codes).astype(float)\n",
+ "df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ecbe4b22",
+ "metadata": {},
+ "source": [
+ "Check that the data type is now correct"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "7e437274",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dtype('float64')"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['diagnosis'].dtypes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5b18b919",
+ "metadata": {},
+ "source": [
+ "Running a correlation matrix to get a glimpse of which variables are most strongly correlated with each other."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "2c3b26b6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " diagnosis | \n",
+ " radius_mean | \n",
+ " texture_mean | \n",
+ " perimeter_mean | \n",
+ " area_mean | \n",
+ " smoothness_mean | \n",
+ " compactness_mean | \n",
+ " concavity_mean | \n",
+ " concave points_mean | \n",
+ " symmetry_mean | \n",
+ " ... | \n",
+ " radius_worst | \n",
+ " texture_worst | \n",
+ " perimeter_worst | \n",
+ " area_worst | \n",
+ " smoothness_worst | \n",
+ " compactness_worst | \n",
+ " concavity_worst | \n",
+ " concave points_worst | \n",
+ " symmetry_worst | \n",
+ " fractal_dimension_worst | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | diagnosis | \n",
+ " 1.000000 | \n",
+ " -0.730032 | \n",
+ " -0.415185 | \n",
+ " -0.742636 | \n",
+ " -0.708984 | \n",
+ " -0.330624 | \n",
+ " -0.597576 | \n",
+ " -0.695750 | \n",
+ " -0.774410 | \n",
+ " -0.333477 | \n",
+ " ... | \n",
+ " -0.776453 | \n",
+ " -0.456903 | \n",
+ " -0.782914 | \n",
+ " -0.733825 | \n",
+ " -0.419661 | \n",
+ " -0.590477 | \n",
+ " -0.659345 | \n",
+ " -0.791976 | \n",
+ " -0.417123 | \n",
+ " -0.322046 | \n",
+ "
\n",
+ " \n",
+ " | radius_mean | \n",
+ " -0.730032 | \n",
+ " 1.000000 | \n",
+ " 0.323777 | \n",
+ " 0.997856 | \n",
+ " 0.987361 | \n",
+ " 0.147104 | \n",
+ " 0.505622 | \n",
+ " 0.677041 | \n",
+ " 0.821646 | \n",
+ " 0.151195 | \n",
+ " ... | \n",
+ " 0.969541 | \n",
+ " 0.297000 | \n",
+ " 0.965139 | \n",
+ " 0.941087 | \n",
+ " 0.119031 | \n",
+ " 0.413065 | \n",
+ " 0.527379 | \n",
+ " 0.741494 | \n",
+ " 0.164409 | \n",
+ " 0.009169 | \n",
+ "
\n",
+ " \n",
+ " | texture_mean | \n",
+ " -0.415185 | \n",
+ " 0.323777 | \n",
+ " 1.000000 | \n",
+ " 0.329533 | \n",
+ " 0.321086 | \n",
+ " -0.040419 | \n",
+ " 0.237019 | \n",
+ " 0.300536 | \n",
+ " 0.287618 | \n",
+ " 0.067193 | \n",
+ " ... | \n",
+ " 0.352578 | \n",
+ " 0.912045 | \n",
+ " 0.358040 | \n",
+ " 0.343546 | \n",
+ " 0.075885 | \n",
+ " 0.278112 | \n",
+ " 0.300041 | \n",
+ " 0.293490 | \n",
+ " 0.105824 | \n",
+ " 0.113425 | \n",
+ "
\n",
+ " \n",
+ " | perimeter_mean | \n",
+ " -0.742636 | \n",
+ " 0.997856 | \n",
+ " 0.329533 | \n",
+ " 1.000000 | \n",
+ " 0.986507 | \n",
+ " 0.183356 | \n",
+ " 0.556485 | \n",
+ " 0.716448 | \n",
+ " 0.849875 | \n",
+ " 0.186306 | \n",
+ " ... | \n",
+ " 0.969476 | \n",
+ " 0.303038 | \n",
+ " 0.970387 | \n",
+ " 0.941550 | \n",
+ " 0.149530 | \n",
+ " 0.455370 | \n",
+ " 0.564315 | \n",
+ " 0.768429 | \n",
+ " 0.189385 | \n",
+ " 0.052794 | \n",
+ "
\n",
+ " \n",
+ " | area_mean | \n",
+ " -0.708984 | \n",
+ " 0.987361 | \n",
+ " 0.321086 | \n",
+ " 0.986507 | \n",
+ " 1.000000 | \n",
+ " 0.151671 | \n",
+ " 0.498038 | \n",
+ " 0.686308 | \n",
+ " 0.822500 | \n",
+ " 0.154603 | \n",
+ " ... | \n",
+ " 0.962745 | \n",
+ " 0.287489 | \n",
+ " 0.959120 | \n",
+ " 0.959213 | \n",
+ " 0.122984 | \n",
+ " 0.389874 | \n",
+ " 0.512943 | \n",
+ " 0.719419 | \n",
+ " 0.143762 | \n",
+ " 0.006854 | \n",
+ "
\n",
+ " \n",
+ " | smoothness_mean | \n",
+ " -0.330624 | \n",
+ " 0.147104 | \n",
+ " -0.040419 | \n",
+ " 0.183356 | \n",
+ " 0.151671 | \n",
+ " 1.000000 | \n",
+ " 0.640292 | \n",
+ " 0.501956 | \n",
+ " 0.528881 | \n",
+ " 0.544531 | \n",
+ " ... | \n",
+ " 0.189984 | \n",
+ " 0.022669 | \n",
+ " 0.215574 | \n",
+ " 0.182905 | \n",
+ " 0.777084 | \n",
+ " 0.461971 | \n",
+ " 0.420549 | \n",
+ " 0.483773 | \n",
+ " 0.391210 | \n",
+ " 0.498783 | \n",
+ "
\n",
+ " \n",
+ " | compactness_mean | \n",
+ " -0.597576 | \n",
+ " 0.505622 | \n",
+ " 0.237019 | \n",
+ " 0.556485 | \n",
+ " 0.498038 | \n",
+ " 0.640292 | \n",
+ " 1.000000 | \n",
+ " 0.883729 | \n",
+ " 0.829022 | \n",
+ " 0.602728 | \n",
+ " ... | \n",
+ " 0.534939 | \n",
+ " 0.247564 | \n",
+ " 0.589944 | \n",
+ " 0.509449 | \n",
+ " 0.559318 | \n",
+ " 0.864209 | \n",
+ " 0.815163 | \n",
+ " 0.813038 | \n",
+ " 0.508330 | \n",
+ " 0.681343 | \n",
+ "
\n",
+ " \n",
+ " | concavity_mean | \n",
+ " -0.695750 | \n",
+ " 0.677041 | \n",
+ " 0.300536 | \n",
+ " 0.716448 | \n",
+ " 0.686308 | \n",
+ " 0.501956 | \n",
+ " 0.883729 | \n",
+ " 1.000000 | \n",
+ " 0.918687 | \n",
+ " 0.500739 | \n",
+ " ... | \n",
+ " 0.688703 | \n",
+ " 0.298983 | \n",
+ " 0.729963 | \n",
+ " 0.676656 | \n",
+ " 0.446247 | \n",
+ " 0.755167 | \n",
+ " 0.883088 | \n",
+ " 0.859451 | \n",
+ " 0.409733 | \n",
+ " 0.513497 | \n",
+ "
\n",
+ " \n",
+ " | concave points_mean | \n",
+ " -0.774410 | \n",
+ " 0.821646 | \n",
+ " 0.287618 | \n",
+ " 0.849875 | \n",
+ " 0.822500 | \n",
+ " 0.528881 | \n",
+ " 0.829022 | \n",
+ " 0.918687 | \n",
+ " 1.000000 | \n",
+ " 0.460745 | \n",
+ " ... | \n",
+ " 0.828401 | \n",
+ " 0.286828 | \n",
+ " 0.853642 | \n",
+ " 0.807978 | \n",
+ " 0.446157 | \n",
+ " 0.665741 | \n",
+ " 0.748687 | \n",
+ " 0.905058 | \n",
+ " 0.370446 | \n",
+ " 0.366058 | \n",
+ "
\n",
+ " \n",
+ " | symmetry_mean | \n",
+ " -0.333477 | \n",
+ " 0.151195 | \n",
+ " 0.067193 | \n",
+ " 0.186306 | \n",
+ " 0.154603 | \n",
+ " 0.544531 | \n",
+ " 0.602728 | \n",
+ " 0.500739 | \n",
+ " 0.460745 | \n",
+ " 1.000000 | \n",
+ " ... | \n",
+ " 0.189763 | \n",
+ " 0.084924 | \n",
+ " 0.222908 | \n",
+ " 0.181731 | \n",
+ " 0.424135 | \n",
+ " 0.474981 | \n",
+ " 0.432004 | \n",
+ " 0.428627 | \n",
+ " 0.698086 | \n",
+ " 0.427079 | \n",
+ "
\n",
+ " \n",
+ " | fractal_dimension_mean | \n",
+ " 0.027269 | \n",
+ " -0.290001 | \n",
+ " -0.080337 | \n",
+ " -0.245096 | \n",
+ " -0.260436 | \n",
+ " 0.556840 | \n",
+ " 0.506156 | \n",
+ " 0.301176 | \n",
+ " 0.142307 | \n",
+ " 0.411393 | \n",
+ " ... | \n",
+ " -0.242125 | \n",
+ " -0.051211 | \n",
+ " -0.199454 | \n",
+ " -0.218566 | \n",
+ " 0.451897 | \n",
+ " 0.410627 | \n",
+ " 0.305228 | \n",
+ " 0.148275 | \n",
+ " 0.283061 | \n",
+ " 0.701592 | \n",
+ "
\n",
+ " \n",
+ " | radius_se | \n",
+ " -0.567328 | \n",
+ " 0.678590 | \n",
+ " 0.275973 | \n",
+ " 0.691268 | \n",
+ " 0.732125 | \n",
+ " 0.277600 | \n",
+ " 0.497381 | \n",
+ " 0.631258 | \n",
+ " 0.699764 | \n",
+ " 0.304353 | \n",
+ " ... | \n",
+ " 0.714872 | \n",
+ " 0.195092 | \n",
+ " 0.719492 | \n",
+ " 0.751431 | \n",
+ " 0.140187 | \n",
+ " 0.286572 | \n",
+ " 0.380110 | \n",
+ " 0.528842 | \n",
+ " 0.094549 | \n",
+ " 0.049351 | \n",
+ "
\n",
+ " \n",
+ " | texture_se | \n",
+ " 0.008299 | \n",
+ " -0.097230 | \n",
+ " 0.386443 | \n",
+ " -0.086669 | \n",
+ " -0.066119 | \n",
+ " 0.059149 | \n",
+ " 0.044875 | \n",
+ " 0.075893 | \n",
+ " 0.025260 | \n",
+ " 0.126154 | \n",
+ " ... | \n",
+ " -0.111592 | \n",
+ " 0.409071 | \n",
+ " -0.102161 | \n",
+ " -0.083078 | \n",
+ " -0.073042 | \n",
+ " -0.092846 | \n",
+ " -0.070533 | \n",
+ " -0.120938 | \n",
+ " -0.127607 | \n",
+ " -0.044761 | \n",
+ "
\n",
+ " \n",
+ " | perimeter_se | \n",
+ " -0.556055 | \n",
+ " 0.674109 | \n",
+ " 0.281590 | \n",
+ " 0.693069 | \n",
+ " 0.726564 | \n",
+ " 0.272183 | \n",
+ " 0.548665 | \n",
+ " 0.659685 | \n",
+ " 0.712066 | \n",
+ " 0.313482 | \n",
+ " ... | \n",
+ " 0.697133 | \n",
+ " 0.200270 | \n",
+ " 0.720966 | \n",
+ " 0.730647 | \n",
+ " 0.125791 | \n",
+ " 0.340951 | \n",
+ " 0.418269 | \n",
+ " 0.552873 | \n",
+ " 0.108877 | \n",
+ " 0.083512 | \n",
+ "
\n",
+ " \n",
+ " | area_se | \n",
+ " -0.548236 | \n",
+ " 0.735868 | \n",
+ " 0.259844 | \n",
+ " 0.744983 | \n",
+ " 0.800086 | \n",
+ " 0.222595 | \n",
+ " 0.456053 | \n",
+ " 0.617299 | \n",
+ " 0.690642 | \n",
+ " 0.226088 | \n",
+ " ... | \n",
+ " 0.757372 | \n",
+ " 0.196496 | \n",
+ " 0.761213 | \n",
+ " 0.811408 | \n",
+ " 0.124392 | \n",
+ " 0.282682 | \n",
+ " 0.384820 | \n",
+ " 0.535417 | \n",
+ " 0.072572 | \n",
+ " 0.016228 | \n",
+ "
\n",
+ " \n",
+ " | smoothness_se | \n",
+ " 0.012117 | \n",
+ " -0.167456 | \n",
+ " 0.019777 | \n",
+ " -0.149510 | \n",
+ " -0.130192 | \n",
+ " 0.268150 | \n",
+ " 0.133392 | \n",
+ " 0.110590 | \n",
+ " 0.051658 | \n",
+ " 0.153937 | \n",
+ " ... | \n",
+ " -0.152307 | \n",
+ " -0.008034 | \n",
+ " -0.136532 | \n",
+ " -0.115951 | \n",
+ " 0.309111 | \n",
+ " 0.033085 | \n",
+ " 0.018581 | \n",
+ " -0.027878 | \n",
+ " -0.032829 | \n",
+ " 0.119089 | \n",
+ "
\n",
+ " \n",
+ " | compactness_se | \n",
+ " -0.288713 | \n",
+ " 0.204341 | \n",
+ " 0.192029 | \n",
+ " 0.248683 | \n",
+ " 0.211126 | \n",
+ " 0.313043 | \n",
+ " 0.729164 | \n",
+ " 0.665217 | \n",
+ " 0.490719 | \n",
+ " 0.412566 | \n",
+ " ... | \n",
+ " 0.202027 | \n",
+ " 0.146121 | \n",
+ " 0.256901 | \n",
+ " 0.196258 | \n",
+ " 0.221708 | \n",
+ " 0.672119 | \n",
+ " 0.633328 | \n",
+ " 0.477739 | \n",
+ " 0.271770 | \n",
+ " 0.574867 | \n",
+ "
\n",
+ " \n",
+ " | concavity_se | \n",
+ " -0.255041 | \n",
+ " 0.194151 | \n",
+ " 0.145536 | \n",
+ " 0.228160 | \n",
+ " 0.206659 | \n",
+ " 0.249132 | \n",
+ " 0.571996 | \n",
+ " 0.688883 | \n",
+ " 0.441037 | \n",
+ " 0.337796 | \n",
+ " ... | \n",
+ " 0.187371 | \n",
+ " 0.102586 | \n",
+ " 0.227007 | \n",
+ " 0.187836 | \n",
+ " 0.172332 | \n",
+ " 0.491365 | \n",
+ " 0.662437 | \n",
+ " 0.443167 | \n",
+ " 0.199593 | \n",
+ " 0.445392 | \n",
+ "
\n",
+ " \n",
+ " | concave points_se | \n",
+ " -0.329287 | \n",
+ " 0.325042 | \n",
+ " 0.153373 | \n",
+ " 0.353151 | \n",
+ " 0.322610 | \n",
+ " 0.282948 | \n",
+ " 0.558232 | \n",
+ " 0.582869 | \n",
+ " 0.522044 | \n",
+ " 0.312608 | \n",
+ " ... | \n",
+ " 0.307288 | \n",
+ " 0.079999 | \n",
+ " 0.342227 | \n",
+ " 0.297057 | \n",
+ " 0.144157 | \n",
+ " 0.389143 | \n",
+ " 0.463453 | \n",
+ " 0.502092 | \n",
+ " 0.126508 | \n",
+ " 0.237510 | \n",
+ "
\n",
+ " \n",
+ " | symmetry_se | \n",
+ " 0.025127 | \n",
+ " -0.117235 | \n",
+ " -0.017239 | \n",
+ " -0.093748 | \n",
+ " -0.084347 | \n",
+ " 0.216286 | \n",
+ " 0.236474 | \n",
+ " 0.178725 | \n",
+ " 0.098510 | \n",
+ " 0.446116 | \n",
+ " ... | \n",
+ " -0.142188 | \n",
+ " -0.094773 | \n",
+ " -0.117920 | \n",
+ " -0.123926 | \n",
+ " -0.007047 | \n",
+ " 0.070591 | \n",
+ " 0.035520 | \n",
+ " -0.032924 | \n",
+ " 0.382952 | \n",
+ " 0.084610 | \n",
+ "
\n",
+ " \n",
+ " | fractal_dimension_se | \n",
+ " -0.091669 | \n",
+ " -0.009516 | \n",
+ " 0.079210 | \n",
+ " 0.027147 | \n",
+ " 0.002046 | \n",
+ " 0.278176 | \n",
+ " 0.496677 | \n",
+ " 0.427309 | \n",
+ " 0.260200 | \n",
+ " 0.333318 | \n",
+ " ... | \n",
+ " -0.005759 | \n",
+ " 0.008842 | \n",
+ " 0.033505 | \n",
+ " -0.000367 | \n",
+ " 0.167714 | \n",
+ " 0.381239 | \n",
+ " 0.363578 | \n",
+ " 0.203468 | \n",
+ " 0.098792 | \n",
+ " 0.522940 | \n",
+ "
\n",
+ " \n",
+ " | radius_worst | \n",
+ " -0.776453 | \n",
+ " 0.969541 | \n",
+ " 0.352578 | \n",
+ " 0.969476 | \n",
+ " 0.962745 | \n",
+ " 0.189984 | \n",
+ " 0.534939 | \n",
+ " 0.688703 | \n",
+ " 0.828401 | \n",
+ " 0.189763 | \n",
+ " ... | \n",
+ " 1.000000 | \n",
+ " 0.359925 | \n",
+ " 0.993707 | \n",
+ " 0.984014 | \n",
+ " 0.215895 | \n",
+ " 0.475348 | \n",
+ " 0.574562 | \n",
+ " 0.784946 | \n",
+ " 0.244034 | \n",
+ " 0.092952 | \n",
+ "
\n",
+ " \n",
+ " | texture_worst | \n",
+ " -0.456903 | \n",
+ " 0.297000 | \n",
+ " 0.912045 | \n",
+ " 0.303038 | \n",
+ " 0.287489 | \n",
+ " 0.022669 | \n",
+ " 0.247564 | \n",
+ " 0.298983 | \n",
+ " 0.286828 | \n",
+ " 0.084924 | \n",
+ " ... | \n",
+ " 0.359925 | \n",
+ " 1.000000 | \n",
+ " 0.365098 | \n",
+ " 0.345842 | \n",
+ " 0.225808 | \n",
+ " 0.361123 | \n",
+ " 0.367625 | \n",
+ " 0.358467 | \n",
+ " 0.234337 | \n",
+ " 0.214237 | \n",
+ "
\n",
+ " \n",
+ " | perimeter_worst | \n",
+ " -0.782914 | \n",
+ " 0.965139 | \n",
+ " 0.358040 | \n",
+ " 0.970387 | \n",
+ " 0.959120 | \n",
+ " 0.215574 | \n",
+ " 0.589944 | \n",
+ " 0.729963 | \n",
+ " 0.853642 | \n",
+ " 0.222908 | \n",
+ " ... | \n",
+ " 0.993707 | \n",
+ " 0.365098 | \n",
+ " 1.000000 | \n",
+ " 0.977578 | \n",
+ " 0.235168 | \n",
+ " 0.528876 | \n",
+ " 0.618906 | \n",
+ " 0.813826 | \n",
+ " 0.269788 | \n",
+ " 0.137973 | \n",
+ "
\n",
+ " \n",
+ " | area_worst | \n",
+ " -0.733825 | \n",
+ " 0.941087 | \n",
+ " 0.343546 | \n",
+ " 0.941550 | \n",
+ " 0.959213 | \n",
+ " 0.182905 | \n",
+ " 0.509449 | \n",
+ " 0.676656 | \n",
+ " 0.807978 | \n",
+ " 0.181731 | \n",
+ " ... | \n",
+ " 0.984014 | \n",
+ " 0.345842 | \n",
+ " 0.977578 | \n",
+ " 1.000000 | \n",
+ " 0.209064 | \n",
+ " 0.437727 | \n",
+ " 0.543774 | \n",
+ " 0.745090 | \n",
+ " 0.209443 | \n",
+ " 0.079535 | \n",
+ "
\n",
+ " \n",
+ " | smoothness_worst | \n",
+ " -0.419661 | \n",
+ " 0.119031 | \n",
+ " 0.075885 | \n",
+ " 0.149530 | \n",
+ " 0.122984 | \n",
+ " 0.777084 | \n",
+ " 0.559318 | \n",
+ " 0.446247 | \n",
+ " 0.446157 | \n",
+ " 0.424135 | \n",
+ " ... | \n",
+ " 0.215895 | \n",
+ " 0.225808 | \n",
+ " 0.235168 | \n",
+ " 0.209064 | \n",
+ " 1.000000 | \n",
+ " 0.563176 | \n",
+ " 0.513958 | \n",
+ " 0.542461 | \n",
+ " 0.486361 | \n",
+ " 0.608113 | \n",
+ "
\n",
+ " \n",
+ " | compactness_worst | \n",
+ " -0.590477 | \n",
+ " 0.413065 | \n",
+ " 0.278112 | \n",
+ " 0.455370 | \n",
+ " 0.389874 | \n",
+ " 0.461971 | \n",
+ " 0.864209 | \n",
+ " 0.755167 | \n",
+ " 0.665741 | \n",
+ " 0.474981 | \n",
+ " ... | \n",
+ " 0.475348 | \n",
+ " 0.361123 | \n",
+ " 0.528876 | \n",
+ " 0.437727 | \n",
+ " 0.563176 | \n",
+ " 1.000000 | \n",
+ " 0.892859 | \n",
+ " 0.800307 | \n",
+ " 0.614717 | \n",
+ " 0.800823 | \n",
+ "
\n",
+ " \n",
+ " | concavity_worst | \n",
+ " -0.659345 | \n",
+ " 0.527379 | \n",
+ " 0.300041 | \n",
+ " 0.564315 | \n",
+ " 0.512943 | \n",
+ " 0.420549 | \n",
+ " 0.815163 | \n",
+ " 0.883088 | \n",
+ " 0.748687 | \n",
+ " 0.432004 | \n",
+ " ... | \n",
+ " 0.574562 | \n",
+ " 0.367625 | \n",
+ " 0.618906 | \n",
+ " 0.543774 | \n",
+ " 0.513958 | \n",
+ " 0.892859 | \n",
+ " 1.000000 | \n",
+ " 0.855035 | \n",
+ " 0.530509 | \n",
+ " 0.682274 | \n",
+ "
\n",
+ " \n",
+ " | concave points_worst | \n",
+ " -0.791976 | \n",
+ " 0.741494 | \n",
+ " 0.293490 | \n",
+ " 0.768429 | \n",
+ " 0.719419 | \n",
+ " 0.483773 | \n",
+ " 0.813038 | \n",
+ " 0.859451 | \n",
+ " 0.905058 | \n",
+ " 0.428627 | \n",
+ " ... | \n",
+ " 0.784946 | \n",
+ " 0.358467 | \n",
+ " 0.813826 | \n",
+ " 0.745090 | \n",
+ " 0.542461 | \n",
+ " 0.800307 | \n",
+ " 0.855035 | \n",
+ " 1.000000 | \n",
+ " 0.502487 | \n",
+ " 0.510454 | \n",
+ "
\n",
+ " \n",
+ " | symmetry_worst | \n",
+ " -0.417123 | \n",
+ " 0.164409 | \n",
+ " 0.105824 | \n",
+ " 0.189385 | \n",
+ " 0.143762 | \n",
+ " 0.391210 | \n",
+ " 0.508330 | \n",
+ " 0.409733 | \n",
+ " 0.370446 | \n",
+ " 0.698086 | \n",
+ " ... | \n",
+ " 0.244034 | \n",
+ " 0.234337 | \n",
+ " 0.269788 | \n",
+ " 0.209443 | \n",
+ " 0.486361 | \n",
+ " 0.614717 | \n",
+ " 0.530509 | \n",
+ " 0.502487 | \n",
+ " 1.000000 | \n",
+ " 0.527177 | \n",
+ "
\n",
+ " \n",
+ " | fractal_dimension_worst | \n",
+ " -0.322046 | \n",
+ " 0.009169 | \n",
+ " 0.113425 | \n",
+ " 0.052794 | \n",
+ " 0.006854 | \n",
+ " 0.498783 | \n",
+ " 0.681343 | \n",
+ " 0.513497 | \n",
+ " 0.366058 | \n",
+ " 0.427079 | \n",
+ " ... | \n",
+ " 0.092952 | \n",
+ " 0.214237 | \n",
+ " 0.137973 | \n",
+ " 0.079535 | \n",
+ " 0.608113 | \n",
+ " 0.800823 | \n",
+ " 0.682274 | \n",
+ " 0.510454 | \n",
+ " 0.527177 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
31 rows × 31 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " diagnosis radius_mean texture_mean perimeter_mean \\\n",
+ "diagnosis 1.000000 -0.730032 -0.415185 -0.742636 \n",
+ "radius_mean -0.730032 1.000000 0.323777 0.997856 \n",
+ "texture_mean -0.415185 0.323777 1.000000 0.329533 \n",
+ "perimeter_mean -0.742636 0.997856 0.329533 1.000000 \n",
+ "area_mean -0.708984 0.987361 0.321086 0.986507 \n",
+ "smoothness_mean -0.330624 0.147104 -0.040419 0.183356 \n",
+ "compactness_mean -0.597576 0.505622 0.237019 0.556485 \n",
+ "concavity_mean -0.695750 0.677041 0.300536 0.716448 \n",
+ "concave points_mean -0.774410 0.821646 0.287618 0.849875 \n",
+ "symmetry_mean -0.333477 0.151195 0.067193 0.186306 \n",
+ "fractal_dimension_mean 0.027269 -0.290001 -0.080337 -0.245096 \n",
+ "radius_se -0.567328 0.678590 0.275973 0.691268 \n",
+ "texture_se 0.008299 -0.097230 0.386443 -0.086669 \n",
+ "perimeter_se -0.556055 0.674109 0.281590 0.693069 \n",
+ "area_se -0.548236 0.735868 0.259844 0.744983 \n",
+ "smoothness_se 0.012117 -0.167456 0.019777 -0.149510 \n",
+ "compactness_se -0.288713 0.204341 0.192029 0.248683 \n",
+ "concavity_se -0.255041 0.194151 0.145536 0.228160 \n",
+ "concave points_se -0.329287 0.325042 0.153373 0.353151 \n",
+ "symmetry_se 0.025127 -0.117235 -0.017239 -0.093748 \n",
+ "fractal_dimension_se -0.091669 -0.009516 0.079210 0.027147 \n",
+ "radius_worst -0.776453 0.969541 0.352578 0.969476 \n",
+ "texture_worst -0.456903 0.297000 0.912045 0.303038 \n",
+ "perimeter_worst -0.782914 0.965139 0.358040 0.970387 \n",
+ "area_worst -0.733825 0.941087 0.343546 0.941550 \n",
+ "smoothness_worst -0.419661 0.119031 0.075885 0.149530 \n",
+ "compactness_worst -0.590477 0.413065 0.278112 0.455370 \n",
+ "concavity_worst -0.659345 0.527379 0.300041 0.564315 \n",
+ "concave points_worst -0.791976 0.741494 0.293490 0.768429 \n",
+ "symmetry_worst -0.417123 0.164409 0.105824 0.189385 \n",
+ "fractal_dimension_worst -0.322046 0.009169 0.113425 0.052794 \n",
+ "\n",
+ " area_mean smoothness_mean compactness_mean \\\n",
+ "diagnosis -0.708984 -0.330624 -0.597576 \n",
+ "radius_mean 0.987361 0.147104 0.505622 \n",
+ "texture_mean 0.321086 -0.040419 0.237019 \n",
+ "perimeter_mean 0.986507 0.183356 0.556485 \n",
+ "area_mean 1.000000 0.151671 0.498038 \n",
+ "smoothness_mean 0.151671 1.000000 0.640292 \n",
+ "compactness_mean 0.498038 0.640292 1.000000 \n",
+ "concavity_mean 0.686308 0.501956 0.883729 \n",
+ "concave points_mean 0.822500 0.528881 0.829022 \n",
+ "symmetry_mean 0.154603 0.544531 0.602728 \n",
+ "fractal_dimension_mean -0.260436 0.556840 0.506156 \n",
+ "radius_se 0.732125 0.277600 0.497381 \n",
+ "texture_se -0.066119 0.059149 0.044875 \n",
+ "perimeter_se 0.726564 0.272183 0.548665 \n",
+ "area_se 0.800086 0.222595 0.456053 \n",
+ "smoothness_se -0.130192 0.268150 0.133392 \n",
+ "compactness_se 0.211126 0.313043 0.729164 \n",
+ "concavity_se 0.206659 0.249132 0.571996 \n",
+ "concave points_se 0.322610 0.282948 0.558232 \n",
+ "symmetry_se -0.084347 0.216286 0.236474 \n",
+ "fractal_dimension_se 0.002046 0.278176 0.496677 \n",
+ "radius_worst 0.962745 0.189984 0.534939 \n",
+ "texture_worst 0.287489 0.022669 0.247564 \n",
+ "perimeter_worst 0.959120 0.215574 0.589944 \n",
+ "area_worst 0.959213 0.182905 0.509449 \n",
+ "smoothness_worst 0.122984 0.777084 0.559318 \n",
+ "compactness_worst 0.389874 0.461971 0.864209 \n",
+ "concavity_worst 0.512943 0.420549 0.815163 \n",
+ "concave points_worst 0.719419 0.483773 0.813038 \n",
+ "symmetry_worst 0.143762 0.391210 0.508330 \n",
+ "fractal_dimension_worst 0.006854 0.498783 0.681343 \n",
+ "\n",
+ " concavity_mean concave points_mean symmetry_mean \\\n",
+ "diagnosis -0.695750 -0.774410 -0.333477 \n",
+ "radius_mean 0.677041 0.821646 0.151195 \n",
+ "texture_mean 0.300536 0.287618 0.067193 \n",
+ "perimeter_mean 0.716448 0.849875 0.186306 \n",
+ "area_mean 0.686308 0.822500 0.154603 \n",
+ "smoothness_mean 0.501956 0.528881 0.544531 \n",
+ "compactness_mean 0.883729 0.829022 0.602728 \n",
+ "concavity_mean 1.000000 0.918687 0.500739 \n",
+ "concave points_mean 0.918687 1.000000 0.460745 \n",
+ "symmetry_mean 0.500739 0.460745 1.000000 \n",
+ "fractal_dimension_mean 0.301176 0.142307 0.411393 \n",
+ "radius_se 0.631258 0.699764 0.304353 \n",
+ "texture_se 0.075893 0.025260 0.126154 \n",
+ "perimeter_se 0.659685 0.712066 0.313482 \n",
+ "area_se 0.617299 0.690642 0.226088 \n",
+ "smoothness_se 0.110590 0.051658 0.153937 \n",
+ "compactness_se 0.665217 0.490719 0.412566 \n",
+ "concavity_se 0.688883 0.441037 0.337796 \n",
+ "concave points_se 0.582869 0.522044 0.312608 \n",
+ "symmetry_se 0.178725 0.098510 0.446116 \n",
+ "fractal_dimension_se 0.427309 0.260200 0.333318 \n",
+ "radius_worst 0.688703 0.828401 0.189763 \n",
+ "texture_worst 0.298983 0.286828 0.084924 \n",
+ "perimeter_worst 0.729963 0.853642 0.222908 \n",
+ "area_worst 0.676656 0.807978 0.181731 \n",
+ "smoothness_worst 0.446247 0.446157 0.424135 \n",
+ "compactness_worst 0.755167 0.665741 0.474981 \n",
+ "concavity_worst 0.883088 0.748687 0.432004 \n",
+ "concave points_worst 0.859451 0.905058 0.428627 \n",
+ "symmetry_worst 0.409733 0.370446 0.698086 \n",
+ "fractal_dimension_worst 0.513497 0.366058 0.427079 \n",
+ "\n",
+ " ... radius_worst texture_worst perimeter_worst \\\n",
+ "diagnosis ... -0.776453 -0.456903 -0.782914 \n",
+ "radius_mean ... 0.969541 0.297000 0.965139 \n",
+ "texture_mean ... 0.352578 0.912045 0.358040 \n",
+ "perimeter_mean ... 0.969476 0.303038 0.970387 \n",
+ "area_mean ... 0.962745 0.287489 0.959120 \n",
+ "smoothness_mean ... 0.189984 0.022669 0.215574 \n",
+ "compactness_mean ... 0.534939 0.247564 0.589944 \n",
+ "concavity_mean ... 0.688703 0.298983 0.729963 \n",
+ "concave points_mean ... 0.828401 0.286828 0.853642 \n",
+ "symmetry_mean ... 0.189763 0.084924 0.222908 \n",
+ "fractal_dimension_mean ... -0.242125 -0.051211 -0.199454 \n",
+ "radius_se ... 0.714872 0.195092 0.719492 \n",
+ "texture_se ... -0.111592 0.409071 -0.102161 \n",
+ "perimeter_se ... 0.697133 0.200270 0.720966 \n",
+ "area_se ... 0.757372 0.196496 0.761213 \n",
+ "smoothness_se ... -0.152307 -0.008034 -0.136532 \n",
+ "compactness_se ... 0.202027 0.146121 0.256901 \n",
+ "concavity_se ... 0.187371 0.102586 0.227007 \n",
+ "concave points_se ... 0.307288 0.079999 0.342227 \n",
+ "symmetry_se ... -0.142188 -0.094773 -0.117920 \n",
+ "fractal_dimension_se ... -0.005759 0.008842 0.033505 \n",
+ "radius_worst ... 1.000000 0.359925 0.993707 \n",
+ "texture_worst ... 0.359925 1.000000 0.365098 \n",
+ "perimeter_worst ... 0.993707 0.365098 1.000000 \n",
+ "area_worst ... 0.984014 0.345842 0.977578 \n",
+ "smoothness_worst ... 0.215895 0.225808 0.235168 \n",
+ "compactness_worst ... 0.475348 0.361123 0.528876 \n",
+ "concavity_worst ... 0.574562 0.367625 0.618906 \n",
+ "concave points_worst ... 0.784946 0.358467 0.813826 \n",
+ "symmetry_worst ... 0.244034 0.234337 0.269788 \n",
+ "fractal_dimension_worst ... 0.092952 0.214237 0.137973 \n",
+ "\n",
+ " area_worst smoothness_worst compactness_worst \\\n",
+ "diagnosis -0.733825 -0.419661 -0.590477 \n",
+ "radius_mean 0.941087 0.119031 0.413065 \n",
+ "texture_mean 0.343546 0.075885 0.278112 \n",
+ "perimeter_mean 0.941550 0.149530 0.455370 \n",
+ "area_mean 0.959213 0.122984 0.389874 \n",
+ "smoothness_mean 0.182905 0.777084 0.461971 \n",
+ "compactness_mean 0.509449 0.559318 0.864209 \n",
+ "concavity_mean 0.676656 0.446247 0.755167 \n",
+ "concave points_mean 0.807978 0.446157 0.665741 \n",
+ "symmetry_mean 0.181731 0.424135 0.474981 \n",
+ "fractal_dimension_mean -0.218566 0.451897 0.410627 \n",
+ "radius_se 0.751431 0.140187 0.286572 \n",
+ "texture_se -0.083078 -0.073042 -0.092846 \n",
+ "perimeter_se 0.730647 0.125791 0.340951 \n",
+ "area_se 0.811408 0.124392 0.282682 \n",
+ "smoothness_se -0.115951 0.309111 0.033085 \n",
+ "compactness_se 0.196258 0.221708 0.672119 \n",
+ "concavity_se 0.187836 0.172332 0.491365 \n",
+ "concave points_se 0.297057 0.144157 0.389143 \n",
+ "symmetry_se -0.123926 -0.007047 0.070591 \n",
+ "fractal_dimension_se -0.000367 0.167714 0.381239 \n",
+ "radius_worst 0.984014 0.215895 0.475348 \n",
+ "texture_worst 0.345842 0.225808 0.361123 \n",
+ "perimeter_worst 0.977578 0.235168 0.528876 \n",
+ "area_worst 1.000000 0.209064 0.437727 \n",
+ "smoothness_worst 0.209064 1.000000 0.563176 \n",
+ "compactness_worst 0.437727 0.563176 1.000000 \n",
+ "concavity_worst 0.543774 0.513958 0.892859 \n",
+ "concave points_worst 0.745090 0.542461 0.800307 \n",
+ "symmetry_worst 0.209443 0.486361 0.614717 \n",
+ "fractal_dimension_worst 0.079535 0.608113 0.800823 \n",
+ "\n",
+ " concavity_worst concave points_worst \\\n",
+ "diagnosis -0.659345 -0.791976 \n",
+ "radius_mean 0.527379 0.741494 \n",
+ "texture_mean 0.300041 0.293490 \n",
+ "perimeter_mean 0.564315 0.768429 \n",
+ "area_mean 0.512943 0.719419 \n",
+ "smoothness_mean 0.420549 0.483773 \n",
+ "compactness_mean 0.815163 0.813038 \n",
+ "concavity_mean 0.883088 0.859451 \n",
+ "concave points_mean 0.748687 0.905058 \n",
+ "symmetry_mean 0.432004 0.428627 \n",
+ "fractal_dimension_mean 0.305228 0.148275 \n",
+ "radius_se 0.380110 0.528842 \n",
+ "texture_se -0.070533 -0.120938 \n",
+ "perimeter_se 0.418269 0.552873 \n",
+ "area_se 0.384820 0.535417 \n",
+ "smoothness_se 0.018581 -0.027878 \n",
+ "compactness_se 0.633328 0.477739 \n",
+ "concavity_se 0.662437 0.443167 \n",
+ "concave points_se 0.463453 0.502092 \n",
+ "symmetry_se 0.035520 -0.032924 \n",
+ "fractal_dimension_se 0.363578 0.203468 \n",
+ "radius_worst 0.574562 0.784946 \n",
+ "texture_worst 0.367625 0.358467 \n",
+ "perimeter_worst 0.618906 0.813826 \n",
+ "area_worst 0.543774 0.745090 \n",
+ "smoothness_worst 0.513958 0.542461 \n",
+ "compactness_worst 0.892859 0.800307 \n",
+ "concavity_worst 1.000000 0.855035 \n",
+ "concave points_worst 0.855035 1.000000 \n",
+ "symmetry_worst 0.530509 0.502487 \n",
+ "fractal_dimension_worst 0.682274 0.510454 \n",
+ "\n",
+ " symmetry_worst fractal_dimension_worst \n",
+ "diagnosis -0.417123 -0.322046 \n",
+ "radius_mean 0.164409 0.009169 \n",
+ "texture_mean 0.105824 0.113425 \n",
+ "perimeter_mean 0.189385 0.052794 \n",
+ "area_mean 0.143762 0.006854 \n",
+ "smoothness_mean 0.391210 0.498783 \n",
+ "compactness_mean 0.508330 0.681343 \n",
+ "concavity_mean 0.409733 0.513497 \n",
+ "concave points_mean 0.370446 0.366058 \n",
+ "symmetry_mean 0.698086 0.427079 \n",
+ "fractal_dimension_mean 0.283061 0.701592 \n",
+ "radius_se 0.094549 0.049351 \n",
+ "texture_se -0.127607 -0.044761 \n",
+ "perimeter_se 0.108877 0.083512 \n",
+ "area_se 0.072572 0.016228 \n",
+ "smoothness_se -0.032829 0.119089 \n",
+ "compactness_se 0.271770 0.574867 \n",
+ "concavity_se 0.199593 0.445392 \n",
+ "concave points_se 0.126508 0.237510 \n",
+ "symmetry_se 0.382952 0.084610 \n",
+ "fractal_dimension_se 0.098792 0.522940 \n",
+ "radius_worst 0.244034 0.092952 \n",
+ "texture_worst 0.234337 0.214237 \n",
+ "perimeter_worst 0.269788 0.137973 \n",
+ "area_worst 0.209443 0.079535 \n",
+ "smoothness_worst 0.486361 0.608113 \n",
+ "compactness_worst 0.614717 0.800823 \n",
+ "concavity_worst 0.530509 0.682274 \n",
+ "concave points_worst 0.502487 0.510454 \n",
+ "symmetry_worst 1.000000 0.527177 \n",
+ "fractal_dimension_worst 0.527177 1.000000 \n",
+ "\n",
+ "[31 rows x 31 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.corr()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ee2f0278",
+ "metadata": {},
+ "source": [
+ "# Model Loading\n",
+ "In this classification task, we use scikit-learn's algorithms to predict the M and B labels (now encoded as 0s and 1s).\n",
+ " \n",
+ "Because the library offers many models, we can train several of them and compare their results afterwards."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "4b48f208",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "d7b5908a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.svm import SVC\n",
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from sklearn.naive_bayes import GaussianNB\n",
+ "from sklearn.ensemble import AdaBoostClassifier\n",
+ "from sklearn.ensemble import GradientBoostingClassifier"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3d50df8d",
+ "metadata": {},
+ "source": [
+ "Splitting the dataset into Training and Test Sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "e32ec855",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Separate the target column from the predictor columns\n",
+ "y = df['diagnosis']\n",
+ "X = df.drop(columns=['diagnosis'])\n",
+ "\n",
+ "# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, y, test_size=0.2, random_state=42\n",
+ ")\n",
+ "\n",
+ "# One empty list per column of the final comparison table\n",
+ "results = {column: [] for column in ('Model', 'F1_score', 'Accuracy', 'Precision', 'Recall')}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "28ebe0cf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Seed shared by every estimator that uses randomness, so scores are\n",
+ "# identical on each Restart & Run All\n",
+ "SEED = 42\n",
+ "\n",
+ "# Display name -> unfitted estimator; each one is trained and scored in turn\n",
+ "models = {\n",
+ "    'Random Forest': RandomForestClassifier(random_state=SEED),\n",
+ "    'Support Vector Machine': SVC(),\n",
+ "    'K-Nearest Neighbors': KNeighborsClassifier(),\n",
+ "    # With the default iteration cap lbfgs stopped early (ConvergenceWarning),\n",
+ "    # so give the solver more iterations to reach a solution\n",
+ "    'Logistic Regression': LogisticRegression(max_iter=10000),\n",
+ "    'Decision Tree': DecisionTreeClassifier(random_state=SEED),\n",
+ "    'Naive Bayes': GaussianNB(),\n",
+ "    'AdaBoost': AdaBoostClassifier(random_state=SEED),\n",
+ "    'Gradient Boosting': GradientBoostingClassifier(random_state=SEED)\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "17781adc",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\sang.yogi\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
+ "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+ "\n",
+ "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
+ " https://scikit-learn.org/stable/modules/preprocessing.html\n",
+ "Please also refer to the documentation for alternative solver options:\n",
+ " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
+ " n_iter_i = _check_optimize_result(\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Start from empty accumulators so that re-running this cell replaces the\n",
+ "# previous scores instead of appending a second row for every model\n",
+ "results = {'Model': [], 'F1_score': [], 'Accuracy': [], 'Precision': [], 'Recall': []}\n",
+ "\n",
+ "for model_name, model in models.items():\n",
+ "    # Train the model\n",
+ "    model.fit(X_train, y_train)\n",
+ "\n",
+ "    # Make predictions on the held-out rows\n",
+ "    y_pred = model.predict(X_test)\n",
+ "\n",
+ "    # Evaluate the model\n",
+ "    # NOTE(review): F1, precision and recall use the default pos_label=1 --\n",
+ "    # confirm that label 1 is the class these scores should describe\n",
+ "    f1 = f1_score(y_test, y_pred)\n",
+ "    accuracy = accuracy_score(y_test, y_pred)\n",
+ "    precision = precision_score(y_test, y_pred)\n",
+ "    recall = recall_score(y_test, y_pred)\n",
+ "\n",
+ "    # Store results in the dictionary\n",
+ "    results['Model'].append(model_name)\n",
+ "    results['F1_score'].append(f1)\n",
+ "    results['Accuracy'].append(accuracy)\n",
+ "    results['Precision'].append(precision)\n",
+ "    results['Recall'].append(recall)\n",
+ "\n",
+ "# Create a DataFrame from the results dictionary (one row per model)\n",
+ "results_df = pd.DataFrame(results)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "c98e51c8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Model | \n",
+ " F1_score | \n",
+ " Accuracy | \n",
+ " Precision | \n",
+ " Recall | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Random Forest | \n",
+ " 0.972222 | \n",
+ " 0.964912 | \n",
+ " 0.958904 | \n",
+ " 0.985915 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Support Vector Machine | \n",
+ " 0.959459 | \n",
+ " 0.947368 | \n",
+ " 0.922078 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " K-Nearest Neighbors | \n",
+ " 0.965986 | \n",
+ " 0.956140 | \n",
+ " 0.934211 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Random Forest | \n",
+ " 0.972222 | \n",
+ " 0.964912 | \n",
+ " 0.958904 | \n",
+ " 0.985915 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Support Vector Machine | \n",
+ " 0.959459 | \n",
+ " 0.947368 | \n",
+ " 0.922078 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " K-Nearest Neighbors | \n",
+ " 0.965986 | \n",
+ " 0.956140 | \n",
+ " 0.934211 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " Logistic Regression | \n",
+ " 0.972222 | \n",
+ " 0.964912 | \n",
+ " 0.958904 | \n",
+ " 0.985915 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " Decision Tree | \n",
+ " 0.951049 | \n",
+ " 0.938596 | \n",
+ " 0.944444 | \n",
+ " 0.957746 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " Naive Bayes | \n",
+ " 0.979310 | \n",
+ " 0.973684 | \n",
+ " 0.959459 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " AdaBoost | \n",
+ " 0.957746 | \n",
+ " 0.947368 | \n",
+ " 0.957746 | \n",
+ " 0.957746 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " Gradient Boosting | \n",
+ " 0.972222 | \n",
+ " 0.964912 | \n",
+ " 0.958904 | \n",
+ " 0.985915 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Model F1_score Accuracy Precision Recall\n",
+ "0 Random Forest 0.972222 0.964912 0.958904 0.985915\n",
+ "1 Support Vector Machine 0.959459 0.947368 0.922078 1.000000\n",
+ "2 K-Nearest Neighbors 0.965986 0.956140 0.934211 1.000000\n",
+ "3 Random Forest 0.972222 0.964912 0.958904 0.985915\n",
+ "4 Support Vector Machine 0.959459 0.947368 0.922078 1.000000\n",
+ "5 K-Nearest Neighbors 0.965986 0.956140 0.934211 1.000000\n",
+ "6 Logistic Regression 0.972222 0.964912 0.958904 0.985915\n",
+ "7 Decision Tree 0.951049 0.938596 0.944444 0.957746\n",
+ "8 Naive Bayes 0.979310 0.973684 0.959459 1.000000\n",
+ "9 AdaBoost 0.957746 0.947368 0.957746 0.957746\n",
+ "10 Gradient Boosting 0.972222 0.964912 0.958904 0.985915"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "212e9b94",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}