{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "35edb7d6", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "id": "7c963881", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Role SatisfactionSkill UtilizationCareer Growth OpportunitySupervisor SupportWork-Life BalanceRecognition & AppreciationCompany CultureTraining & DevelopmentCommunication EffectivenessDiversity & InclusionWork EnvironmentCompensationStaff_IdMonth_Of_ServiceYears_Of_ServiceResidenceResidence_CodeNet_SalaryResigned
0345223332344SA6317110Depok455822180
1231243432224SP10211433Jakarta192134430
2332225443245SA79627100Bekasi358364550
3334431444535SA02310171Depok460354660
4324332342323SA98565171Jakarta155681010
\n", "
" ], "text/plain": [ " Role Satisfaction Skill Utilization Career Growth Opportunity \\\n", "0 3 4 5 \n", "1 2 3 1 \n", "2 3 3 2 \n", "3 3 3 4 \n", "4 3 2 4 \n", "\n", " Supervisor Support Work-Life Balance Recognition & Appreciation \\\n", "0 2 2 3 \n", "1 2 4 3 \n", "2 2 2 5 \n", "3 4 3 1 \n", "4 3 3 2 \n", "\n", " Company Culture Training & Development Communication Effectiveness \\\n", "0 3 3 2 \n", "1 4 3 2 \n", "2 4 4 3 \n", "3 4 4 4 \n", "4 3 4 2 \n", "\n", " Diversity & Inclusion Work Environment Compensation Staff_Id \\\n", "0 3 4 4 SA63171 \n", "1 2 2 4 SP10211 \n", "2 2 4 5 SA79627 \n", "3 5 3 5 SA02310 \n", "4 3 2 3 SA98565 \n", "\n", " Month_Of_Service Years_Of_Service Residence Residence_Code Net_Salary \\\n", "0 1 0 Depok 4 5582218 \n", "1 43 3 Jakarta 1 9213443 \n", "2 10 0 Bekasi 3 5836455 \n", "3 17 1 Depok 4 6035466 \n", "4 17 1 Jakarta 1 5568101 \n", "\n", " Resigned \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Read the HRD survey workbook directly from its raw GitHub URL\n",
  "DATA_URL = \"https://raw.githubusercontent.com/youronlydimwit/Data_ScienceUse_Cases/main/Classification/Data/HRD_Survey_50.xlsx\"\n",
  "df = pd.read_excel(DATA_URL)\n",
  "\n",
  "# Preview the first five rows\n",
  "df.head()"
 ] }, { "cell_type": "code", "execution_count": 3, "id": "fcbe70d6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Role SatisfactionSkill UtilizationCareer Growth OpportunitySupervisor SupportWork-Life BalanceRecognition & AppreciationCompany CultureTraining & DevelopmentCommunication EffectivenessDiversity & InclusionWork EnvironmentCompensationMonth_Of_ServiceYears_Of_ServiceResidence_CodeNet_SalaryResigned
034522333234410455822180
1231243432224433192134430
2332225443245100358364550
3334431444535171460354660
4324332342323171155681010
\n", "
" ], "text/plain": [ " Role Satisfaction Skill Utilization Career Growth Opportunity \\\n", "0 3 4 5 \n", "1 2 3 1 \n", "2 3 3 2 \n", "3 3 3 4 \n", "4 3 2 4 \n", "\n", " Supervisor Support Work-Life Balance Recognition & Appreciation \\\n", "0 2 2 3 \n", "1 2 4 3 \n", "2 2 2 5 \n", "3 4 3 1 \n", "4 3 3 2 \n", "\n", " Company Culture Training & Development Communication Effectiveness \\\n", "0 3 3 2 \n", "1 4 3 2 \n", "2 4 4 3 \n", "3 4 4 4 \n", "4 3 4 2 \n", "\n", " Diversity & Inclusion Work Environment Compensation Month_Of_Service \\\n", "0 3 4 4 1 \n", "1 2 2 4 43 \n", "2 2 4 5 10 \n", "3 5 3 5 17 \n", "4 3 2 3 17 \n", "\n", " Years_Of_Service Residence_Code Net_Salary Resigned \n", "0 0 4 5582218 0 \n", "1 3 1 9213443 0 \n", "2 0 3 5836455 0 \n", "3 1 4 6035466 0 \n", "4 1 1 5568101 0 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Modelling copy of df without the Staff_Id identifier and the text Residence\n",
  "# column (Residence_Code stays in the frame). df itself is left untouched.\n",
  "pred_df = df.drop(columns=['Staff_Id','Residence'])\n",
  "pred_df.head()"
 ] }, { "cell_type": "code", "execution_count": 4, "id": "10a4fe36", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Role Satisfaction int64\n", "Skill Utilization int64\n", "Career Growth Opportunity int64\n", "Supervisor Support int64\n", "Work-Life Balance int64\n", "Recognition & Appreciation int64\n", "Company Culture int64\n", "Training & Development int64\n", "Communication Effectiveness int64\n", "Diversity & Inclusion int64\n", "Work Environment int64\n", "Compensation int64\n", "Month_Of_Service int64\n", "Residence_Code int64\n", "Net_Salary int64\n", "Resigned int64\n", "dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Drop Years_Of_Service (presumably redundant with Month_Of_Service -- TODO confirm).\n",
  "# errors=\"ignore\" keeps this cell idempotent: pred_df is rebound to its own result,\n",
  "# so a second run would otherwise raise KeyError on the already-removed column.\n",
  "pred_df = pred_df.drop(columns=[\"Years_Of_Service\"], errors=\"ignore\")\n",
  "\n",
  "# Every remaining column should be numeric\n",
  "pred_df.dtypes"
 ] }, { "cell_type": "code", "execution_count": 5, "id": "a4bf1dae", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import 
train_test_split\n", "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score" ] }, { "cell_type": "code", "execution_count": 6, "id": "2dd56994", "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.tree import DecisionTreeClassifier" ] }, { "cell_type": "code", "execution_count": 7, "id": "b7c40c6f", "metadata": {}, "outputs": [], "source": [
  "# Separate the predictors (X) from the target column (y)\n",
  "X = pred_df.drop(columns=['Resigned'])\n",
  "y = pred_df['Resigned']\n",
  "\n",
  "# Hold out 20% of the rows for testing; random_state pins the shuffle so the\n",
  "# split is reproducible across runs.\n",
  "# NOTE(review): the split is not stratified on y, so the share of Resigned == 1\n",
  "# rows may differ between train and test -- consider stratify=y (that would\n",
  "# change every score below, so re-run the whole notebook if adopted).\n",
  "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
 ] }, { "cell_type": "markdown", "id": "319665bc", "metadata": {}, "source": [ "# Norm" ] }, { "cell_type": "code", "execution_count": 8, "id": "e162bda0", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. 
Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ] } ], "source": [
  "# Baseline: fit each classifier on the original training split (no resampling,\n",
  "# no class weights). random_state is fixed so results are reproducible.\n",
  "rf_model = RandomForestClassifier(random_state=42)\n",
  "rf_model.fit(X_train, y_train)\n",
  "\n",
  "dt_model = DecisionTreeClassifier(random_state=42)\n",
  "dt_model.fit(X_train, y_train)\n",
  "\n",
  "lr_model = LogisticRegression(random_state=42)\n",
  "lr_model.fit(X_train, y_train)\n",
  "\n",
  "# Evaluate every model on the held-out test split\n",
  "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
  "\n",
  "# Precision/recall/F1 are undefined when their denominator is zero (e.g. a model\n",
  "# that predicts no positive samples). zero_division=0 reports 0.0 for that case\n",
  "# explicitly -- the same value the default falls back to, so scores are\n",
  "# unchanged -- instead of emitting an UndefinedMetricWarning.\n",
  "metrics = {\n",
  "    \"Accuracy\": accuracy_score,\n",
  "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n",
  "    \"Recall\": lambda y_true, y_hat: recall_score(y_true, y_hat, zero_division=0),\n",
  "    \"F1 Score\": lambda y_true, y_hat: f1_score(y_true, y_hat, zero_division=0),\n",
  "}\n",
  "results = {}\n",
  "\n",
  "for name, model in models.items():\n",
  "    y_pred = model.predict(X_test)\n",
  "    results[name] = {\n",
  "        metric_name: metric_func(y_test, y_pred)\n",
  "        for metric_name, metric_func in metrics.items()\n",
  "    }\n",
  "\n",
  "# One column per model, one row per metric\n",
  "results_df = pd.DataFrame(results)"
 ] }, { "cell_type": "code", "execution_count": 9, "id": "94ebdccc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.870.8200000.87
Precision0.000.2727270.00
Recall0.000.2307690.00
F1 Score0.000.2500000.00
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.87 0.820000 0.87\n", "Precision 0.00 0.272727 0.00\n", "Recall 0.00 0.230769 0.00\n", "F1 Score 0.00 0.250000 0.00" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results_df" ] }, { "cell_type": "code", "execution_count": 10, "id": "e95e9b4b", "metadata": {}, "outputs": [], "source": [ "from imblearn.over_sampling import SMOTE" ] }, { "cell_type": "markdown", "id": "96ab491e", "metadata": {}, "source": [ "# SMOTE" ] }, { "cell_type": "code", "execution_count": 11, "id": "4bed2a76", "metadata": {}, "outputs": [], "source": [
  "# Oversample the minority class of the training split with SMOTE.\n",
  "# Only X_train / y_train are resampled; the test split is left as-is.\n",
  "smote = SMOTE(random_state=42)\n",
  "X_train_resampled, y_train_resampled = smote.fit_resample(X=X_train, y=y_train)"
 ] }, { "cell_type": "code", "execution_count": 12, "id": "c24a2a88", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.830.7200000.130000
Precision0.000.1052630.130000
Recall0.000.1538461.000000
F1 Score0.000.1250000.230088
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.83 0.720000 0.130000\n", "Precision 0.00 0.105263 0.130000\n", "Recall 0.00 0.153846 1.000000\n", "F1 Score 0.00 0.125000 0.230088" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Refit the three classifiers on the resampled training data.\n",
  "# NOTE: X_train_resampled / y_train_resampled are rebound by every resampling\n",
  "# section of this notebook, so run this cell right after the resampling cell\n",
  "# of its own section.\n",
  "rf_model = RandomForestClassifier(random_state=42)\n",
  "rf_model.fit(X_train_resampled, y_train_resampled)\n",
  "\n",
  "dt_model = DecisionTreeClassifier(random_state=42)\n",
  "dt_model.fit(X_train_resampled, y_train_resampled)\n",
  "\n",
  "lr_model = LogisticRegression(random_state=42)\n",
  "lr_model.fit(X_train_resampled, y_train_resampled)\n",
  "\n",
  "# Evaluate every model on the original (not resampled) test split\n",
  "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
  "\n",
  "# Precision/recall/F1 are undefined when their denominator is zero (e.g. a model\n",
  "# that predicts no positive samples). zero_division=0 reports 0.0 for that case\n",
  "# explicitly -- the same value the default falls back to -- instead of emitting\n",
  "# an UndefinedMetricWarning.\n",
  "metrics = {\n",
  "    \"Accuracy\": accuracy_score,\n",
  "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n",
  "    \"Recall\": lambda y_true, y_hat: recall_score(y_true, y_hat, zero_division=0),\n",
  "    \"F1 Score\": lambda y_true, y_hat: f1_score(y_true, y_hat, zero_division=0),\n",
  "}\n",
  "results = {}\n",
  "\n",
  "for name, model in models.items():\n",
  "    y_pred = model.predict(X_test)\n",
  "    results[name] = {\n",
  "        metric_name: metric_func(y_test, y_pred)\n",
  "        for metric_name, metric_func in metrics.items()\n",
  "    }\n",
  "\n",
  "# One column per model, one row per metric\n",
  "results_df_resampled = pd.DataFrame(results)\n",
  "\n",
  "results_df_resampled"
 ] }, { "cell_type": "markdown", "id": "45d27935", "metadata": {}, "source": [ "# OVERSAMPLER" ] }, { "cell_type": "code", "execution_count": 13, "id": "33d278e9", "metadata": {}, "outputs": [], "source": [ "from imblearn.over_sampling import RandomOverSampler" ] }, { "cell_type": "code", "execution_count": 14, "id": "c4b2b38c", "metadata": {}, "outputs": [], "source": [
  "# Apply Random Oversampling to balance the training data.\n",
  "# This rebinds X_train_resampled / y_train_resampled, replacing the SMOTE result.\n",
  "oversampler = RandomOverSampler(random_state=42)\n",
  "X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)" 
] }, { "cell_type": "code", "execution_count": 15, "id": "6e730981", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.870.780.130000
Precision0.000.000.130000
Recall0.000.001.000000
F1 Score0.000.000.230088
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.87 0.78 0.130000\n", "Precision 0.00 0.00 0.130000\n", "Recall 0.00 0.00 1.000000\n", "F1 Score 0.00 0.00 0.230088" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Refit the three classifiers on the resampled training data.\n",
  "# NOTE: X_train_resampled / y_train_resampled are rebound by every resampling\n",
  "# section of this notebook, so run this cell right after the resampling cell\n",
  "# of its own section.\n",
  "rf_model = RandomForestClassifier(random_state=42)\n",
  "rf_model.fit(X_train_resampled, y_train_resampled)\n",
  "\n",
  "dt_model = DecisionTreeClassifier(random_state=42)\n",
  "dt_model.fit(X_train_resampled, y_train_resampled)\n",
  "\n",
  "lr_model = LogisticRegression(random_state=42)\n",
  "lr_model.fit(X_train_resampled, y_train_resampled)\n",
  "\n",
  "# Evaluate every model on the original (not resampled) test split\n",
  "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
  "\n",
  "# Precision/recall/F1 are undefined when their denominator is zero (e.g. a model\n",
  "# that predicts no positive samples). zero_division=0 reports 0.0 for that case\n",
  "# explicitly -- the same value the default falls back to, so scores are\n",
  "# unchanged -- instead of emitting an UndefinedMetricWarning.\n",
  "metrics = {\n",
  "    \"Accuracy\": accuracy_score,\n",
  "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n",
  "    \"Recall\": lambda y_true, y_hat: recall_score(y_true, y_hat, zero_division=0),\n",
  "    \"F1 Score\": lambda y_true, y_hat: f1_score(y_true, y_hat, zero_division=0),\n",
  "}\n",
  "results = {}\n",
  "\n",
  "for name, model in models.items():\n",
  "    y_pred = model.predict(X_test)\n",
  "    results[name] = {\n",
  "        metric_name: metric_func(y_test, y_pred)\n",
  "        for metric_name, metric_func in metrics.items()\n",
  "    }\n",
  "\n",
  "# One column per model, one row per metric\n",
  "results_df_resampled = pd.DataFrame(results)\n",
  "\n",
  "results_df_resampled"
 ] }, { "cell_type": "markdown", "id": "bbe4b256", "metadata": {}, "source": [ "# CLASS WEIGHTS - NORM" ] }, { "cell_type": "code", "execution_count": 17, "id": "a1a034ad", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. 
Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.870.8000000.87
Precision0.000.1111110.00
Recall0.000.0769230.00
F1 Score0.000.0909090.00
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.87 0.800000 0.87\n", "Precision 0.00 0.111111 0.00\n", "Recall 0.00 0.076923 0.00\n", "F1 Score 0.00 0.090909 0.00" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Class weights: class 1 is weighted 9x relative to class 0 when fitting.\n",
  "# NOTE(review): the 1:9 ratio is hand-picked; class_weight=\"balanced\" would\n",
  "# derive the weights from y_train instead -- confirm which is intended.\n",
  "class_weights = {0: 1, 1: 9}\n",
  "\n",
  "# Fit each classifier on the original (not resampled) training split,\n",
  "# using the class weights above.\n",
  "rf_model = RandomForestClassifier(random_state=42, class_weight=class_weights)\n",
  "rf_model.fit(X_train, y_train)\n",
  "\n",
  "dt_model = DecisionTreeClassifier(random_state=42, class_weight=class_weights)\n",
  "dt_model.fit(X_train, y_train)\n",
  "\n",
  "lr_model = LogisticRegression(random_state=42, class_weight=class_weights)\n",
  "lr_model.fit(X_train, y_train)\n",
  "\n",
  "# Evaluate every model on the held-out test split\n",
  "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
  "\n",
  "# Precision/recall/F1 are undefined when their denominator is zero (e.g. a model\n",
  "# that predicts no positive samples). zero_division=0 reports 0.0 for that case\n",
  "# explicitly -- the same value the default falls back to, so scores are\n",
  "# unchanged -- instead of emitting an UndefinedMetricWarning.\n",
  "metrics = {\n",
  "    \"Accuracy\": accuracy_score,\n",
  "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n",
  "    \"Recall\": lambda y_true, y_hat: recall_score(y_true, y_hat, zero_division=0),\n",
  "    \"F1 Score\": lambda y_true, y_hat: f1_score(y_true, y_hat, zero_division=0),\n",
  "}\n",
  "results = {}\n",
  "\n",
  "for name, model in models.items():\n",
  "    y_pred = model.predict(X_test)\n",
  "    results[name] = {\n",
  "        metric_name: metric_func(y_test, y_pred)\n",
  "        for metric_name, metric_func in metrics.items()\n",
  "    }\n",
  "\n",
  "# One column per model, one row per metric. The variable name is shared with\n",
  "# the resampling sections even though nothing is resampled in this cell.\n",
  "results_df_resampled = pd.DataFrame(results)\n",
  "\n",
  "results_df_resampled"
 ] }, { "cell_type": "markdown", "id": "00cb6be9", "metadata": {}, "source": [ "# UNDERSAMPLER" ] }, { "cell_type": "code", "execution_count": 20, "id": "06a4d1c7", "metadata": {}, "outputs": [], "source": [ "from imblearn.under_sampling import RandomUnderSampler" ] }, { "cell_type": "code", "execution_count": 21, "id": "dcb0db52", "metadata": {}, "outputs": [], "source": [
  "# Apply Random Undersampling to balance the training data.\n",
  "# This rebinds X_train_resampled / y_train_resampled, replacing the result of\n",
  "# any earlier resampling section.\n",
  "undersampler = RandomUnderSampler(random_state=42)\n",
  "X_train_resampled, y_train_resampled = undersampler.fit_resample(X_train, y_train)"
 ] }, { "cell_type": "code", "execution_count": 22, "id": "a939e8fe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.5600000.4900000.130000
Precision0.1555560.1833330.130000
Recall0.5384620.8461541.000000
F1 Score0.2413790.3013700.230088
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.560000 0.490000 0.130000\n", "Precision 0.155556 0.183333 0.130000\n", "Recall 0.538462 0.846154 1.000000\n", "F1 Score 0.241379 0.301370 0.230088" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Refit the three classifiers on the resampled training data.\n",
  "# NOTE: X_train_resampled / y_train_resampled are rebound by every resampling\n",
  "# section of this notebook, so run this cell right after the resampling cell\n",
  "# of its own section.\n",
  "rf_model = RandomForestClassifier(random_state=42)\n",
  "rf_model.fit(X_train_resampled, y_train_resampled)\n",
  "\n",
  "dt_model = DecisionTreeClassifier(random_state=42)\n",
  "dt_model.fit(X_train_resampled, y_train_resampled)\n",
  "\n",
  "lr_model = LogisticRegression(random_state=42)\n",
  "lr_model.fit(X_train_resampled, y_train_resampled)\n",
  "\n",
  "# Evaluate every model on the original (not resampled) test split\n",
  "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
  "\n",
  "# Precision/recall/F1 are undefined when their denominator is zero (e.g. a model\n",
  "# that predicts no positive samples). zero_division=0 reports 0.0 for that case\n",
  "# explicitly -- the same value the default falls back to -- instead of emitting\n",
  "# an UndefinedMetricWarning.\n",
  "metrics = {\n",
  "    \"Accuracy\": accuracy_score,\n",
  "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n",
  "    \"Recall\": lambda y_true, y_hat: recall_score(y_true, y_hat, zero_division=0),\n",
  "    \"F1 Score\": lambda y_true, y_hat: f1_score(y_true, y_hat, zero_division=0),\n",
  "}\n",
  "results = {}\n",
  "\n",
  "for name, model in models.items():\n",
  "    y_pred = model.predict(X_test)\n",
  "    results[name] = {\n",
  "        metric_name: metric_func(y_test, y_pred)\n",
  "        for metric_name, metric_func in metrics.items()\n",
  "    }\n",
  "\n",
  "# One column per model, one row per metric\n",
  "results_df_resampled = pd.DataFrame(results)\n",
  "\n",
  "results_df_resampled"
 ] }, { "cell_type": "code", "execution_count": 31, "id": "3269fce1", "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAsgAAAGoCAYAAABbtxOxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAhx0lEQVR4nO3df7hld10f+veHJPJD0MCTIQ35DQ1q4JGIQ2qrVhApgRYj9UFiFaMXjT6NvaLUa0JBw62pPr0CvWrRhkITghJjEYgK1RDFiJcSJzRAEuCSkpgMk5sMIEIiBhI+94+9pnyZnJnZJzl7n5mzX6/nOc/Za+211v6cM/t81nvW+q69qrsDAADMPGSzCwAAgIOJgAwAAAMBGQAABgIyAAAMBGQAABgIyAAAMBCQD0FV9ZtV9YoN2tYJVXVXVR02Tb+7qn50I7Y9be+dVXX2Rm1vHa/7i1X1yar6/5b92vtTVTdU1dM3u45lq6oLqupNy14XVoF9wlyva5/Auhy+2QXwlarqliRHJ7k3yX1JbkzyxiQXdfeXkqS7f2Id2/rR7n7Xvpbp7luTPPLBVf2/Xu+CJH+/u39w2P5zNmLb66zj+CQvTXJid9+5Qds8KcnNSY7o7nsf6Ha6+0kbUQ+wGuwTNqQO+wTWzRHkg9PzuvtRSU5M8stJfi7J6zf6Rapqq/4H6cQkn9qoRjivzfh9buF/Q+DL7BMenJXZJ7BxBOSDWHf/TXdfkeSFSc6uqicnSVVdXFW/OD0+qqr+oKo+U1Wfrqo/r6qHVNWlSU5I8vvT6bL/o6pOqqquqhdX1a1J/mSYN/4hP6Gqrqmqv6mqt1fVY6bXenpV7RxrrKpbquq7quqMJC9L8sLp9T4wPf+/Ts9Ndb28qv6qqu6sqjdW1ddOz+2p4+yqunU6FfZv9vW7qaqvndbfPW3v5dP2vyvJlUkeN9Vx8RrrXl9Vzxumj5he77T9/HNcPX3/zLTdf1hVP1xVf1FVr6mqTye5oKqeUFV/UlWfmrb5W1V15N6/r+nxBVV1+fRzfG461bZ9PzWM2/i5qvpgkrur6vCq+paq+n+m98EHxlN2U50fn17j5qr6gWn+PLX+bFV9sKrurqrXV9XRNTtF+rmqeldVPXpads+/3zlVtauqbq+ql+7nZ9hfvSdX1Z9Nr3FlkqMO9DuBVWCfYJ/A8gjIh4DuvibJziTfvsbTL52e25bZabiXzVbpFyW5NbMjD4/s7n8/rPMdSb4hybP38ZI/lOR/S/K4zE7r/eocNf63JP8uye9Mr/eUNRb74enrGUken9lpvF/fa5lvS/J1SZ6Z5Oer6hv28ZK/luRrp+18x1Tzj0ynDp+TZNdUxw+vse4bk/zgMP3cJLd393X7+RH/8fT9yGm7752m/0GSjyd5bJILk1SSX8rsd/cNSY5PcsF+tvvdSS5LcmSSK3L/38e+fH+Sfzqtd3SSP0zyi0kek+RfJ3lLVW2rqq/O7N/vOdMRqH+UZM/POU+t35vkWUmemOR5Sd6Z2XvsqMz6x/++1/LPSHJKkn+S5Lw9jX9UVcfuq95pkd9Ocu30Gv82ydLHK8LBzD5hTau+T2CDCciHjl2ZhYm9fTHJMZmNrfpid/95d/cBtnVBd9/d3Z/fx/OXdvf13X13klck+b6aLth4kH4gyau7++PdfVeS85OctdeRild29+e7+wNJPpDkfk11quWFSc7v7s919y1JXpXkRXPW8aYkz62qr5mmX5Tk0gf0E82a7q91971T3Td195XdfU93707y6sya9b68p7vf0d33TTWstRNZy692923Tv+EPJnnHtJ0vdfeVSXZk1uST5EtJnlxVD+/u27v7hiSZs9Zf6+47uvsTSf48yfu6+3909z1J3prkm/Za/pXTe+tDSf5LZkF+b/ust6pOSPK0JK+Y6ro6ye/P+TuBVWKfMLFPYBEE5EPHsUk+vcb8/yvJTUn+uGan0c+bY1u3reP5v0pyRDbmNPfjpu2N2z48s6M
ce4xXGP9t1r5Y5KgkX7XGto6dp4ju3pXkL5J873Sq6zlJfmueddfwFb/LqnpsVV1WVZ+oqs9m1nj397vb++d9WM03bm183ROTvGA6pfqZqvpMZkddjpl2aC9M8hNJbq+qP6yqr19HrXcMjz+/xvTe/z57v3cet0bt+6x3Wv6vp7rH7QBfyT7hy+wT2HAC8iGgqp6W2R/6e/Z+bvrf8ku7+/GZnQL/map65p6n97HJAx1NOH54fEJmRyQ+meTuJI8Y6joss9N48253V2bhaNz2vfnK0DWPT0417b2tT6xjG5dkdiTzBUneOx0h3Z95f5e/NM37xu7+muk1ah11zWt83dsyO8Jz5PD11d39y0nS3X/U3c/KLIB+JMnrFljr3u+dXWsss796b0/y6GloyLgdYGKfcD/2CWw4AfkgVlVfU1X/LLPxSG+aTlvvvcw/q6q/X1WV5LOZfQzQfdPTd2Q2Hmu9frCqTq2qRyT5P5P81+l0z/+b2f9m/2lVHZHk5UkeOqx3R5KTqmpf76s3J/npml2E9ch8eXzauj4iZ6rl8iQXVtWjqurEJD+T2f/M5/W2JE9N8lOZjT87kN2ZDVU40O/zUUnuyuzCjWOT/Ow6anqg3pTkeVX17Ko6rKoeVrOLZ46r2UV13z0Fznum2va8PxZR6yuq6hFV9aQkP5Lkd9ZTb3f/VWbDLV5ZVV9VVd+W2U4eVp59wtrsE1gEAfng9PtV9bnMjrT9m8zGLP3IPpY9Jcm7MvsDfG+S13b3u6fnfinJy6fT2P96Ha9/aZKLMzvV87BMF2J1998k+ZdJ/nNm/zO/O7OLQfb43en7p6rq/Wts9w3Ttq/O7PMj/y7Jv1pHXaN/Nb3+xzM7ivLb0/bnMo21e0uSk5P83hzL/21mF1z8xfT7/JZ9LPrKzJrs32R2IdoBt/1gdfdtSc7M7GKc3Zm9b342s7/vh2R20c6uzE7Hfkdm/4aLqvXPMju9e1WSX+nuP15nvUnyLzK70OXTSX4h8+2sYCuzTzgw+wQ2VB147D5sTVX180me2MOH2PPA1AZ9aD7AZrFPYGTgNyupZp/j+eLMf5UzAFuUfQJ7M8SClVNVP5bZqcp3Th8jtmf+D9TsA9/3/rphyfWdsI867po+Bg2ADXKw7xPYHIZYAADAwBFkAAAYHNJjkI866qg+6aSTNrsMgHW79tprP9nd2w685MFPLwYOVfvqxYd0QD7ppJOyY8eOzS4DYN2qasvcIVAvBg5V++rFhlgAAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGBwSN8oBIBDV9VmV8AydG92BbB+jiADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwGBhAbmqHlZV11TVB6rqhqp65TT/gqr6RFVdN309d1jn/Kq6qao+WlXPXlRtAKtCLwZYv0XeavqeJN/Z3XdV1RFJ3lNV75yee013/8q4cFWdmuSsJE9K8rgk76qqJ3b3fQusEWCr04sB1mlhR5B75q5p8ojpa393ZD8zyWXdfU9335zkpiSnL6o+gFWgFwOs30LHIFfVYVV1XZI7k1zZ3e+bnvrJqvpgVb2hqh49zTs2yW3D6juneXtv85yq2lFVO3bv3r3I8gG2BL0YYH0WGpC7+77uPi3JcUlOr6onJ/mNJE9IclqS25O8alq81trEGtu8qLu3d/f2bdu2LaRugK1ELwZYn6V8ikV3fybJu5Oc0d13TM36S0lely+futuZ5PhhteOS7FpGfQCrQC8GmM8iP8ViW1UdOT1+eJLvSvKRqjpmWOz5Sa6fHl+R5KyqemhVnZzklCTXLKo+gFWgFwOs3yI/xeKYJJdU1WGZBfHLu/sPqurSqjots1N2tyT58STp7huq6vIkNya5N8m5rpoGeND0YoB1qu79Xcx8cNu+fXvv2LFjs8sAWLequra7t292HRvhgfbiWmu0M1vOIRwzWAH76sX
upAcAAAMBGQAABgIyAAAMBGQAABgIyAAAMBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwEBABgCAgYAMAAADARkAAAYCMgAADARkAAAYCMgAADAQkAEAYCAgAwDAQEAGAICBgAwAAAMBGQAABgIyAAAMBGQAABgIyAAAMBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwGBhAbmqHlZV11TVB6rqhqp65TT/MVV1ZVV9bPr+6GGd86vqpqr6aFU9e1G1AawKvRhg/RZ5BPmeJN/Z3U9JclqSM6rqW5Kcl+Sq7j4lyVXTdKrq1CRnJXlSkjOSvLaqDltgfQCrQC8GWKeFBeSeuWuaPGL66iRnJrlkmn9Jku+ZHp+Z5LLuvqe7b05yU5LTF1UfwCrQiwHWb6FjkKvqsKq6LsmdSa7s7vclObq7b0+S6ftjp8WPTXLbsPrOaR4AD4JeDLA+Cw3I3X1fd5+W5Lgkp1fVk/ezeK21ifstVHVOVe2oqh27d+/eoEoBti69GGB9lvIpFt39mSTvzmw82x1VdUySTN/vnBbbmeT4YbXjkuxaY1sXdff27t6+bdu2RZYNsKXoxQDzWeSnWGyrqiOnxw9P8l1JPpLkiiRnT4udneTt0+MrkpxVVQ+tqpOTnJLkmkXVB7AK9GKA9Tt8gds+Jskl09XPD0lyeXf/QVW9N8nlVfXiJLcmeUGSdPcNVXV5khuT3Jvk3O6+b4H1AawCvRhgnar7fkPLDhnbt2/vHTt2bHYZAOtWVdd29/bNrmMjPNBeXGuNdmbLOYRjBitgX73YnfQAAGAgIAMAwEBABgCAgYAMAAADARkAAAYCMgAADARkAAAYCMgAADAQkAEAYCAgAwDAQEAGAICBgAwAAAMBGQAABgIyAAAMBGQAABgIyAAAMBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwEBABgCAgYAMAAADARkAAAYCMgAADARkAAAYCMgAADAQkAEAYCAgAwDAQEAGAICBgAwAAAMBGQAABgIyAAAMBGQAABgIyAAAMFhYQK6q46vqT6vqw1V1Q1X91DT/gqr6RFVdN309d1jn/Kq6qao+WlXPXlRtAKtCLwZYv8MXuO17k7y0u99fVY9Kcm1VXTk995ru/pVx4ao6NclZSZ6U5HFJ3lVVT+zu+xZYI8BWpxcDrNPCjiB39+3d/f7p8eeSfDjJsftZ5cwkl3X3Pd19c5Kbkpy+qPoAVoFeDLB+SxmDXFUnJfmmJO+bZv1kVX2wqt5QVY+e5h2b5LZhtZ1Zo4lX1TlVtaOqduzevXuRZQNsKXoxwHwWHpCr6pFJ3pLkJd392SS/keQJSU5LcnuSV+1ZdI3V+34zui/q7u3dvX3btm2LKRpgi9GLAea30IBcVUdk1pB/q7t/L0m6+47uvq+7v5TkdfnyqbudSY4fVj8uya5F1gewCvRigPVZ5KdYVJLXJ/lwd796mH/MsNjzk1w/Pb4iyVlV9dCqOjnJKUmuWVR9AKtALwZYv0V+isW3JnlRkg9V1XXTvJcl+f6qOi2zU3a3JPnxJOnuG6rq8iQ3ZnbV9bmumgZ40PRigHVaWEDu7vdk7bFs79jPOhcmuXBRNQGsGr0YYP3cSQ8AAAYCMgAADARkAAAYCMgAADAQkAEAYCAgAwDAQEAGAICBgAwAAAMBGQAABgIyAAAMBGQAABgIyAAAMBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwEBABgCAgYAMAAADARkAAAYCMgAADARkAAAYCMgAADAQkAEAYCAgAwDAQEAGAICBgAwAAAMBGQAABnMF5Kp68qILAWD/9GKA5Zj3CPJvVtU1VfUvq+rIRRYEwD7pxQBLMFd
A7u5vS/IDSY5PsqOqfruqnrXQygD4CnoxwHLMPQa5uz+W5OVJfi7JdyT51ar6SFX980UVB8BX0osBFm/eMcjfWFWvSfLhJN+Z5Hnd/Q3T49cssD4AJnoxwHIcPudyv57kdUle1t2f3zOzu3dV1csXUhkAe9OLAZZg3oD83CSf7+77kqSqHpLkYd39t9196cKqA2CkFwMswbxjkN+V5OHD9COmeftUVcdX1Z9W1Yer6oaq+qlp/mOq6sqq+tj0/dHDOudX1U1V9dGqevZ6fxiALU4vBliCeQPyw7r7rj0T0+NHHGCde5O8dBof9y1Jzq2qU5Ocl+Sq7j4lyVXTdKbnzkrypCRnJHltVR22nh8GYIvTiwGWYN6AfHdVPXXPRFV9c5LP72f5dPft3f3+6fHnMruo5NgkZya5ZFrskiTfMz0+M8ll3X1Pd9+c5KYkp89ZH8Aq0IsBlmDeMcgvSfK7VbVrmj4myQvnfZGqOinJNyV5X5Kju/v2ZNa4q+qx02LHJvnvw2o7p3l7b+ucJOckyQknnDBvCQBbwUuiFwMs3FwBubv/sqq+PsnXJakkH+nuL86zblU9Mslbkrykuz9bVftcdK2XXqOWi5JclCTbt2+/3/MAW5VeDLAc8x5BTpKnJTlpWuebqird/cb9rVBVR2TWkH+ru39vmn1HVR0zHbE4Jsmd0/ydmd0dao/jkuwKACO9GGDB5r1RyKVJfiXJt2XWnJ+WZPsB1qkkr0/y4e5+9fDUFUnOnh6fneTtw/yzquqhVXVyklOSXDPnzwGw5enFAMsx7xHk7UlO7e71nEb71iQvSvKhqrpumveyJL+c5PKqenGSW5O8IEm6+4aqujzJjZlddX3uns/6BCCJXgywFPMG5OuT/L0kt8+74e5+T9Yey5Ykz9zHOhcmuXDe1wBYMXoxwBLMG5CPSnJjVV2T5J49M7v7uxdSFQBr0YsBlmDegHzBIosAYC4XbHYBAKtg3o95+7OqOjHJKd39rqp6RBJ3VgJYIr0YYDnm/RSLH0vyX5P8p2nWsUnetqCaAFiDXgywHPPeavrczK6E/mySdPfHkjx2v2sAsNH0YoAlmDcg39PdX9gzUVWHZ407KwGwUHoxwBLMG5D/rKpeluThVfWsJL+b5PcXVxYAa9CLAZZg3oB8XpLdST6U5MeTvCPJyxdVFABr0osBlmDeT7H4UpLXTV8AbAK9GGA55grIVXVz1hjn1t2P3/CKAFiTXgywHPPeKGT78PhhSV6Q5DEbXw4A+6EXAyzBXGOQu/tTw9cnuvs/JPnOxZYGwEgvBliOeYdYPHWYfEhmRzEetZCKAFiTXgywHPMOsXjV8PjeJLck+b4NrwaA/dGLAZZg3k+xeMaiCwFg//RigOWYd4jFz+zv+e5+9caUA8C+6MUAy7GeT7F4WpIrpunnJbk6yW2LKAqANenFAEswb0A+KslTu/tzSVJVFyT53e7+0UUVBsD96MUASzDvraZPSPKFYfoLSU7a8GoA2B+9GGAJ5j2CfGmSa6rqrZndxen5Sd64sKoAWIteDLAE836KxYVV9c4k3z7N+pHu/h+LKwuAvenFAMsx7xCLJHlEks929/+dZGdVnbygmgDYN70YYMHmCshV9QtJfi7J+dOsI5K8aVFFAXB/ejHAcsx7BPn5Sb47yd1J0t274vamAMumFwMswbwB+Qvd3ZldFJKq+urFlQTAPujFAEswb0C+vKr+U5Ijq+rHkrwryesWVxYAa9CLAZbggJ9iUVWV5HeSfH2Szyb5uiQ/391XLrg2ACZ6McDyHDAgd3dX1du6+5uTaMQAm0AvBlieeYdY/PeqetpCKwHgQPRigCWY9056z0jyE1V1S2ZXT1dmBzS+cVGFAXA/ejHAEuw3IFfVCd19a5LnLKkeAPaiFwMs14GOIL8tyVO7+6+q6i3d/b1LqAmAr/S26MUAS3OgMcg1PH78IgsBYJ/0YoAlOlBA7n08BmB59GKAJTrQEIunVNVnMzt
68fDpcfLlC0O+ZqHVAZDoxQBLtd+A3N2HLasQANamFwMs17yfgwwAACtBQAYAgIGADAAAg4UF5Kp6Q1XdWVXXD/MuqKpPVNV109dzh+fOr6qbquqjVfXsRdUFsEr0YoD1W+QR5IuTnLHG/Nd092nT1zuSpKpOTXJWkidN67y2qlyUAvDgXRy9GGBdFhaQu/vqJJ+ec/Ezk1zW3fd0981Jbkpy+qJqA1gVejHA+m3GGOSfrKoPTqf9Hj3NOzbJbcMyO6d5ACyGXgywD8sOyL+R5AlJTktye5JXTfNrjWXXvFtUVZ1TVTuqasfu3bsXUiTAFqcXA+zHUgNyd9/R3fd195eSvC5fPnW3M8nxw6LHJdm1j21c1N3bu3v7tm3bFlswwBakFwPs31IDclUdM0w+P8meq6qvSHJWVT20qk5OckqSa5ZZG8Cq0IsB9m+/t5p+MKrqzUmenuSoqtqZ5BeSPL2qTsvslN0tSX48Sbr7hqq6PMmNSe5Ncm5337eo2gBWhV4MsH7VvebwskPC9u3be8eOHZtdBsC6VdW13b19s+vYCA+0F9daI57Zcg7hmMEK2Fcvdic9AAAYCMgAADAQkAEAYCAgAwDAQEAGAICBgAwAAAMBGQAABgIyAAAMBGQAABgIyAAAMBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwEBABgCAgYAMAAADARkAAAYCMgAADARkAAAYCMgAADAQkAEAYCAgAwDAQEAGAICBgAwAAAMBGQAABgIyAAAMBGQAABgIyAAAMBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwWFpCr6g1VdWdVXT/Me0xVXVlVH5u+P3p47vyquqmqPlpVz15UXQCrRC8GWL9FHkG+OMkZe807L8lV3X1Kkqum6VTVqUnOSvKkaZ3XVtVhC6wNYFVcHL0YYF0WFpC7++okn95r9plJLpkeX5Lke4b5l3X3Pd19c5Kbkpy+qNoAVoVeDLB+yx6DfHR3354k0/fHTvOPTXLbsNzOad79VNU5VbWjqnbs3r17ocUCbFF6McB+HCwX6dUa83qtBbv7ou7e3t3bt23btuCyAFaKXgyQ5QfkO6rqmCSZvt85zd+Z5PhhueOS7FpybQCrQi8G2I9lB+Qrkpw9PT47yduH+WdV1UOr6uQkpyS5Zsm1AawKvRhgPw5f1Iar6s1Jnp7kqKrameQXkvxyksur6sVJbk3ygiTp7huq6vIkNya5N8m53X3fomoDWBV6McD6LSwgd/f37+OpZ+5j+QuTXLioegBWkV4MsH4Hy0V6AABwUBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwEBABgCAgYAMAAADARkAAAYCMgAADARkAAAYCMgAADAQkAEAYCAgAwDAQEAGAICBgAwAAAMBGQAABgIyAAAMBGQAABgIyAAAMBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwEBABgCAgYAMAAADARkAAAYCMgAADARkAAAYHL4ZL1pVtyT5XJL7ktzb3dur6jFJfifJSUluSfJ93f3Xm1EfwCrQiwHWtplHkJ/R3ad19/Zp+rwkV3X3KUmumqYBWCy9GGAvB9MQizOTXDI9viTJ92xeKQArSy8GVt5mBeRO8sdVdW1VnTPNO7q7b0+S6ftj11qxqs6pqh1VtWP37t1LKhdgS9KLAdawKWOQk3xrd++qqscmubKqPjLvit19UZKLkmT79u29qAIBVoBeDLCGTTmC3N27pu93JnlrktOT3FFVxyTJ9P3OzagNYFXoxQBrW3pArqqvrqpH7Xmc5J8kuT7JFUnOnhY7O8nbl10bwKrQiwH2bTOGWByd5K1Vtef1f7u7/1tV/WWSy6vqxUluTfKCTagNYFXoxQD7sPSA3N0
fT/KUNeZ/Kskzl10PwCrSiwH27WD6mDcAANh0AjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwEBABgCAwWbcKGTzzT4Yn1XQvTmv6z22OjbrPQYckFa8Oja6FTuCDAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwEBABgCAgYAMAAADARkAAAYCMgAADARkAAAYCMgAADAQkAEAYCAgAwDAQEAGAICBgAwAAAMBGQAABgIyAAAMBGQAABgIyAAAMBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAGAjIAAAwEZAAAGAjIAAAwEJABAGAgIAMAwEBABgCAwUEXkKvqjKr6aFXdVFXnbXY9AKtGHwZW3UEVkKvqsCT/Mclzkpya5Pur6tTNrQpgdejDAAdZQE5yepKbuvvj3f2FJJclOXOTawJYJfowsPIO3+wC9nJsktuG6Z1J/sG4QFWdk+ScafKuqvrokmo71B2V5JObXcTSVW12Batm9d5nD/w9duJGlrGBDtiHE734QVi5vxFteOlW7j2WPKj32Zq9+GALyGv9eP0VE90XJbloOeVsHVW1o7u3b3YdbG3eZ1vCAftwohc/UP5GWDTvsY1xsA2x2Jnk+GH6uCS7NqkWgFWkDwMr72ALyH+Z5JSqOrmqvirJWUmu2OSaAFaJPgysvINqiEV331tVP5nkj5IcluQN3X3DJpe1VTgVyjJ4nx3i9OGF8zfConmPbYDqvt/QMgAAWFkH2xALAADYVAIyAAAMBOQV4LaxLFpVvaGq7qyq6ze7FjhY6cUskj68sQTkLc5tY1mSi5OcsdlFwMFKL2YJLo4+vGEE5K3PbWNZuO6+OsmnN7sOOIjpxSyUPryxBOStb63bxh67SbUArCq9GA4hAvLWN9dtYwFYKL0YDiEC8tbntrEAm08vhkOIgLz1uW0swObTi+EQIiBvcd19b5I9t439cJLL3TaWjVZVb07y3iRfV1U7q+rFm10THEz0YhZNH95YbjUNAAADR5ABAGAgIAMAwEBABgCAgYAMAAADARkAAAYCMiujqv5eVV1WVf+zqm6sqndU1ROr6vrNrg1gVejFHAoO3+wCYBmqqpK8Nckl3X3WNO+0JEdvZl0Aq0Qv5lDhCDKr4hlJvtjdv7lnRndfl+S2PdNVdVJV/XlVvX/6+kfT/GOq6uqquq6qrq+qb6+qw6rq4mn6Q1X100v/iQAOPXoxhwRHkFkVT05y7QGWuTPJs7r776rqlCRvTrI9yb9I8kfdfWFVHZbkEUlOS3Jsdz85SarqyEUVDrCF6MUcEgRk+LIjkvz6dLrvviRPnOb/ZZI3VNURSd7W3ddV1ceTPL6qfi3JHyb5480oGGAL0ovZdIZYsCpuSPLNB1jmp5PckeQpmR2t+Kok6e6rk/zjJJ9IcmlV/VB3//W03LuTnJvkPy+mbIAtRS/mkCAgsyr+JMlDq+rH9syoqqclOXFY5muT3N7dX0ryoiSHTcudmOTO7n5dktcneWpVHZXkId39liSvSPLU5fwYAIc0vZhDgiEWrITu7qp6fpL/UFXnJfm7JLckecmw2GuTvKWqXpDkT5PcPc1/epKfraovJrkryQ8lOTbJf6mqPf/JPH/RPwPAoU4v5lBR3b3ZNQAAwEHDEAsAABgIyAAAMBCQAQBgICADAMBAQAYAgIGADAAAAwEZAAAG/z+YCmrMZAlbqAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
  "# Class counts of the resampled labels and of the original training labels.\n",
  "# y_train_resampled is whatever the most recently run resampling cell produced\n",
  "# (the undersampler when the notebook is run top to bottom).\n",
  "unique_train_resampled, counts_train_resampled = np.unique(y_train_resampled, return_counts=True)\n",
  "unique_train, counts_train = np.unique(y_train, return_counts=True)\n",
  "\n",
  "# Same upper y-limit on both panels so the bars are directly comparable\n",
  "y_upper = max(max(counts_train_resampled), max(counts_train)) + 10\n",
  "\n",
  "# Two side-by-side bar charts: resampled labels on the left, original on the right\n",
  "fig, (ax_resampled, ax_original) = plt.subplots(1, 2, figsize=(10, 6))\n",
  "panels = [\n",
  "    (ax_resampled, unique_train_resampled, counts_train_resampled, 'red', 'Distribution of y_train_resampled'),\n",
  "    (ax_original, unique_train, counts_train, 'blue', 'Distribution of y_train'),\n",
  "]\n",
  "for ax, classes, counts, bar_color, title in panels:\n",
  "    ax.bar(classes, counts, color=bar_color)\n",
  "    ax.set_title(title)\n",
  "    ax.set_xlabel('Class')\n",
  "    ax.set_ylabel('Frequency')\n",
  "    ax.set_xticks(classes)\n",
  "    ax.set_ylim(0, y_upper)\n",
  "\n",
  "fig.tight_layout()\n",
  "plt.show()"
 ] }, { "cell_type": "code", "execution_count": null, "id": "f16cadbc", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }