{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "35edb7d6", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "id": "7c963881", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Role SatisfactionSkill UtilizationCareer Growth OpportunitySupervisor SupportWork-Life BalanceRecognition & AppreciationCompany CultureTraining & DevelopmentCommunication EffectivenessDiversity & InclusionWork EnvironmentCompensationStaff_IdMonth_Of_ServiceYears_Of_ServiceResidenceResidence_CodeNet_SalaryResigned
0345223332344SA6317110Depok455822180
1231243432224SP10211433Jakarta192134430
2332225443245SA79627100Bekasi358364550
3334431444535SA02310171Depok460354660
4324332342323SA98565171Jakarta155681010
\n", "
" ], "text/plain": [ " Role Satisfaction Skill Utilization Career Growth Opportunity \\\n", "0 3 4 5 \n", "1 2 3 1 \n", "2 3 3 2 \n", "3 3 3 4 \n", "4 3 2 4 \n", "\n", " Supervisor Support Work-Life Balance Recognition & Appreciation \\\n", "0 2 2 3 \n", "1 2 4 3 \n", "2 2 2 5 \n", "3 4 3 1 \n", "4 3 3 2 \n", "\n", " Company Culture Training & Development Communication Effectiveness \\\n", "0 3 3 2 \n", "1 4 3 2 \n", "2 4 4 3 \n", "3 4 4 4 \n", "4 3 4 2 \n", "\n", " Diversity & Inclusion Work Environment Compensation Staff_Id \\\n", "0 3 4 4 SA63171 \n", "1 2 2 4 SP10211 \n", "2 2 4 5 SA79627 \n", "3 5 3 5 SA02310 \n", "4 3 2 3 SA98565 \n", "\n", " Month_Of_Service Years_Of_Service Residence Residence_Code Net_Salary \\\n", "0 1 0 Depok 4 5582218 \n", "1 43 3 Jakarta 1 9213443 \n", "2 10 0 Bekasi 3 5836455 \n", "3 17 1 Depok 4 6035466 \n", "4 17 1 Jakarta 1 5568101 \n", "\n", " Resigned \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the HRD survey workbook straight from its public GitHub URL\n", "df = pd.read_excel(\n", "    io=\"https://raw.githubusercontent.com/youronlydimwit/Data_ScienceUse_Cases/main/Classification/Data/HRD_Survey_50.xlsx\"\n", ")\n", "\n", "# Preview the first rows to check the columns that were read\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "id": "fcbe70d6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Role SatisfactionSkill UtilizationCareer Growth OpportunitySupervisor SupportWork-Life BalanceRecognition & AppreciationCompany CultureTraining & DevelopmentCommunication EffectivenessDiversity & InclusionWork EnvironmentCompensationMonth_Of_ServiceYears_Of_ServiceResidence_CodeNet_SalaryResigned
034522333234410455822180
1231243432224433192134430
2332225443245100358364550
3334431444535171460354660
4324332342323171155681010
\n", "
" ], "text/plain": [ " Role Satisfaction Skill Utilization Career Growth Opportunity \\\n", "0 3 4 5 \n", "1 2 3 1 \n", "2 3 3 2 \n", "3 3 3 4 \n", "4 3 2 4 \n", "\n", " Supervisor Support Work-Life Balance Recognition & Appreciation \\\n", "0 2 2 3 \n", "1 2 4 3 \n", "2 2 2 5 \n", "3 4 3 1 \n", "4 3 3 2 \n", "\n", " Company Culture Training & Development Communication Effectiveness \\\n", "0 3 3 2 \n", "1 4 3 2 \n", "2 4 4 3 \n", "3 4 4 4 \n", "4 3 4 2 \n", "\n", " Diversity & Inclusion Work Environment Compensation Month_Of_Service \\\n", "0 3 4 4 1 \n", "1 2 2 4 43 \n", "2 2 4 5 10 \n", "3 5 3 5 17 \n", "4 3 2 3 17 \n", "\n", " Years_Of_Service Residence_Code Net_Salary Resigned \n", "0 0 4 5582218 0 \n", "1 3 1 9213443 0 \n", "2 0 3 5836455 0 \n", "3 1 4 6035466 0 \n", "4 1 1 5568101 0 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Making a copy of df, but with only numerical information\n", "# Removing the identifier (Staff_Id) and the text column Residence (Residence_Code is kept)\n", "pred_df = df.drop(columns=['Staff_Id','Residence'])\n", "pred_df.head()" ] }, { "cell_type": "code", "execution_count": 4, "id": "10a4fe36", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Role Satisfaction int64\n", "Skill Utilization int64\n", "Career Growth Opportunity int64\n", "Supervisor Support int64\n", "Work-Life Balance int64\n", "Recognition & Appreciation int64\n", "Company Culture int64\n", "Training & Development int64\n", "Communication Effectiveness int64\n", "Diversity & Inclusion int64\n", "Work Environment int64\n", "Compensation int64\n", "Month_Of_Service int64\n", "Residence_Code int64\n", "Net_Salary int64\n", "Resigned int64\n", "dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rebuild pred_df from df instead of dropping from pred_df again, so re-running\n", "# this cell does not raise KeyError on the already-removed column.\n", "# NOTE(review): Years_Of_Service looks derived from Month_Of_Service in the preview\n", "# above (e.g. 43 months -> 3 years) -- confirm that is why it is dropped.\n", "pred_df = df.drop(columns=['Staff_Id', 'Residence', 'Years_Of_Service'])\n", "\n", "# Every remaining column should be numeric\n", "pred_df.dtypes" ] }, { "cell_type": "code", "execution_count": 5, "id": "a4bf1dae", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import 
train_test_split\n", "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score" ] }, { "cell_type": "code", "execution_count": 6, "id": "2dd56994", "metadata": {}, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.tree import DecisionTreeClassifier" ] }, { "cell_type": "code", "execution_count": 7, "id": "b7c40c6f", "metadata": {}, "outputs": [], "source": [ "# Split the data into features (X) and labels (y)\n", "X = pred_df.drop(columns=['Resigned'])\n", "y = pred_df['Resigned']\n", "\n", "# Split the data into training (80%) and testing (20%) sets; the fixed seed keeps the split reproducible\n", "# NOTE(review): the classes look imbalanced (an all-negative model scores 0.87 accuracy below);\n", "# consider stratify=y. Not changed here because it would alter every stored result.\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" ] }, { "cell_type": "markdown", "id": "319665bc", "metadata": {}, "source": [ "# Norm" ] }, { "cell_type": "code", "execution_count": 8, "id": "e162bda0", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. 
Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ] } ], "source": [ "# Baseline: fit the three classifiers on the untouched training split\n", "# Train Random Forest model\n", "rf_model = RandomForestClassifier(random_state=42)\n", "rf_model.fit(X_train, y_train)\n", "\n", "# Train Decision Tree model\n", "dt_model = DecisionTreeClassifier(random_state=42)\n", "dt_model.fit(X_train, y_train)\n", "\n", "# Train Logistic Regression model\n", "lr_model = LogisticRegression(random_state=42)\n", "lr_model.fit(X_train, y_train)\n", "\n", "# Evaluate models on the held-out test split\n", "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n", "metrics = {\n", "    \"Accuracy\": accuracy_score,\n", "    # zero_division=0 still reports 0.0 when a model predicts no positives, but without UndefinedMetricWarning\n", "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n", "    \"Recall\": recall_score,\n", "    \"F1 Score\": f1_score,\n", "}\n", "results = {}\n", "\n", "# Build one {metric name: score} dict per model\n", "for name, model in models.items():\n", "    y_pred = model.predict(X_test)\n", "    result = {}\n", "    for metric_name, metric_func in metrics.items():\n", "        result[metric_name] = metric_func(y_test, y_pred)\n", "    results[name] = result\n", "\n", "# Convert results to DataFrame (models as columns, metrics as rows) for easier plotting\n", "results_df = pd.DataFrame(results)" ] }, { "cell_type": "code", "execution_count": 9, "id": "94ebdccc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.870.8200000.87
Precision0.000.2727270.00
Recall0.000.2307690.00
F1 Score0.000.2500000.00
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.87 0.820000 0.87\n", "Precision 0.00 0.272727 0.00\n", "Recall 0.00 0.230769 0.00\n", "F1 Score 0.00 0.250000 0.00" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the baseline scores (models as columns, metrics as rows)\n", "results_df" ] }, { "cell_type": "code", "execution_count": 10, "id": "e95e9b4b", "metadata": {}, "outputs": [], "source": [ "from imblearn.over_sampling import SMOTE" ] }, { "cell_type": "markdown", "id": "96ab491e", "metadata": {}, "source": [ "# SMOTE" ] }, { "cell_type": "code", "execution_count": 11, "id": "4bed2a76", "metadata": {}, "outputs": [], "source": [ "# Apply SMOTE to oversample the minority class in the training data\n", "# Only X_train / y_train are passed to the sampler; the test split is not resampled\n", "smote = SMOTE(random_state=42)\n", "X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 12, "id": "c24a2a88", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.830.7200000.130000
Precision0.000.1052630.130000
Recall0.000.1538461.000000
F1 Score0.000.1250000.230088
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.83 0.720000 0.130000\n", "Precision 0.00 0.105263 0.130000\n", "Recall 0.00 0.153846 1.000000\n", "F1 Score 0.00 0.125000 0.230088" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Refit the three classifiers on the resampled training data; the test split stays untouched\n", "# Train Random Forest model\n", "rf_model = RandomForestClassifier(random_state=42)\n", "rf_model.fit(X_train_resampled, y_train_resampled)\n", "\n", "# Train Decision Tree model\n", "dt_model = DecisionTreeClassifier(random_state=42)\n", "dt_model.fit(X_train_resampled, y_train_resampled)\n", "\n", "# Train Logistic Regression model\n", "lr_model = LogisticRegression(random_state=42)\n", "lr_model.fit(X_train_resampled, y_train_resampled)\n", "\n", "# Evaluate models on the held-out test split\n", "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n", "metrics = {\n", "    \"Accuracy\": accuracy_score,\n", "    # zero_division=0 still reports 0.0 when a model predicts no positives, but without UndefinedMetricWarning\n", "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n", "    \"Recall\": recall_score,\n", "    \"F1 Score\": f1_score,\n", "}\n", "results = {}\n", "\n", "# Build one {metric name: score} dict per model\n", "for name, model in models.items():\n", "    y_pred = model.predict(X_test)\n", "    result = {}\n", "    for metric_name, metric_func in metrics.items():\n", "        result[metric_name] = metric_func(y_test, y_pred)\n", "    results[name] = result\n", "\n", "# Convert results to DataFrame (models as columns, metrics as rows) for easier plotting\n", "results_df_resampled = pd.DataFrame(results)\n", "\n", "results_df_resampled" ] }, { "cell_type": "markdown", "id": "45d27935", "metadata": {}, "source": [ "# OVERSAMPLER" ] }, { "cell_type": "code", "execution_count": 13, "id": "33d278e9", "metadata": {}, "outputs": [], "source": [ "from imblearn.over_sampling import RandomOverSampler" ] }, { "cell_type": "code", "execution_count": 14, "id": "c4b2b38c", "metadata": {}, "outputs": [], "source": [ "# Apply Random Oversampling to balance the training data\n", "# This rebinds X_train_resampled / y_train_resampled, replacing the SMOTE output above\n", "oversampler = RandomOverSampler(random_state=42)\n", "X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)"
] }, { "cell_type": "code", "execution_count": 15, "id": "6e730981", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.870.780.130000
Precision0.000.000.130000
Recall0.000.001.000000
F1 Score0.000.000.230088
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.87 0.78 0.130000\n", "Precision 0.00 0.00 0.130000\n", "Recall 0.00 0.00 1.000000\n", "F1 Score 0.00 0.00 0.230088" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Refit the three classifiers on the resampled training data; the test split stays untouched\n", "# Train Random Forest model\n", "rf_model = RandomForestClassifier(random_state=42)\n", "rf_model.fit(X_train_resampled, y_train_resampled)\n", "\n", "# Train Decision Tree model\n", "dt_model = DecisionTreeClassifier(random_state=42)\n", "dt_model.fit(X_train_resampled, y_train_resampled)\n", "\n", "# Train Logistic Regression model\n", "lr_model = LogisticRegression(random_state=42)\n", "lr_model.fit(X_train_resampled, y_train_resampled)\n", "\n", "# Evaluate models on the held-out test split\n", "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n", "metrics = {\n", "    \"Accuracy\": accuracy_score,\n", "    # zero_division=0 still reports 0.0 when a model predicts no positives, but without UndefinedMetricWarning\n", "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n", "    \"Recall\": recall_score,\n", "    \"F1 Score\": f1_score,\n", "}\n", "results = {}\n", "\n", "# Build one {metric name: score} dict per model\n", "for name, model in models.items():\n", "    y_pred = model.predict(X_test)\n", "    result = {}\n", "    for metric_name, metric_func in metrics.items():\n", "        result[metric_name] = metric_func(y_test, y_pred)\n", "    results[name] = result\n", "\n", "# Convert results to DataFrame (models as columns, metrics as rows) for easier plotting\n", "results_df_resampled = pd.DataFrame(results)\n", "\n", "results_df_resampled" ] }, { "cell_type": "markdown", "id": "bbe4b256", "metadata": {}, "source": [ "# CLASS WEIGHTS - NORM" ] }, { "cell_type": "code", "execution_count": 17, "id": "a1a034ad", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. 
Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", "C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.870.8000000.87
Precision0.000.1111110.00
Recall0.000.0769230.00
F1 Score0.000.0909090.00
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.87 0.800000 0.87\n", "Precision 0.00 0.111111 0.00\n", "Recall 0.00 0.076923 0.00\n", "F1 Score 0.00 0.090909 0.00" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Define class weights: errors on class 1 count nine times as much as errors on class 0\n", "# NOTE(review): 1:9 presumably approximates the class imbalance in y_train -- confirm\n", "class_weights = {0: 1, 1: 9}\n", "\n", "# The models are fit on the original (not resampled) training split\n", "# Train Random Forest model\n", "rf_model = RandomForestClassifier(random_state=42, class_weight=class_weights)\n", "rf_model.fit(X_train, y_train)\n", "\n", "# Train Decision Tree model\n", "dt_model = DecisionTreeClassifier(random_state=42, class_weight=class_weights)\n", "dt_model.fit(X_train, y_train)\n", "\n", "# Train Logistic Regression model\n", "lr_model = LogisticRegression(random_state=42, class_weight=class_weights)\n", "lr_model.fit(X_train, y_train)\n", "\n", "# Evaluate models on the held-out test split\n", "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n", "metrics = {\n", "    \"Accuracy\": accuracy_score,\n", "    # zero_division=0 still reports 0.0 when a model predicts no positives, but without UndefinedMetricWarning\n", "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n", "    \"Recall\": recall_score,\n", "    \"F1 Score\": f1_score,\n", "}\n", "results = {}\n", "\n", "# Build one {metric name: score} dict per model\n", "for name, model in models.items():\n", "    y_pred = model.predict(X_test)\n", "    result = {}\n", "    for metric_name, metric_func in metrics.items():\n", "        result[metric_name] = metric_func(y_test, y_pred)\n", "    results[name] = result\n", "\n", "# Convert results to DataFrame (models as columns, metrics as rows) for easier plotting\n", "# The variable name is kept from the resampling cells although no resampling is used here\n", "results_df_resampled = pd.DataFrame(results)\n", "\n", "results_df_resampled" ] }, { "cell_type": "markdown", "id": "00cb6be9", "metadata": {}, "source": [ "# UNDERSAMPLER" ] }, { "cell_type": "code", "execution_count": 20, "id": "06a4d1c7", "metadata": {}, "outputs": [], "source": [ "from imblearn.under_sampling import RandomUnderSampler" ] }, { "cell_type": "code", "execution_count": 21, "id": "dcb0db52", "metadata": {}, "outputs": [], "source": [ "# Apply Random Undersampling to balance the training data\n", "undersampler = RandomUnderSampler(random_state=42)\n", 
"X_train_resampled, y_train_resampled = undersampler.fit_resample(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 22, "id": "a939e8fe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Random ForestDecision TreeLogistic Regression
Accuracy0.5600000.4900000.130000
Precision0.1555560.1833330.130000
Recall0.5384620.8461541.000000
F1 Score0.2413790.3013700.230088
\n", "
" ], "text/plain": [ " Random Forest Decision Tree Logistic Regression\n", "Accuracy 0.560000 0.490000 0.130000\n", "Precision 0.155556 0.183333 0.130000\n", "Recall 0.538462 0.846154 1.000000\n", "F1 Score 0.241379 0.301370 0.230088" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Refit the three classifiers on the resampled training data; the test split stays untouched\n", "# Train Random Forest model\n", "rf_model = RandomForestClassifier(random_state=42)\n", "rf_model.fit(X_train_resampled, y_train_resampled)\n", "\n", "# Train Decision Tree model\n", "dt_model = DecisionTreeClassifier(random_state=42)\n", "dt_model.fit(X_train_resampled, y_train_resampled)\n", "\n", "# Train Logistic Regression model\n", "lr_model = LogisticRegression(random_state=42)\n", "lr_model.fit(X_train_resampled, y_train_resampled)\n", "\n", "# Evaluate models on the held-out test split\n", "models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n", "metrics = {\n", "    \"Accuracy\": accuracy_score,\n", "    # zero_division=0 still reports 0.0 when a model predicts no positives, but without UndefinedMetricWarning\n", "    \"Precision\": lambda y_true, y_hat: precision_score(y_true, y_hat, zero_division=0),\n", "    \"Recall\": recall_score,\n", "    \"F1 Score\": f1_score,\n", "}\n", "results = {}\n", "\n", "# Build one {metric name: score} dict per model\n", "for name, model in models.items():\n", "    y_pred = model.predict(X_test)\n", "    result = {}\n", "    for metric_name, metric_func in metrics.items():\n", "        result[metric_name] = metric_func(y_test, y_pred)\n", "    results[name] = result\n", "\n", "# Convert results to DataFrame (models as columns, metrics as rows) for easier plotting\n", "results_df_resampled = pd.DataFrame(results)\n", "\n", "results_df_resampled" ] }, { "cell_type": "code", "execution_count": 31, "id": "3269fce1", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Count the samples per class in the resampled and in the original training labels\n", "unique_train_resampled, counts_train_resampled = np.unique(y_train_resampled, return_counts=True)\n", "unique_train, counts_train = np.unique(y_train, return_counts=True)\n", "\n", "# Both panels share one y-axis ceiling so the bar heights are directly comparable\n", "y_upper = max(max(counts_train_resampled), max(counts_train)) + 10\n", "\n", "# (classes, counts, bar colour, title) for the left and the right panel\n", "panels = [\n", "    (unique_train_resampled, counts_train_resampled, 'red', 'Distribution of y_train_resampled'),\n", "    (unique_train, counts_train, 'blue', 'Distribution of y_train'),\n", "]\n", "\n", "# Draw the two bar charts side by side\n", "fig, axes = plt.subplots(1, 2, figsize=(10, 6))\n", "for ax, (classes, counts, colour, heading) in zip(axes, panels):\n", "    ax.bar(classes, counts, color=colour)\n", "    ax.set_title(heading)\n", "    ax.set_xlabel('Class')\n", "    ax.set_ylabel('Frequency')\n", "    ax.set_xticks(classes)\n", "    ax.set_ylim(0, y_upper)\n", "\n", "fig.tight_layout()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "f16cadbc", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }