Data_ScienceUse_Cases/Label Prediction (Binary Example).ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ee34a7c4",
   "metadata": {},
   "source": [
    "## Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1a23a10f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import sklearn\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3333920d",
   "metadata": {},
   "source": [
    "## Dataset\n",
    "For our dataset, you can find it [here.](https://www.kaggle.com/datasets/elakiricoder/gender-classification-dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5aea2295",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>long_hair</th>\n",
       "      <th>forehead_width_cm</th>\n",
       "      <th>forehead_height_cm</th>\n",
       "      <th>nose_wide</th>\n",
       "      <th>nose_long</th>\n",
       "      <th>lips_thin</th>\n",
       "      <th>distance_nose_to_lip_long</th>\n",
       "      <th>gender</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>11.8</td>\n",
       "      <td>6.1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>11.8</td>\n",
       "      <td>6.3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>14.4</td>\n",
       "      <td>6.1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>13.5</td>\n",
       "      <td>5.9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Female</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
       "0          1               11.8                 6.1          1          0   \n",
       "1          0               14.0                 5.4          0          0   \n",
       "2          0               11.8                 6.3          1          1   \n",
       "3          0               14.4                 6.1          0          1   \n",
       "4          1               13.5                 5.9          0          0   \n",
       "\n",
       "   lips_thin  distance_nose_to_lip_long  gender  \n",
       "0          1                          1    Male  \n",
       "1          1                          0  Female  \n",
       "2          1                          1    Male  \n",
       "3          1                          1    Male  \n",
       "4          0                          0  Female  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load dataset\n",
    "df = pd.read_csv(r'D:\\archive\\gender_classification_v7.csv', encoding='utf-8')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "58b8ed5e",
   "metadata": {},
   "source": [
    "## Data Pre-processing\n",
    "For this example I skipped the Descriptive Statistics, and went to minor adjustments."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d93ff56d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "long_hair                      int64\n",
       "forehead_width_cm            float64\n",
       "forehead_height_cm           float64\n",
       "nose_wide                      int64\n",
       "nose_long                      int64\n",
       "lips_thin                      int64\n",
       "distance_nose_to_lip_long      int64\n",
       "gender                        object\n",
       "dtype: object"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check Data types of dataframe columns\n",
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "19ae1cf5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>long_hair</th>\n",
       "      <th>forehead_width_cm</th>\n",
       "      <th>forehead_height_cm</th>\n",
       "      <th>nose_wide</th>\n",
       "      <th>nose_long</th>\n",
       "      <th>lips_thin</th>\n",
       "      <th>distance_nose_to_lip_long</th>\n",
       "      <th>gender</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>11.8</td>\n",
       "      <td>6.1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>11.8</td>\n",
       "      <td>6.3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>14.4</td>\n",
       "      <td>6.1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>13.5</td>\n",
       "      <td>5.9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
       "0          1               11.8                 6.1          1          0   \n",
       "1          0               14.0                 5.4          0          0   \n",
       "2          0               11.8                 6.3          1          1   \n",
       "3          0               14.4                 6.1          0          1   \n",
       "4          1               13.5                 5.9          0          0   \n",
       "\n",
       "   lips_thin  distance_nose_to_lip_long  gender  \n",
       "0          1                          1       0  \n",
       "1          1                          0       1  \n",
       "2          1                          1       0  \n",
       "3          1                          1       0  \n",
       "4          0                          0       1  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert Gender labels into integer values, for classification\n",
    "df['gender']=df['gender'].replace('Male',0)\n",
    "df['gender']=df['gender'].replace('Female',1)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b573f11e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "long_hair                      int64\n",
       "forehead_width_cm            float64\n",
       "forehead_height_cm           float64\n",
       "nose_wide                      int64\n",
       "nose_long                      int64\n",
       "lips_thin                      int64\n",
       "distance_nose_to_lip_long      int64\n",
       "gender                         int64\n",
       "dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Now all is numeric data\n",
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "35388ca3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split dataset into X (Features) and y (Labels)\n",
    "\n",
    "# X is ALL columns except the last column (usually the label to be predicted)\n",
    "X = df.iloc[:,:-1]\n",
    "# y is the LABEL column (to be predicted)\n",
    "y = df.iloc[:,-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "14c3347e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use sklearn's train_test_split function imported before\n",
    "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fe832e3f",
   "metadata": {},
   "source": [
    "## Using 4 Classifiers\n",
    "It is sugggested to take a deeper look of the parameters provided in documentations below, for better tweaking of the classifiers.\n",
    "- [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)\n",
    "- [Decision Tree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html)\n",
    "- [SVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)\n",
    "- [K-Means / KNN](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f83a2e5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.neighbors import KNeighborsClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "dc4c2062",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Logistic Regression Accuracy: 0.9682063587282543\n",
      "Decision Tree Accuracy: 0.8792241551689662\n",
      "SVM Accuracy: 0.967006598680264\n",
      "K-Means Accuracy: 0.9754049190161967\n"
     ]
    }
   ],
   "source": [
    "# Logistic Regression\n",
    "# Train the model\n",
    "LogR = LogisticRegression(random_state=0, multi_class='ovr').fit(X_train, y_train)\n",
    "# Predict the test set\n",
    "LogR_pred = LogR.predict(X_test)\n",
    "\n",
    "# Decision Tree\n",
    "dtree = DecisionTreeClassifier(max_depth = 2).fit(X_train, y_train)\n",
    "dtree_pred = dtree.predict(X_test)\n",
    "\n",
    "# SVM\n",
    "svm = SVC(kernel='linear',C=1).fit(X_train, y_train)\n",
    "svm_pred = svm.predict(X_test)\n",
    "\n",
    "# K-Means\n",
    "knn = KNeighborsClassifier(n_neighbors=5).fit(X_train,y_train)\n",
    "knn_pred = knn.predict(X_test)\n",
    "\n",
    "# See Accuracy of each classifier\n",
    "print(\"Logistic Regression Accuracy: \"+ str(LogR.score(X,y)))\n",
    "print(\"Decision Tree Accuracy: \"+ str(dtree.score(X,y)))\n",
    "print(\"SVM Accuracy: \"+ str(svm.score(X,y)))\n",
    "print(\"K-Means Accuracy: \"+ str(knn.score(X,y)))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00f72b96",
   "metadata": {},
   "source": [
    "## Try on a new dataset\n",
    "Use one (or many) model above as predictor in a new dataset. Assuming we have the same columns but different values, we get.."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9c24db9a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>long_hair</th>\n",
       "      <th>forehead_width_cm</th>\n",
       "      <th>forehead_height_cm</th>\n",
       "      <th>nose_wide</th>\n",
       "      <th>nose_long</th>\n",
       "      <th>lips_thin</th>\n",
       "      <th>distance_nose_to_lip_long</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>14.5</td>\n",
       "      <td>6.7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>12.9</td>\n",
       "      <td>6.4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
       "0          1               14.5                 6.7          0          1   \n",
       "1          1               14.0                 5.9          0          0   \n",
       "2          1               12.9                 6.4          1          0   \n",
       "\n",
       "   lips_thin  distance_nose_to_lip_long  \n",
       "0          1                          1  \n",
       "1          0                          0  \n",
       "2          0                          1  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# For this example we use 3 rows of data to be predicted\n",
    "dval = pd.read_csv(r'D:\\archive\\valgend.csv', encoding='utf-8')\n",
    "dval.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "ad501b6a",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# predict with knn (change to which model you choose)\n",
    "knn_pred_new = knn.predict(dval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "8896ab72",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 0], dtype=int64)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# See the result\n",
    "knn_pred_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "7fa9db00",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add new column in new dataframe for placing the results, pass the \"result\" from before\n",
    "dval[\"pred_gender\"]=knn_pred_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6155a519",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>long_hair</th>\n",
       "      <th>forehead_width_cm</th>\n",
       "      <th>forehead_height_cm</th>\n",
       "      <th>nose_wide</th>\n",
       "      <th>nose_long</th>\n",
       "      <th>lips_thin</th>\n",
       "      <th>distance_nose_to_lip_long</th>\n",
       "      <th>pred_gender</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>14.5</td>\n",
       "      <td>6.7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>12.9</td>\n",
       "      <td>6.4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
       "0          1               14.5                 6.7          0          1   \n",
       "1          1               14.0                 5.9          0          0   \n",
       "2          1               12.9                 6.4          1          0   \n",
       "\n",
       "   lips_thin  distance_nose_to_lip_long  pred_gender  \n",
       "0          1                          1            0  \n",
       "1          0                          0            1  \n",
       "2          0                          1            0  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# See data with appended prediction (last column)\n",
    "dval.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c2587a57",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>long_hair</th>\n",
       "      <th>forehead_width_cm</th>\n",
       "      <th>forehead_height_cm</th>\n",
       "      <th>nose_wide</th>\n",
       "      <th>nose_long</th>\n",
       "      <th>lips_thin</th>\n",
       "      <th>distance_nose_to_lip_long</th>\n",
       "      <th>pred_gender</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>14.5</td>\n",
       "      <td>6.7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Female</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>12.9</td>\n",
       "      <td>6.4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
       "0          1               14.5                 6.7          0          1   \n",
       "1          1               14.0                 5.9          0          0   \n",
       "2          1               12.9                 6.4          1          0   \n",
       "\n",
       "   lips_thin  distance_nose_to_lip_long pred_gender  \n",
       "0          1                          1        Male  \n",
       "1          0                          0      Female  \n",
       "2          0                          1        Male  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Converting back to labels\n",
    "dval['pred_gender']=dval['pred_gender'].replace(0,'Male')\n",
    "dval['pred_gender']=dval['pred_gender'].replace(1,'Female')\n",
    "dval.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}