Add files via upload

2025-12-17 15:49:59 +01:00 · 2023-09-05 10:19:56 +07:00
parent b9faf7e7b1
commit 90d22db225
5 changed files with 2437 additions and 0 deletions
--- a/Confidence_Interval.ipynb
+++ b/Confidence_Interval.ipynb
@@ -0,0 +1,192 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "410cdd47",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "f769b682",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.01390952774409444"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# z-multiplier (critical value for a 95% confidence level)\n",
+    "tstar = 1.96\n",
+    "# P hat value\n",
+    "p = .85\n",
+    "# Number of observations\n",
+    "n = 659\n",
+    "\n",
+    "# Calculate Standard Error\n",
+    "se = np.sqrt((p * (1 - p))/n)\n",
+    "se"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "d77c95f1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.8227373256215749, 0.8772626743784251)"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Lower and upper confidence bounds: the estimate p minus/plus\n",
+    "# the multiplier times the standard error\n",
+    "lcb, ucb = (p - tstar * se, p + tstar * se)\n",
+    "# Display the interval as (lower, upper)\n",
+    "lcb, ucb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "1d08b43b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Same process, using statsmodels library\n",
+    "import statsmodels.api as sm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "41cb97c9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.8227378265796143, 0.8772621734203857)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Get confidence bands\n",
+    "# n = observations\n",
+    "# p = result of a survey \n",
+    "sm.stats.proportion_confint(n * p, n)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4234b441",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Try to import dataset\n",
+    "import pandas as pd\n",
+    "\n",
+    "df = pd.read_csv(\"Cartwheeldata.csv\")\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d03c3d4f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Mean of a column\n",
+    "mean = df[\"CWDistance\"].mean()\n",
+    "# Standard deviation of a column\n",
+    "sd = df[\"CWDistance\"].std()\n",
+    "# Rows of the dataframe\n",
+    "n = len(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c52dddd2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tstar = 2.064\n",
+    "\n",
+    "se = sd/np.sqrt(n)\n",
+    "\n",
+    "se"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2dfbab7d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lcb = mean - tstar * se\n",
+    "ucb = mean + tstar * se\n",
+    "(lcb, ucb)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "649c18b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ...OR let statsmodels build the interval for the same column.\n",
+    "# tconfint_mean() uses the t distribution with n - 1 degrees of freedom,\n",
+    "# matching the manual t-based calculation above (tstar = 2.064 is the\n",
+    "# 95% t critical value for 24 degrees of freedom).\n",
+    "sm.stats.DescrStatsW(df[\"CWDistance\"]).tconfint_mean()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/Example).ipynb
+++ b/Example).ipynb
@@ -0,0 +1,814 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ee34a7c4",
+   "metadata": {},
+   "source": [
+    "## Import Libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "1a23a10f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import sklearn\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3333920d",
+   "metadata": {},
+   "source": [
+    "## Dataset\n",
+    "For our dataset, you can find it [here.](https://www.kaggle.com/datasets/elakiricoder/gender-classification-dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "5aea2295",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>long_hair</th>\n",
+       "      <th>forehead_width_cm</th>\n",
+       "      <th>forehead_height_cm</th>\n",
+       "      <th>nose_wide</th>\n",
+       "      <th>nose_long</th>\n",
+       "      <th>lips_thin</th>\n",
+       "      <th>distance_nose_to_lip_long</th>\n",
+       "      <th>gender</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11.8</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Male</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>5.4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Female</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0</td>\n",
+       "      <td>11.8</td>\n",
+       "      <td>6.3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Male</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0</td>\n",
+       "      <td>14.4</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Male</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>13.5</td>\n",
+       "      <td>5.9</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Female</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
+       "0          1               11.8                 6.1          1          0   \n",
+       "1          0               14.0                 5.4          0          0   \n",
+       "2          0               11.8                 6.3          1          1   \n",
+       "3          0               14.4                 6.1          0          1   \n",
+       "4          1               13.5                 5.9          0          0   \n",
+       "\n",
+       "   lips_thin  distance_nose_to_lip_long  gender  \n",
+       "0          1                          1    Male  \n",
+       "1          1                          0  Female  \n",
+       "2          1                          1    Male  \n",
+       "3          1                          1    Male  \n",
+       "4          0                          0  Female  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load dataset\n",
+    "df = pd.read_csv(r'D:\\archive\\gender_classification_v7.csv', encoding='utf-8')\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "58b8ed5e",
+   "metadata": {},
+   "source": [
+    "## Data Pre-processing\n",
+    "For this example I skipped the Descriptive Statistics, and went to minor adjustments."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "d93ff56d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "long_hair                      int64\n",
+       "forehead_width_cm            float64\n",
+       "forehead_height_cm           float64\n",
+       "nose_wide                      int64\n",
+       "nose_long                      int64\n",
+       "lips_thin                      int64\n",
+       "distance_nose_to_lip_long      int64\n",
+       "gender                        object\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Check Data types of dataframe columns\n",
+    "df.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "19ae1cf5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>long_hair</th>\n",
+       "      <th>forehead_width_cm</th>\n",
+       "      <th>forehead_height_cm</th>\n",
+       "      <th>nose_wide</th>\n",
+       "      <th>nose_long</th>\n",
+       "      <th>lips_thin</th>\n",
+       "      <th>distance_nose_to_lip_long</th>\n",
+       "      <th>gender</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>11.8</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>5.4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0</td>\n",
+       "      <td>11.8</td>\n",
+       "      <td>6.3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0</td>\n",
+       "      <td>14.4</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>13.5</td>\n",
+       "      <td>5.9</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
+       "0          1               11.8                 6.1          1          0   \n",
+       "1          0               14.0                 5.4          0          0   \n",
+       "2          0               11.8                 6.3          1          1   \n",
+       "3          0               14.4                 6.1          0          1   \n",
+       "4          1               13.5                 5.9          0          0   \n",
+       "\n",
+       "   lips_thin  distance_nose_to_lip_long  gender  \n",
+       "0          1                          1       0  \n",
+       "1          1                          0       1  \n",
+       "2          1                          1       0  \n",
+       "3          1                          1       0  \n",
+       "4          0                          0       1  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Encode the gender labels as integers (Male -> 0, Female -> 1) for classification\n",
+    "df['gender'] = df['gender'].replace({'Male': 0, 'Female': 1})\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "b573f11e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "long_hair                      int64\n",
+       "forehead_width_cm            float64\n",
+       "forehead_height_cm           float64\n",
+       "nose_wide                      int64\n",
+       "nose_long                      int64\n",
+       "lips_thin                      int64\n",
+       "distance_nose_to_lip_long      int64\n",
+       "gender                         int64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Now all is numeric data\n",
+    "df.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "35388ca3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split dataset into X (Features) and y (Labels)\n",
+    "\n",
+    "# X is ALL columns except the last column (usually the label to be predicted)\n",
+    "X = df.iloc[:,:-1]\n",
+    "# y is the LABEL column (to be predicted)\n",
+    "y = df.iloc[:,-1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "14c3347e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use sklearn's train_test_split function imported before\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fe832e3f",
+   "metadata": {},
+   "source": [
+    "## Using 4 Classifiers\n",
+    "It is suggested to take a deeper look at the parameters described in the documentation below, for better tuning of the classifiers.\n",
+    "- [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)\n",
+    "- [Decision Tree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html)\n",
+    "- [SVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)\n",
+    "- [K-Nearest Neighbors (KNN)](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "f83a2e5c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.svm import SVC\n",
+    "from sklearn.neighbors import KNeighborsClassifier"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "dc4c2062",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Logistic Regression Accuracy: 0.9682063587282543\n",
+      "Decision Tree Accuracy: 0.8792241551689662\n",
+      "SVM Accuracy: 0.967006598680264\n",
+      "K-Means Accuracy: 0.9754049190161967\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Logistic Regression\n",
+    "# Train the model. The target is binary, so no multi_class setting is needed\n",
+    "# (that parameter is deprecated in recent scikit-learn releases).\n",
+    "LogR = LogisticRegression(random_state=0).fit(X_train, y_train)\n",
+    "# Predict the test set\n",
+    "LogR_pred = LogR.predict(X_test)\n",
+    "\n",
+    "# Decision Tree\n",
+    "dtree = DecisionTreeClassifier(max_depth = 2).fit(X_train, y_train)\n",
+    "dtree_pred = dtree.predict(X_test)\n",
+    "\n",
+    "# SVM\n",
+    "svm = SVC(kernel='linear',C=1).fit(X_train, y_train)\n",
+    "svm_pred = svm.predict(X_test)\n",
+    "\n",
+    "# K-Nearest Neighbors (KNN) -- a classifier, not the K-Means clustering algorithm\n",
+    "knn = KNeighborsClassifier(n_neighbors=5).fit(X_train,y_train)\n",
+    "knn_pred = knn.predict(X_test)\n",
+    "\n",
+    "# Accuracy of each classifier on the held-out test split only.\n",
+    "# Scoring on the full X, y would include the training rows and overstate accuracy.\n",
+    "print(\"Logistic Regression Accuracy: \"+ str(LogR.score(X_test, y_test)))\n",
+    "print(\"Decision Tree Accuracy: \"+ str(dtree.score(X_test, y_test)))\n",
+    "print(\"SVM Accuracy: \"+ str(svm.score(X_test, y_test)))\n",
+    "print(\"KNN Accuracy: \"+ str(knn.score(X_test, y_test)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "00f72b96",
+   "metadata": {},
+   "source": [
+    "## Try on a new dataset\n",
+    "Use one (or many) model above as predictor in a new dataset. Assuming we have the same columns but different values, we get.."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "9c24db9a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>long_hair</th>\n",
+       "      <th>forehead_width_cm</th>\n",
+       "      <th>forehead_height_cm</th>\n",
+       "      <th>nose_wide</th>\n",
+       "      <th>nose_long</th>\n",
+       "      <th>lips_thin</th>\n",
+       "      <th>distance_nose_to_lip_long</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>14.5</td>\n",
+       "      <td>6.7</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>5.9</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>12.9</td>\n",
+       "      <td>6.4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
+       "0          1               14.5                 6.7          0          1   \n",
+       "1          1               14.0                 5.9          0          0   \n",
+       "2          1               12.9                 6.4          1          0   \n",
+       "\n",
+       "   lips_thin  distance_nose_to_lip_long  \n",
+       "0          1                          1  \n",
+       "1          0                          0  \n",
+       "2          0                          1  "
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# For this example we use 3 rows of data to be predicted\n",
+    "dval = pd.read_csv(r'D:\\archive\\valgend.csv', encoding='utf-8')\n",
+    "dval.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "ad501b6a",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# predict with knn (change to which model you choose)\n",
+    "knn_pred_new = knn.predict(dval)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "8896ab72",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0, 1, 0], dtype=int64)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# See the result\n",
+    "knn_pred_new"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "7fa9db00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add new column in new dataframe for placing the results, pass the \"result\" from before\n",
+    "dval[\"pred_gender\"]=knn_pred_new"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "6155a519",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>long_hair</th>\n",
+       "      <th>forehead_width_cm</th>\n",
+       "      <th>forehead_height_cm</th>\n",
+       "      <th>nose_wide</th>\n",
+       "      <th>nose_long</th>\n",
+       "      <th>lips_thin</th>\n",
+       "      <th>distance_nose_to_lip_long</th>\n",
+       "      <th>pred_gender</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>14.5</td>\n",
+       "      <td>6.7</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>5.9</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>12.9</td>\n",
+       "      <td>6.4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
+       "0          1               14.5                 6.7          0          1   \n",
+       "1          1               14.0                 5.9          0          0   \n",
+       "2          1               12.9                 6.4          1          0   \n",
+       "\n",
+       "   lips_thin  distance_nose_to_lip_long  pred_gender  \n",
+       "0          1                          1            0  \n",
+       "1          0                          0            1  \n",
+       "2          0                          1            0  "
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# See data with appended prediction (last column)\n",
+    "dval.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "c2587a57",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>long_hair</th>\n",
+       "      <th>forehead_width_cm</th>\n",
+       "      <th>forehead_height_cm</th>\n",
+       "      <th>nose_wide</th>\n",
+       "      <th>nose_long</th>\n",
+       "      <th>lips_thin</th>\n",
+       "      <th>distance_nose_to_lip_long</th>\n",
+       "      <th>pred_gender</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>14.5</td>\n",
+       "      <td>6.7</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Male</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>5.9</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Female</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>12.9</td>\n",
+       "      <td>6.4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Male</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   long_hair  forehead_width_cm  forehead_height_cm  nose_wide  nose_long  \\\n",
+       "0          1               14.5                 6.7          0          1   \n",
+       "1          1               14.0                 5.9          0          0   \n",
+       "2          1               12.9                 6.4          1          0   \n",
+       "\n",
+       "   lips_thin  distance_nose_to_lip_long pred_gender  \n",
+       "0          1                          1        Male  \n",
+       "1          0                          0      Female  \n",
+       "2          0                          1        Male  "
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Map the integer predictions back to their text labels (0 -> Male, 1 -> Female)\n",
+    "dval['pred_gender'] = dval['pred_gender'].replace({0: 'Male', 1: 'Female'})\n",
+    "dval.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/pred.ipynb
+++ b/pred.ipynb
@@ -0,0 +1,794 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "5c3d106c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt \n",
+    "import seaborn as sns \n",
+    "import plotly as py\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "7e7ad082",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>SepalLengthCm</th>\n",
+       "      <th>SepalWidthCm</th>\n",
+       "      <th>PetalLengthCm</th>\n",
+       "      <th>PetalWidthCm</th>\n",
+       "      <th>Species</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5.1</td>\n",
+       "      <td>3.5</td>\n",
+       "      <td>1.4</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>Iris-setosa</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>4.9</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1.4</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>Iris-setosa</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>4.7</td>\n",
+       "      <td>3.2</td>\n",
+       "      <td>1.3</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>Iris-setosa</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>4.6</td>\n",
+       "      <td>3.1</td>\n",
+       "      <td>1.5</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>Iris-setosa</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>3.6</td>\n",
+       "      <td>1.4</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>Iris-setosa</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species\n",
+       "0   1            5.1           3.5            1.4           0.2  Iris-setosa\n",
+       "1   2            4.9           3.0            1.4           0.2  Iris-setosa\n",
+       "2   3            4.7           3.2            1.3           0.2  Iris-setosa\n",
+       "3   4            4.6           3.1            1.5           0.2  Iris-setosa\n",
+       "4   5            5.0           3.6            1.4           0.2  Iris-setosa"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Location of the Iris CSV. Set the IRIS_CSV environment variable to point at another copy;\n",
+    "# the original local path is kept as the default so existing setups keep working.\n",
+    "iris_csv_path = os.environ.get('IRIS_CSV', r'D:\\archive\\iris.csv')\n",
+    "df = pd.read_csv(iris_csv_path, encoding='utf-8')\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a85eca81",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>SepalLengthCm</th>\n",
+       "      <th>SepalWidthCm</th>\n",
+       "      <th>PetalLengthCm</th>\n",
+       "      <th>PetalWidthCm</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>150.000000</td>\n",
+       "      <td>150.000000</td>\n",
+       "      <td>150.000000</td>\n",
+       "      <td>150.000000</td>\n",
+       "      <td>150.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>75.500000</td>\n",
+       "      <td>5.843333</td>\n",
+       "      <td>3.054000</td>\n",
+       "      <td>3.758667</td>\n",
+       "      <td>1.198667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>43.445368</td>\n",
+       "      <td>0.828066</td>\n",
+       "      <td>0.433594</td>\n",
+       "      <td>1.764420</td>\n",
+       "      <td>0.763161</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>4.300000</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.100000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>38.250000</td>\n",
+       "      <td>5.100000</td>\n",
+       "      <td>2.800000</td>\n",
+       "      <td>1.600000</td>\n",
+       "      <td>0.300000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>75.500000</td>\n",
+       "      <td>5.800000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>4.350000</td>\n",
+       "      <td>1.300000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>112.750000</td>\n",
+       "      <td>6.400000</td>\n",
+       "      <td>3.300000</td>\n",
+       "      <td>5.100000</td>\n",
+       "      <td>1.800000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>150.000000</td>\n",
+       "      <td>7.900000</td>\n",
+       "      <td>4.400000</td>\n",
+       "      <td>6.900000</td>\n",
+       "      <td>2.500000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm\n",
+       "count  150.000000     150.000000    150.000000     150.000000    150.000000\n",
+       "mean    75.500000       5.843333      3.054000       3.758667      1.198667\n",
+       "std     43.445368       0.828066      0.433594       1.764420      0.763161\n",
+       "min      1.000000       4.300000      2.000000       1.000000      0.100000\n",
+       "25%     38.250000       5.100000      2.800000       1.600000      0.300000\n",
+       "50%     75.500000       5.800000      3.000000       4.350000      1.300000\n",
+       "75%    112.750000       6.400000      3.300000       5.100000      1.800000\n",
+       "max    150.000000       7.900000      4.400000       6.900000      2.500000"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "fd80a4a8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Id                 int64\n",
+       "SepalLengthCm    float64\n",
+       "SepalWidthCm     float64\n",
+       "PetalLengthCm    float64\n",
+       "PetalWidthCm     float64\n",
+       "Species           object\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "cc10d9c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "d07459e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import confusion_matrix\n",
+    "from sklearn.model_selection import train_test_split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "f917c7bd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn import datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "9d3e54c7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load scikit-learn's bundled Iris dataset object into `iris`.\n",
+    "iris = datasets.load_iris()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "1c34bd6d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'data': array([[5.1, 3.5, 1.4, 0.2],\n",
+       "        [4.9, 3. , 1.4, 0.2],\n",
+       "        [4.7, 3.2, 1.3, 0.2],\n",
+       "        [4.6, 3.1, 1.5, 0.2],\n",
+       "        [5. , 3.6, 1.4, 0.2],\n",
+       "        [5.4, 3.9, 1.7, 0.4],\n",
+       "        [4.6, 3.4, 1.4, 0.3],\n",
+       "        [5. , 3.4, 1.5, 0.2],\n",
+       "        [4.4, 2.9, 1.4, 0.2],\n",
+       "        [4.9, 3.1, 1.5, 0.1],\n",
+       "        [5.4, 3.7, 1.5, 0.2],\n",
+       "        [4.8, 3.4, 1.6, 0.2],\n",
+       "        [4.8, 3. , 1.4, 0.1],\n",
+       "        [4.3, 3. , 1.1, 0.1],\n",
+       "        [5.8, 4. , 1.2, 0.2],\n",
+       "        [5.7, 4.4, 1.5, 0.4],\n",
+       "        [5.4, 3.9, 1.3, 0.4],\n",
+       "        [5.1, 3.5, 1.4, 0.3],\n",
+       "        [5.7, 3.8, 1.7, 0.3],\n",
+       "        [5.1, 3.8, 1.5, 0.3],\n",
+       "        [5.4, 3.4, 1.7, 0.2],\n",
+       "        [5.1, 3.7, 1.5, 0.4],\n",
+       "        [4.6, 3.6, 1. , 0.2],\n",
+       "        [5.1, 3.3, 1.7, 0.5],\n",
+       "        [4.8, 3.4, 1.9, 0.2],\n",
+       "        [5. , 3. , 1.6, 0.2],\n",
+       "        [5. , 3.4, 1.6, 0.4],\n",
+       "        [5.2, 3.5, 1.5, 0.2],\n",
+       "        [5.2, 3.4, 1.4, 0.2],\n",
+       "        [4.7, 3.2, 1.6, 0.2],\n",
+       "        [4.8, 3.1, 1.6, 0.2],\n",
+       "        [5.4, 3.4, 1.5, 0.4],\n",
+       "        [5.2, 4.1, 1.5, 0.1],\n",
+       "        [5.5, 4.2, 1.4, 0.2],\n",
+       "        [4.9, 3.1, 1.5, 0.2],\n",
+       "        [5. , 3.2, 1.2, 0.2],\n",
+       "        [5.5, 3.5, 1.3, 0.2],\n",
+       "        [4.9, 3.6, 1.4, 0.1],\n",
+       "        [4.4, 3. , 1.3, 0.2],\n",
+       "        [5.1, 3.4, 1.5, 0.2],\n",
+       "        [5. , 3.5, 1.3, 0.3],\n",
+       "        [4.5, 2.3, 1.3, 0.3],\n",
+       "        [4.4, 3.2, 1.3, 0.2],\n",
+       "        [5. , 3.5, 1.6, 0.6],\n",
+       "        [5.1, 3.8, 1.9, 0.4],\n",
+       "        [4.8, 3. , 1.4, 0.3],\n",
+       "        [5.1, 3.8, 1.6, 0.2],\n",
+       "        [4.6, 3.2, 1.4, 0.2],\n",
+       "        [5.3, 3.7, 1.5, 0.2],\n",
+       "        [5. , 3.3, 1.4, 0.2],\n",
+       "        [7. , 3.2, 4.7, 1.4],\n",
+       "        [6.4, 3.2, 4.5, 1.5],\n",
+       "        [6.9, 3.1, 4.9, 1.5],\n",
+       "        [5.5, 2.3, 4. , 1.3],\n",
+       "        [6.5, 2.8, 4.6, 1.5],\n",
+       "        [5.7, 2.8, 4.5, 1.3],\n",
+       "        [6.3, 3.3, 4.7, 1.6],\n",
+       "        [4.9, 2.4, 3.3, 1. ],\n",
+       "        [6.6, 2.9, 4.6, 1.3],\n",
+       "        [5.2, 2.7, 3.9, 1.4],\n",
+       "        [5. , 2. , 3.5, 1. ],\n",
+       "        [5.9, 3. , 4.2, 1.5],\n",
+       "        [6. , 2.2, 4. , 1. ],\n",
+       "        [6.1, 2.9, 4.7, 1.4],\n",
+       "        [5.6, 2.9, 3.6, 1.3],\n",
+       "        [6.7, 3.1, 4.4, 1.4],\n",
+       "        [5.6, 3. , 4.5, 1.5],\n",
+       "        [5.8, 2.7, 4.1, 1. ],\n",
+       "        [6.2, 2.2, 4.5, 1.5],\n",
+       "        [5.6, 2.5, 3.9, 1.1],\n",
+       "        [5.9, 3.2, 4.8, 1.8],\n",
+       "        [6.1, 2.8, 4. , 1.3],\n",
+       "        [6.3, 2.5, 4.9, 1.5],\n",
+       "        [6.1, 2.8, 4.7, 1.2],\n",
+       "        [6.4, 2.9, 4.3, 1.3],\n",
+       "        [6.6, 3. , 4.4, 1.4],\n",
+       "        [6.8, 2.8, 4.8, 1.4],\n",
+       "        [6.7, 3. , 5. , 1.7],\n",
+       "        [6. , 2.9, 4.5, 1.5],\n",
+       "        [5.7, 2.6, 3.5, 1. ],\n",
+       "        [5.5, 2.4, 3.8, 1.1],\n",
+       "        [5.5, 2.4, 3.7, 1. ],\n",
+       "        [5.8, 2.7, 3.9, 1.2],\n",
+       "        [6. , 2.7, 5.1, 1.6],\n",
+       "        [5.4, 3. , 4.5, 1.5],\n",
+       "        [6. , 3.4, 4.5, 1.6],\n",
+       "        [6.7, 3.1, 4.7, 1.5],\n",
+       "        [6.3, 2.3, 4.4, 1.3],\n",
+       "        [5.6, 3. , 4.1, 1.3],\n",
+       "        [5.5, 2.5, 4. , 1.3],\n",
+       "        [5.5, 2.6, 4.4, 1.2],\n",
+       "        [6.1, 3. , 4.6, 1.4],\n",
+       "        [5.8, 2.6, 4. , 1.2],\n",
+       "        [5. , 2.3, 3.3, 1. ],\n",
+       "        [5.6, 2.7, 4.2, 1.3],\n",
+       "        [5.7, 3. , 4.2, 1.2],\n",
+       "        [5.7, 2.9, 4.2, 1.3],\n",
+       "        [6.2, 2.9, 4.3, 1.3],\n",
+       "        [5.1, 2.5, 3. , 1.1],\n",
+       "        [5.7, 2.8, 4.1, 1.3],\n",
+       "        [6.3, 3.3, 6. , 2.5],\n",
+       "        [5.8, 2.7, 5.1, 1.9],\n",
+       "        [7.1, 3. , 5.9, 2.1],\n",
+       "        [6.3, 2.9, 5.6, 1.8],\n",
+       "        [6.5, 3. , 5.8, 2.2],\n",
+       "        [7.6, 3. , 6.6, 2.1],\n",
+       "        [4.9, 2.5, 4.5, 1.7],\n",
+       "        [7.3, 2.9, 6.3, 1.8],\n",
+       "        [6.7, 2.5, 5.8, 1.8],\n",
+       "        [7.2, 3.6, 6.1, 2.5],\n",
+       "        [6.5, 3.2, 5.1, 2. ],\n",
+       "        [6.4, 2.7, 5.3, 1.9],\n",
+       "        [6.8, 3. , 5.5, 2.1],\n",
+       "        [5.7, 2.5, 5. , 2. ],\n",
+       "        [5.8, 2.8, 5.1, 2.4],\n",
+       "        [6.4, 3.2, 5.3, 2.3],\n",
+       "        [6.5, 3. , 5.5, 1.8],\n",
+       "        [7.7, 3.8, 6.7, 2.2],\n",
+       "        [7.7, 2.6, 6.9, 2.3],\n",
+       "        [6. , 2.2, 5. , 1.5],\n",
+       "        [6.9, 3.2, 5.7, 2.3],\n",
+       "        [5.6, 2.8, 4.9, 2. ],\n",
+       "        [7.7, 2.8, 6.7, 2. ],\n",
+       "        [6.3, 2.7, 4.9, 1.8],\n",
+       "        [6.7, 3.3, 5.7, 2.1],\n",
+       "        [7.2, 3.2, 6. , 1.8],\n",
+       "        [6.2, 2.8, 4.8, 1.8],\n",
+       "        [6.1, 3. , 4.9, 1.8],\n",
+       "        [6.4, 2.8, 5.6, 2.1],\n",
+       "        [7.2, 3. , 5.8, 1.6],\n",
+       "        [7.4, 2.8, 6.1, 1.9],\n",
+       "        [7.9, 3.8, 6.4, 2. ],\n",
+       "        [6.4, 2.8, 5.6, 2.2],\n",
+       "        [6.3, 2.8, 5.1, 1.5],\n",
+       "        [6.1, 2.6, 5.6, 1.4],\n",
+       "        [7.7, 3. , 6.1, 2.3],\n",
+       "        [6.3, 3.4, 5.6, 2.4],\n",
+       "        [6.4, 3.1, 5.5, 1.8],\n",
+       "        [6. , 3. , 4.8, 1.8],\n",
+       "        [6.9, 3.1, 5.4, 2.1],\n",
+       "        [6.7, 3.1, 5.6, 2.4],\n",
+       "        [6.9, 3.1, 5.1, 2.3],\n",
+       "        [5.8, 2.7, 5.1, 1.9],\n",
+       "        [6.8, 3.2, 5.9, 2.3],\n",
+       "        [6.7, 3.3, 5.7, 2.5],\n",
+       "        [6.7, 3. , 5.2, 2.3],\n",
+       "        [6.3, 2.5, 5. , 1.9],\n",
+       "        [6.5, 3. , 5.2, 2. ],\n",
+       "        [6.2, 3.4, 5.4, 2.3],\n",
+       "        [5.9, 3. , 5.1, 1.8]]),\n",
+       " 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+       "        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+       "        0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
+       "        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
+       "        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),\n",
+       " 'frame': None,\n",
+       " 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='<U10'),\n",
+       " 'DESCR': '.. _iris_dataset:\\n\\nIris plants dataset\\n--------------------\\n\\n**Data Set Characteristics:**\\n\\n    :Number of Instances: 150 (50 in each of three classes)\\n    :Number of Attributes: 4 numeric, predictive attributes and the class\\n    :Attribute Information:\\n        - sepal length in cm\\n        - sepal width in cm\\n        - petal length in cm\\n        - petal width in cm\\n        - class:\\n                - Iris-Setosa\\n                - Iris-Versicolour\\n                - Iris-Virginica\\n                \\n    :Summary Statistics:\\n\\n    ============== ==== ==== ======= ===== ====================\\n                    Min  Max   Mean    SD   Class Correlation\\n    ============== ==== ==== ======= ===== ====================\\n    sepal length:   4.3  7.9   5.84   0.83    0.7826\\n    sepal width:    2.0  4.4   3.05   0.43   -0.4194\\n    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\\n    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)\\n    ============== ==== ==== ======= ===== ====================\\n\\n    :Missing Attribute Values: None\\n    :Class Distribution: 33.3% for each of 3 classes.\\n    :Creator: R.A. Fisher\\n    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\\n    :Date: July, 1988\\n\\nThe famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\\nfrom Fisher\\'s paper. Note that it\\'s the same as in R, but not as in the UCI\\nMachine Learning Repository, which has two wrong data points.\\n\\nThis is perhaps the best known database to be found in the\\npattern recognition literature.  Fisher\\'s paper is a classic in the field and\\nis referenced frequently to this day.  (See Duda & Hart, for example.)  The\\ndata set contains 3 classes of 50 instances each, where each class refers to a\\ntype of iris plant.  One class is linearly separable from the other 2; the\\nlatter are NOT linearly separable from each other.\\n\\n.. 
topic:: References\\n\\n   - Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\\n     Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\\n     Mathematical Statistics\" (John Wiley, NY, 1950).\\n   - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\\n     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\\n   - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\\n     Structure and Classification Rule for Recognition in Partially Exposed\\n     Environments\".  IEEE Transactions on Pattern Analysis and Machine\\n     Intelligence, Vol. PAMI-2, No. 1, 67-71.\\n   - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\".  IEEE Transactions\\n     on Information Theory, May 1972, 431-433.\\n   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al\"s AUTOCLASS II\\n     conceptual clustering system finds 3 classes in the data.\\n   - Many, many more ...',\n",
+       " 'feature_names': ['sepal length (cm)',\n",
+       "  'sepal width (cm)',\n",
+       "  'petal length (cm)',\n",
+       "  'petal width (cm)'],\n",
+       " 'filename': 'iris.csv',\n",
+       " 'data_module': 'sklearn.datasets.data'}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "iris"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "606f7821",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Features are the four measurements only. 'Id' is just a row counter (1..150); keeping it\n",
+    "# would let the models key on row position instead of the flower measurements.\n",
+    "X = df.drop(columns=['Id', 'Species'])\n",
+    "# Target: the species label of each row.\n",
+    "y = df['Species']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "4f7dade3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hold out part of the rows for evaluation; random_state=0 fixes the shuffle so the split is repeatable.\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "85c8563e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.svm import SVC\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.naive_bayes import GaussianNB"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "da7a7529",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica'\n",
+      " 'Iris-setosa' 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor'\n",
+      " 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor'\n",
+      " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'\n",
+      " 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
+      " 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
+      " 'Iris-virginica' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'\n",
+      " 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'\n",
+      " 'Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-versicolor'\n",
+      " 'Iris-setosa' 'Iris-versicolor']\n",
+      "[[13  0  0]\n",
+      " [ 0 16  0]\n",
+      " [ 0  1  8]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Shallow (depth-2) decision tree. random_state is pinned because scikit-learn permutes the\n",
+    "# features at every split, so equally good splits would otherwise be picked at random per run.\n",
+    "dtree = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X_train, y_train)\n",
+    "dtree_pred = dtree.predict(X_test)\n",
+    "# Confusion matrix: rows are the true classes, columns the predicted ones.\n",
+    "cf = confusion_matrix(y_test, dtree_pred)\n",
+    "print(dtree_pred)\n",
+    "print(cf)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "17682b48",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9736842105263158"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dtree.score(X_test,y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "4f4d6dc0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Linear-kernel SVM (C=1): fit on the training split, then predict the held-out rows.\n",
+    "svm_mod = SVC(kernel='linear', C=1)\n",
+    "svm_mod.fit(X_train, y_train)\n",
+    "svm_pred = svm_mod.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "a2280e55",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Accuracy must be measured against the true labels. Scoring against svm_pred (the model's\n",
+    "# own output) compares the predictions with themselves and always yields 1.0.\n",
+    "accsvm = svm_mod.score(X_test, y_test)\n",
+    "print(accsvm)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "798a5c23",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[13  0  0]\n",
+      " [ 0 16  0]\n",
+      " [ 0  0  9]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Confusion matrix for the SVM: rows are the true classes, columns the predicted ones.\n",
+    "svmc = confusion_matrix(y_true=y_test, y_pred=svm_pred)\n",
+    "print(svmc)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "ae7a988b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# k-nearest-neighbours classifier that votes over the 7 closest training rows.\n",
+    "knn = KNeighborsClassifier(n_neighbors=7)\n",
+    "# fit() returns the estimator itself; re-binding keeps the cell free of displayed output.\n",
+    "knn = knn.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "2fa42653",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Score the k-NN model on the held-out test split against the true labels.\n",
+    "accknn=knn.score(X_test,y_test)\n",
+    "print(accknn)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "f801cbdf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica'\n",
+      " 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-versicolor'\n",
+      " 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor'\n",
+      " 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'\n",
+      " 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
+      " 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
+      " 'Iris-virginica' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'\n",
+      " 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'\n",
+      " 'Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-versicolor'\n",
+      " 'Iris-setosa' 'Iris-versicolor']\n",
+      "[[13  0  0]\n",
+      " [ 0 16  0]\n",
+      " [ 0  0  9]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Predict the held-out rows with k-NN and show the raw predictions.\n",
+    "knn_pred = knn.predict(X_test)\n",
+    "print(knn_pred)\n",
+    "# Tabulate true classes (rows) against predicted classes (columns).\n",
+    "cm = confusion_matrix(y_true=y_test, y_pred=knn_pred)\n",
+    "print(cm)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "17a6e503",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "114     Iris-virginica\n",
+       "62     Iris-versicolor\n",
+       "33         Iris-setosa\n",
+       "107     Iris-virginica\n",
+       "7          Iris-setosa\n",
+       "100     Iris-virginica\n",
+       "40         Iris-setosa\n",
+       "86     Iris-versicolor\n",
+       "76     Iris-versicolor\n",
+       "71     Iris-versicolor\n",
+       "134     Iris-virginica\n",
+       "51     Iris-versicolor\n",
+       "73     Iris-versicolor\n",
+       "54     Iris-versicolor\n",
+       "63     Iris-versicolor\n",
+       "37         Iris-setosa\n",
+       "78     Iris-versicolor\n",
+       "90     Iris-versicolor\n",
+       "45         Iris-setosa\n",
+       "16         Iris-setosa\n",
+       "121     Iris-virginica\n",
+       "66     Iris-versicolor\n",
+       "24         Iris-setosa\n",
+       "8          Iris-setosa\n",
+       "126     Iris-virginica\n",
+       "22         Iris-setosa\n",
+       "44         Iris-setosa\n",
+       "97     Iris-versicolor\n",
+       "93     Iris-versicolor\n",
+       "26         Iris-setosa\n",
+       "137     Iris-virginica\n",
+       "84     Iris-versicolor\n",
+       "27         Iris-setosa\n",
+       "127     Iris-virginica\n",
+       "132     Iris-virginica\n",
+       "59     Iris-versicolor\n",
+       "18         Iris-setosa\n",
+       "83     Iris-versicolor\n",
+       "Name: Species, dtype: object"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72e4ee91",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/Multi-output_Regression.ipynb
+++ b/Multi-output_Regression.ipynb
@@ -0,0 +1,345 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "05e4b4c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ef0e3a1e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Results</th>\n",
+       "      <th>Reach</th>\n",
+       "      <th>Impressions</th>\n",
+       "      <th>Video_plays</th>\n",
+       "      <th>Link_clicks</th>\n",
+       "      <th>Engagement</th>\n",
+       "      <th>Amount_Spent</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1534</td>\n",
+       "      <td>1534</td>\n",
+       "      <td>1535</td>\n",
+       "      <td>1448</td>\n",
+       "      <td>0</td>\n",
+       "      <td>62</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>8593</td>\n",
+       "      <td>8593</td>\n",
+       "      <td>10599</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>140</td>\n",
+       "      <td>571</td>\n",
+       "      <td>572</td>\n",
+       "      <td>457</td>\n",
+       "      <td>0</td>\n",
+       "      <td>140</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>13</td>\n",
+       "      <td>1396</td>\n",
+       "      <td>1479</td>\n",
+       "      <td>0</td>\n",
+       "      <td>13</td>\n",
+       "      <td>47</td>\n",
+       "      <td>23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>37616</td>\n",
+       "      <td>5092</td>\n",
+       "      <td>40135</td>\n",
+       "      <td>39568</td>\n",
+       "      <td>0</td>\n",
+       "      <td>35711</td>\n",
+       "      <td>133</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Results  Reach  Impressions  Video_plays  Link_clicks  Engagement  \\\n",
+       "0     1534   1534         1535         1448            0          62   \n",
+       "1     8593   8593        10599            0            2           2   \n",
+       "2      140    571          572          457            0         140   \n",
+       "3       13   1396         1479            0           13          47   \n",
+       "4    37616   5092        40135        39568            0       35711   \n",
+       "\n",
+       "   Amount_Spent  \n",
+       "0             2  \n",
+       "1            20  \n",
+       "2             5  \n",
+       "3            23  \n",
+       "4           133  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Raw observations, one list per column (nine values each)\n",
+    "data = {\n",
+    "    'Results': [1534, 8593, 140, 13, 37616, 1060, 694, 64, 17744],\n",
+    "    'Reach': [1534, 8593, 571, 1396, 5092, 6933, 2008, 2825, 6154],\n",
+    "    'Impressions': [1535, 10599, 572, 1479, 40135, 11468, 2435, 5087, 21332],\n",
+    "    'Video_plays': [1448, 0, 457, 0, 39568, 0, 1225, 0, 20905],\n",
+    "    'Link_clicks': [0, 2, 0, 13, 0, 100, 1, 49, 0],\n",
+    "    'Engagement': [62, 2, 140, 47, 35711, 1060, 694, 145, 15604],\n",
+    "    'Amount_Spent': [2, 20, 5, 23, 133, 89, 37, 85, 76],\n",
+    "}\n",
+    "\n",
+    "# Build the frame (one row per observation) and preview the first five rows\n",
+    "df = pd.DataFrame(data)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "b7bba8a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.multioutput import MultiOutputRegressor\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import mean_squared_error, r2_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "764ebe28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Predictor: the amount spent, selected as a 1-D Series\n",
+    "X = df.loc[:, 'Amount_Spent']\n",
+    "\n",
+    "# Targets: the six remaining columns, all predicted from the amount spent\n",
+    "y = df.loc[:, [\n",
+    "    'Results', 'Reach', 'Impressions',\n",
+    "    'Video_plays', 'Link_clicks', 'Engagement',\n",
+    "]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "fa4f048c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "MultiOutputRegressor(estimator=LinearRegression())"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=42\n",
+    ")\n",
+    "\n",
+    "# Linear regression wrapped in MultiOutputRegressor to handle all target columns\n",
+    "model = MultiOutputRegressor(estimator=LinearRegression())\n",
+    "\n",
+    "# X_train is a 1-D Series, so reshape its values into a single feature column before fitting\n",
+    "model.fit(X_train.values.reshape(-1, 1), y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "53fda5c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Predict every target column for the held-out rows\n",
+    "# (same single-column reshape that was applied to the training data)\n",
+    "y_pred = model.predict(X_test.values.reshape(-1, 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "2fcd8845",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mean Squared Error: 100374744.3348547\n",
+      "R-squared: -3679.080219755561\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Score the held-out predictions with Mean Squared Error (MSE) and R-squared (R2).\n",
+    "# With several target columns, both metrics default to an unweighted average across targets.\n",
+    "mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)\n",
+    "\n",
+    "print(\"Mean Squared Error:\", mse)\n",
+    "print(\"R-squared:\", r2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "7c2584f6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicted values: [ 726.14777024 1945.43688854 2292.37143629 1016.27642398    8.87683812\n",
+      "  240.56563231]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Predict all target columns for a new 'Amount spent' value.\n",
+    "# X_new is 2-D (one row, one feature), the same layout the model was fitted on.\n",
+    "X_new = [[20]]\n",
+    "# Pass X_new itself rather than repeating the literal, so editing X_new changes the prediction\n",
+    "predicted_values = model.predict(X_new)\n",
+    "# predict() returns one row per input row; show the only row\n",
+    "print(\"Predicted values:\", predicted_values[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "545f8034",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Results</th>\n",
+       "      <th>Reach</th>\n",
+       "      <th>Impressions</th>\n",
+       "      <th>Video_plays</th>\n",
+       "      <th>Link_clicks</th>\n",
+       "      <th>Engagement</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>726.14777</td>\n",
+       "      <td>1945.436889</td>\n",
+       "      <td>2292.371436</td>\n",
+       "      <td>1016.276424</td>\n",
+       "      <td>8.876838</td>\n",
+       "      <td>240.565632</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     Results        Reach  Impressions  Video_plays  Link_clicks  Engagement\n",
+       "0  726.14777  1945.436889  2292.371436  1016.276424     8.876838  240.565632"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Label the prediction array with the target column names so it displays as a table\n",
+    "predicted_df = pd.DataFrame(predicted_values, columns=y.columns)\n",
+    "predicted_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf9c18ec",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/Numeral_Regression.ipynb
+++ b/Numeral_Regression.ipynb
@@ -0,0 +1,292 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "1732817d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2c67c4f6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Results</th>\n",
+       "      <th>Reach</th>\n",
+       "      <th>Impressions</th>\n",
+       "      <th>Video_plays</th>\n",
+       "      <th>Link_clicks</th>\n",
+       "      <th>Engagement</th>\n",
+       "      <th>Amount_Spent</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1534</td>\n",
+       "      <td>1534</td>\n",
+       "      <td>1535</td>\n",
+       "      <td>1448</td>\n",
+       "      <td>0</td>\n",
+       "      <td>62</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>8593</td>\n",
+       "      <td>8593</td>\n",
+       "      <td>10599</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>140</td>\n",
+       "      <td>571</td>\n",
+       "      <td>572</td>\n",
+       "      <td>457</td>\n",
+       "      <td>0</td>\n",
+       "      <td>140</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>13</td>\n",
+       "      <td>1396</td>\n",
+       "      <td>1479</td>\n",
+       "      <td>0</td>\n",
+       "      <td>13</td>\n",
+       "      <td>47</td>\n",
+       "      <td>23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>37616</td>\n",
+       "      <td>5092</td>\n",
+       "      <td>40135</td>\n",
+       "      <td>39568</td>\n",
+       "      <td>0</td>\n",
+       "      <td>35711</td>\n",
+       "      <td>133</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Results  Reach  Impressions  Video_plays  Link_clicks  Engagement  \\\n",
+       "0     1534   1534         1535         1448            0          62   \n",
+       "1     8593   8593        10599            0            2           2   \n",
+       "2      140    571          572          457            0         140   \n",
+       "3       13   1396         1479            0           13          47   \n",
+       "4    37616   5092        40135        39568            0       35711   \n",
+       "\n",
+       "   Amount_Spent  \n",
+       "0             2  \n",
+       "1            20  \n",
+       "2             5  \n",
+       "3            23  \n",
+       "4           133  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Raw observations, one list per column (nine values each)\n",
+    "data = {\n",
+    "    'Results': [1534, 8593, 140, 13, 37616, 1060, 694, 64, 17744],\n",
+    "    'Reach': [1534, 8593, 571, 1396, 5092, 6933, 2008, 2825, 6154],\n",
+    "    'Impressions': [1535, 10599, 572, 1479, 40135, 11468, 2435, 5087, 21332],\n",
+    "    'Video_plays': [1448, 0, 457, 0, 39568, 0, 1225, 0, 20905],\n",
+    "    'Link_clicks': [0, 2, 0, 13, 0, 100, 1, 49, 0],\n",
+    "    'Engagement': [62, 2, 140, 47, 35711, 1060, 694, 145, 15604],\n",
+    "    'Amount_Spent': [2, 20, 5, 23, 133, 89, 37, 85, 76],\n",
+    "}\n",
+    "\n",
+    "# Build the frame (one row per observation) and preview the first five rows\n",
+    "df = pd.DataFrame(data)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "096de0cb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import mean_squared_error, r2_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "00517f34",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Features: the six metric columns used as predictors\n",
+    "X = df.loc[:, [\n",
+    "    'Results', 'Reach', 'Impressions',\n",
+    "    'Video_plays', 'Link_clicks', 'Engagement',\n",
+    "]]\n",
+    "\n",
+    "# Target: the amount spent\n",
+    "y = df.loc[:, 'Amount_Spent']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "a56a0001",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=42\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "e54736e3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression()"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Fit a linear regression of the amount spent on the six feature columns.\n",
+    "# NOTE(review): the training split is tiny (presumably 7 of the 9 rows) relative to\n",
+    "# six features plus an intercept, so expect poor generalisation - confirm with more data.\n",
+    "model = LinearRegression()\n",
+    "model.fit(X=X_train, y=y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "2b5aa068",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use the fitted model to predict the target for the held-out rows\n",
+    "y_pred = model.predict(X=X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "f9eb2f9e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mean Squared Error: 10546.18825415638\n",
+      "R-squared: -8.984556927011957\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Compare the held-out predictions with the true amounts: MSE and R-squared\n",
+    "mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)\n",
+    "\n",
+    "print(\"Mean Squared Error:\", mse)\n",
+    "print(\"R-squared:\", r2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "0430cf6c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicted amount spent: -34.41443487262584\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Predict the amount spent for new data.\n",
+    "# The model was fitted on a DataFrame, so the new row is also built as a DataFrame\n",
+    "# with the same column names and order; passing a bare list makes scikit-learn warn\n",
+    "# that 'X does not have valid feature names'.\n",
+    "X_new = pd.DataFrame([[100, 2000, 5000, 1000, 50, 150]], columns=X.columns)\n",
+    "predicted_amount_spent = model.predict(X_new)\n",
+    "# predict() returns one value per input row; show the only one\n",
+    "print(\"Predicted amount spent:\", predicted_amount_spent[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d17c4a06",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}