diff --git a/Confidence_Interval.ipynb b/Confidence_Interval.ipynb
new file mode 100644
index 0000000..f2044f1
--- /dev/null
+++ b/Confidence_Interval.ipynb
@@ -0,0 +1,192 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "410cdd47",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "f769b682",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.01390952774409444"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# z-multiplier: 1.96 is the critical value for a 95% confidence level\n",
+ "tstar = 1.96\n",
+ "# P hat value\n",
+ "p = .85\n",
+ "# Number of observations\n",
+ "n = 659\n",
+ "\n",
+ "# Calculate Standard Error\n",
+ "se = np.sqrt((p * (1 - p))/n)\n",
+ "se"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d77c95f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0.8227373256215749, 0.8772626743784251)"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Lower confidence band\n",
+ "lcb = p - tstar * se\n",
+ "# Upper confidence band\n",
+ "ucb = p + tstar * se\n",
+ "# Show confidence bands\n",
+ "(lcb, ucb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "1d08b43b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Same process, using statsmodels library\n",
+ "import statsmodels.api as sm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "41cb97c9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0.8227378265796143, 0.8772621734203857)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Get confidence bands (95% by default)\n",
+ "# n * p = number of successes, n = number of observations\n",
+ "# p = sample proportion observed in the survey\n",
+ "sm.stats.proportion_confint(n * p, n)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4234b441",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Try to import dataset\n",
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.read_csv(\"Cartwheeldata.csv\")\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d03c3d4f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Mean of a column\n",
+ "mean = df[\"CWDistance\"].mean()\n",
+ "# Standard deviation of a column\n",
+ "sd = df[\"CWDistance\"].std()\n",
+ "# Rows of the dataframe\n",
+ "n = len(df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c52dddd2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tstar = 2.064\n",
+ "\n",
+ "se = sd/np.sqrt(n)\n",
+ "\n",
+ "se"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2dfbab7d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lcb = mean - tstar * se\n",
+ "ucb = mean + tstar * se\n",
+ "(lcb, ucb)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "649c18b1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ..OR let statsmodels build the same t-based interval for the CWDistance column used above\n",
+ "sm.stats.DescrStatsW(df[\"CWDistance\"]).tconfint_mean()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Label Prediction (Binary Example).ipynb b/Label Prediction (Binary Example).ipynb
new file mode 100644
index 0000000..8cd9edc
--- /dev/null
+++ b/Label Prediction (Binary Example).ipynb
@@ -0,0 +1,814 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ee34a7c4",
+ "metadata": {},
+ "source": [
+ "## Import Libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "1a23a10f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import sklearn\n",
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3333920d",
+ "metadata": {},
+ "source": [
+ "## Dataset\n",
+ "The dataset used in this notebook can be found [here](https://www.kaggle.com/datasets/elakiricoder/gender-classification-dataset)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "5aea2295",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " long_hair | \n",
+ " forehead_width_cm | \n",
+ " forehead_height_cm | \n",
+ " nose_wide | \n",
+ " nose_long | \n",
+ " lips_thin | \n",
+ " distance_nose_to_lip_long | \n",
+ " gender | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 11.8 | \n",
+ " 6.1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ " 14.0 | \n",
+ " 5.4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0 | \n",
+ " 11.8 | \n",
+ " 6.3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0 | \n",
+ " 14.4 | \n",
+ " 6.1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1 | \n",
+ " 13.5 | \n",
+ " 5.9 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
+ "0 1 11.8 6.1 1 0 \n",
+ "1 0 14.0 5.4 0 0 \n",
+ "2 0 11.8 6.3 1 1 \n",
+ "3 0 14.4 6.1 0 1 \n",
+ "4 1 13.5 5.9 0 0 \n",
+ "\n",
+ " lips_thin distance_nose_to_lip_long gender \n",
+ "0 1 1 Male \n",
+ "1 1 0 Female \n",
+ "2 1 1 Male \n",
+ "3 1 1 Male \n",
+ "4 0 0 Female "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load dataset\n",
+ "df = pd.read_csv(r'D:\\archive\\gender_classification_v7.csv', encoding='utf-8')\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "58b8ed5e",
+ "metadata": {},
+ "source": [
+ "## Data Pre-processing\n",
+ "For this example, the descriptive statistics are skipped and we go straight to a few minor adjustments."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d93ff56d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "long_hair int64\n",
+ "forehead_width_cm float64\n",
+ "forehead_height_cm float64\n",
+ "nose_wide int64\n",
+ "nose_long int64\n",
+ "lips_thin int64\n",
+ "distance_nose_to_lip_long int64\n",
+ "gender object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Check Data types of dataframe columns\n",
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "19ae1cf5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " long_hair | \n",
+ " forehead_width_cm | \n",
+ " forehead_height_cm | \n",
+ " nose_wide | \n",
+ " nose_long | \n",
+ " lips_thin | \n",
+ " distance_nose_to_lip_long | \n",
+ " gender | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 11.8 | \n",
+ " 6.1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ " 14.0 | \n",
+ " 5.4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0 | \n",
+ " 11.8 | \n",
+ " 6.3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0 | \n",
+ " 14.4 | \n",
+ " 6.1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1 | \n",
+ " 13.5 | \n",
+ " 5.9 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
+ "0 1 11.8 6.1 1 0 \n",
+ "1 0 14.0 5.4 0 0 \n",
+ "2 0 11.8 6.3 1 1 \n",
+ "3 0 14.4 6.1 0 1 \n",
+ "4 1 13.5 5.9 0 0 \n",
+ "\n",
+ " lips_thin distance_nose_to_lip_long gender \n",
+ "0 1 1 0 \n",
+ "1 1 0 1 \n",
+ "2 1 1 0 \n",
+ "3 1 1 0 \n",
+ "4 0 0 1 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Convert Gender labels into integer values, for classification\n",
+ "df['gender']=df['gender'].replace('Male',0)\n",
+ "df['gender']=df['gender'].replace('Female',1)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "b573f11e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "long_hair int64\n",
+ "forehead_width_cm float64\n",
+ "forehead_height_cm float64\n",
+ "nose_wide int64\n",
+ "nose_long int64\n",
+ "lips_thin int64\n",
+ "distance_nose_to_lip_long int64\n",
+ "gender int64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Now all is numeric data\n",
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "35388ca3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Split dataset into X (Features) and y (Labels)\n",
+ "\n",
+ "# X is ALL columns except the last column (usually the label to be predicted)\n",
+ "X = df.iloc[:,:-1]\n",
+ "# y is the LABEL column (to be predicted)\n",
+ "y = df.iloc[:,-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "14c3347e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Use sklearn's train_test_split function imported before\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe832e3f",
+ "metadata": {},
+ "source": [
+ "## Using 4 Classifiers\n",
+ "It is suggested to take a closer look at the parameters described in the documentation below to better tune the classifiers.\n",
+ "- [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)\n",
+ "- [Decision Tree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html)\n",
+ "- [SVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)\n",
+ "- [K-Nearest Neighbors (KNN)](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "f83a2e5c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from sklearn.svm import SVC\n",
+ "from sklearn.neighbors import KNeighborsClassifier"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "dc4c2062",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Logistic Regression Accuracy: 0.9682063587282543\n",
+ "Decision Tree Accuracy: 0.8792241551689662\n",
+ "SVM Accuracy: 0.967006598680264\n",
+ "K-Means Accuracy: 0.9754049190161967\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Logistic Regression\n",
+ "# Train the model (the target is binary, so the default multi_class handling already acts as one-vs-rest)\n",
+ "LogR = LogisticRegression(random_state=0).fit(X_train, y_train)\n",
+ "# Predict the test set\n",
+ "LogR_pred = LogR.predict(X_test)\n",
+ "\n",
+ "# Decision Tree\n",
+ "dtree = DecisionTreeClassifier(max_depth = 2).fit(X_train, y_train)\n",
+ "dtree_pred = dtree.predict(X_test)\n",
+ "\n",
+ "# SVM\n",
+ "svm = SVC(kernel='linear',C=1).fit(X_train, y_train)\n",
+ "svm_pred = svm.predict(X_test)\n",
+ "\n",
+ "# K-Nearest Neighbors (KNN) -- a supervised classifier, not K-Means clustering\n",
+ "knn = KNeighborsClassifier(n_neighbors=5).fit(X_train,y_train)\n",
+ "knn_pred = knn.predict(X_test)\n",
+ "\n",
+ "# See accuracy of each classifier on the held-out test split only (scoring on X, y would also count the training rows)\n",
+ "print(\"Logistic Regression Accuracy: \"+ str(LogR.score(X_test, y_test)))\n",
+ "print(\"Decision Tree Accuracy: \"+ str(dtree.score(X_test, y_test)))\n",
+ "print(\"SVM Accuracy: \"+ str(svm.score(X_test, y_test)))\n",
+ "print(\"KNN Accuracy: \"+ str(knn.score(X_test, y_test)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "00f72b96",
+ "metadata": {},
+ "source": [
+ "## Try on a new dataset\n",
+ "Use one (or more) of the models above to predict labels for a new dataset. Assuming it has the same feature columns but different values, we get:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "9c24db9a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " long_hair | \n",
+ " forehead_width_cm | \n",
+ " forehead_height_cm | \n",
+ " nose_wide | \n",
+ " nose_long | \n",
+ " lips_thin | \n",
+ " distance_nose_to_lip_long | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 14.5 | \n",
+ " 6.7 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 14.0 | \n",
+ " 5.9 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 12.9 | \n",
+ " 6.4 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
+ "0 1 14.5 6.7 0 1 \n",
+ "1 1 14.0 5.9 0 0 \n",
+ "2 1 12.9 6.4 1 0 \n",
+ "\n",
+ " lips_thin distance_nose_to_lip_long \n",
+ "0 1 1 \n",
+ "1 0 0 \n",
+ "2 0 1 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# For this example we use 3 rows of data to be predicted\n",
+ "dval = pd.read_csv(r'D:\\archive\\valgend.csv', encoding='utf-8')\n",
+ "dval.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "ad501b6a",
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [],
+ "source": [
+ "# predict with knn (change to which model you choose)\n",
+ "knn_pred_new = knn.predict(dval)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "8896ab72",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 1, 0], dtype=int64)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# See the result\n",
+ "knn_pred_new"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "7fa9db00",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add new column in new dataframe for placing the results, pass the \"result\" from before\n",
+ "dval[\"pred_gender\"]=knn_pred_new"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "6155a519",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " long_hair | \n",
+ " forehead_width_cm | \n",
+ " forehead_height_cm | \n",
+ " nose_wide | \n",
+ " nose_long | \n",
+ " lips_thin | \n",
+ " distance_nose_to_lip_long | \n",
+ " pred_gender | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 14.5 | \n",
+ " 6.7 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 14.0 | \n",
+ " 5.9 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 12.9 | \n",
+ " 6.4 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
+ "0 1 14.5 6.7 0 1 \n",
+ "1 1 14.0 5.9 0 0 \n",
+ "2 1 12.9 6.4 1 0 \n",
+ "\n",
+ " lips_thin distance_nose_to_lip_long pred_gender \n",
+ "0 1 1 0 \n",
+ "1 0 0 1 \n",
+ "2 0 1 0 "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# See data with appended prediction (last column)\n",
+ "dval.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "c2587a57",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " long_hair | \n",
+ " forehead_width_cm | \n",
+ " forehead_height_cm | \n",
+ " nose_wide | \n",
+ " nose_long | \n",
+ " lips_thin | \n",
+ " distance_nose_to_lip_long | \n",
+ " pred_gender | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 14.5 | \n",
+ " 6.7 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 14.0 | \n",
+ " 5.9 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " 12.9 | \n",
+ " 6.4 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
+ "0 1 14.5 6.7 0 1 \n",
+ "1 1 14.0 5.9 0 0 \n",
+ "2 1 12.9 6.4 1 0 \n",
+ "\n",
+ " lips_thin distance_nose_to_lip_long pred_gender \n",
+ "0 1 1 Male \n",
+ "1 0 0 Female \n",
+ "2 0 1 Male "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Converting back to labels\n",
+ "dval['pred_gender']=dval['pred_gender'].replace(0,'Male')\n",
+ "dval['pred_gender']=dval['pred_gender'].replace(1,'Female')\n",
+ "dval.head()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Multi-label pred.ipynb b/Multi-label pred.ipynb
new file mode 100644
index 0000000..5b32f73
--- /dev/null
+++ b/Multi-label pred.ipynb
@@ -0,0 +1,794 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "5c3d106c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt \n",
+ "import seaborn as sns \n",
+ "import plotly as py\n",
+ "import os"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7e7ad082",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " SepalLengthCm | \n",
+ " SepalWidthCm | \n",
+ " PetalLengthCm | \n",
+ " PetalWidthCm | \n",
+ " Species | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 5.1 | \n",
+ " 3.5 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 4.9 | \n",
+ " 3.0 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 4.7 | \n",
+ " 3.2 | \n",
+ " 1.3 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 4.6 | \n",
+ " 3.1 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 5.0 | \n",
+ " 3.6 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Iris-setosa | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species\n",
+ "0 1 5.1 3.5 1.4 0.2 Iris-setosa\n",
+ "1 2 4.9 3.0 1.4 0.2 Iris-setosa\n",
+ "2 3 4.7 3.2 1.3 0.2 Iris-setosa\n",
+ "3 4 4.6 3.1 1.5 0.2 Iris-setosa\n",
+ "4 5 5.0 3.6 1.4 0.2 Iris-setosa"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.read_csv(r'D:\\archive\\iris.csv', encoding='utf-8')\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "a85eca81",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " SepalLengthCm | \n",
+ " SepalWidthCm | \n",
+ " PetalLengthCm | \n",
+ " PetalWidthCm | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 150.000000 | \n",
+ " 150.000000 | \n",
+ " 150.000000 | \n",
+ " 150.000000 | \n",
+ " 150.000000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 75.500000 | \n",
+ " 5.843333 | \n",
+ " 3.054000 | \n",
+ " 3.758667 | \n",
+ " 1.198667 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 43.445368 | \n",
+ " 0.828066 | \n",
+ " 0.433594 | \n",
+ " 1.764420 | \n",
+ " 0.763161 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 1.000000 | \n",
+ " 4.300000 | \n",
+ " 2.000000 | \n",
+ " 1.000000 | \n",
+ " 0.100000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 38.250000 | \n",
+ " 5.100000 | \n",
+ " 2.800000 | \n",
+ " 1.600000 | \n",
+ " 0.300000 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 75.500000 | \n",
+ " 5.800000 | \n",
+ " 3.000000 | \n",
+ " 4.350000 | \n",
+ " 1.300000 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 112.750000 | \n",
+ " 6.400000 | \n",
+ " 3.300000 | \n",
+ " 5.100000 | \n",
+ " 1.800000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 150.000000 | \n",
+ " 7.900000 | \n",
+ " 4.400000 | \n",
+ " 6.900000 | \n",
+ " 2.500000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm\n",
+ "count 150.000000 150.000000 150.000000 150.000000 150.000000\n",
+ "mean 75.500000 5.843333 3.054000 3.758667 1.198667\n",
+ "std 43.445368 0.828066 0.433594 1.764420 0.763161\n",
+ "min 1.000000 4.300000 2.000000 1.000000 0.100000\n",
+ "25% 38.250000 5.100000 2.800000 1.600000 0.300000\n",
+ "50% 75.500000 5.800000 3.000000 4.350000 1.300000\n",
+ "75% 112.750000 6.400000 3.300000 5.100000 1.800000\n",
+ "max 150.000000 7.900000 4.400000 6.900000 2.500000"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "fd80a4a8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Id int64\n",
+ "SepalLengthCm float64\n",
+ "SepalWidthCm float64\n",
+ "PetalLengthCm float64\n",
+ "PetalWidthCm float64\n",
+ "Species object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "cc10d9c3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sklearn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "d07459e2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.metrics import confusion_matrix\n",
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "f917c7bd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn import datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "9d3e54c7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "iris = datasets.load_iris()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "1c34bd6d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'data': array([[5.1, 3.5, 1.4, 0.2],\n",
+ " [4.9, 3. , 1.4, 0.2],\n",
+ " [4.7, 3.2, 1.3, 0.2],\n",
+ " [4.6, 3.1, 1.5, 0.2],\n",
+ " [5. , 3.6, 1.4, 0.2],\n",
+ " [5.4, 3.9, 1.7, 0.4],\n",
+ " [4.6, 3.4, 1.4, 0.3],\n",
+ " [5. , 3.4, 1.5, 0.2],\n",
+ " [4.4, 2.9, 1.4, 0.2],\n",
+ " [4.9, 3.1, 1.5, 0.1],\n",
+ " [5.4, 3.7, 1.5, 0.2],\n",
+ " [4.8, 3.4, 1.6, 0.2],\n",
+ " [4.8, 3. , 1.4, 0.1],\n",
+ " [4.3, 3. , 1.1, 0.1],\n",
+ " [5.8, 4. , 1.2, 0.2],\n",
+ " [5.7, 4.4, 1.5, 0.4],\n",
+ " [5.4, 3.9, 1.3, 0.4],\n",
+ " [5.1, 3.5, 1.4, 0.3],\n",
+ " [5.7, 3.8, 1.7, 0.3],\n",
+ " [5.1, 3.8, 1.5, 0.3],\n",
+ " [5.4, 3.4, 1.7, 0.2],\n",
+ " [5.1, 3.7, 1.5, 0.4],\n",
+ " [4.6, 3.6, 1. , 0.2],\n",
+ " [5.1, 3.3, 1.7, 0.5],\n",
+ " [4.8, 3.4, 1.9, 0.2],\n",
+ " [5. , 3. , 1.6, 0.2],\n",
+ " [5. , 3.4, 1.6, 0.4],\n",
+ " [5.2, 3.5, 1.5, 0.2],\n",
+ " [5.2, 3.4, 1.4, 0.2],\n",
+ " [4.7, 3.2, 1.6, 0.2],\n",
+ " [4.8, 3.1, 1.6, 0.2],\n",
+ " [5.4, 3.4, 1.5, 0.4],\n",
+ " [5.2, 4.1, 1.5, 0.1],\n",
+ " [5.5, 4.2, 1.4, 0.2],\n",
+ " [4.9, 3.1, 1.5, 0.2],\n",
+ " [5. , 3.2, 1.2, 0.2],\n",
+ " [5.5, 3.5, 1.3, 0.2],\n",
+ " [4.9, 3.6, 1.4, 0.1],\n",
+ " [4.4, 3. , 1.3, 0.2],\n",
+ " [5.1, 3.4, 1.5, 0.2],\n",
+ " [5. , 3.5, 1.3, 0.3],\n",
+ " [4.5, 2.3, 1.3, 0.3],\n",
+ " [4.4, 3.2, 1.3, 0.2],\n",
+ " [5. , 3.5, 1.6, 0.6],\n",
+ " [5.1, 3.8, 1.9, 0.4],\n",
+ " [4.8, 3. , 1.4, 0.3],\n",
+ " [5.1, 3.8, 1.6, 0.2],\n",
+ " [4.6, 3.2, 1.4, 0.2],\n",
+ " [5.3, 3.7, 1.5, 0.2],\n",
+ " [5. , 3.3, 1.4, 0.2],\n",
+ " [7. , 3.2, 4.7, 1.4],\n",
+ " [6.4, 3.2, 4.5, 1.5],\n",
+ " [6.9, 3.1, 4.9, 1.5],\n",
+ " [5.5, 2.3, 4. , 1.3],\n",
+ " [6.5, 2.8, 4.6, 1.5],\n",
+ " [5.7, 2.8, 4.5, 1.3],\n",
+ " [6.3, 3.3, 4.7, 1.6],\n",
+ " [4.9, 2.4, 3.3, 1. ],\n",
+ " [6.6, 2.9, 4.6, 1.3],\n",
+ " [5.2, 2.7, 3.9, 1.4],\n",
+ " [5. , 2. , 3.5, 1. ],\n",
+ " [5.9, 3. , 4.2, 1.5],\n",
+ " [6. , 2.2, 4. , 1. ],\n",
+ " [6.1, 2.9, 4.7, 1.4],\n",
+ " [5.6, 2.9, 3.6, 1.3],\n",
+ " [6.7, 3.1, 4.4, 1.4],\n",
+ " [5.6, 3. , 4.5, 1.5],\n",
+ " [5.8, 2.7, 4.1, 1. ],\n",
+ " [6.2, 2.2, 4.5, 1.5],\n",
+ " [5.6, 2.5, 3.9, 1.1],\n",
+ " [5.9, 3.2, 4.8, 1.8],\n",
+ " [6.1, 2.8, 4. , 1.3],\n",
+ " [6.3, 2.5, 4.9, 1.5],\n",
+ " [6.1, 2.8, 4.7, 1.2],\n",
+ " [6.4, 2.9, 4.3, 1.3],\n",
+ " [6.6, 3. , 4.4, 1.4],\n",
+ " [6.8, 2.8, 4.8, 1.4],\n",
+ " [6.7, 3. , 5. , 1.7],\n",
+ " [6. , 2.9, 4.5, 1.5],\n",
+ " [5.7, 2.6, 3.5, 1. ],\n",
+ " [5.5, 2.4, 3.8, 1.1],\n",
+ " [5.5, 2.4, 3.7, 1. ],\n",
+ " [5.8, 2.7, 3.9, 1.2],\n",
+ " [6. , 2.7, 5.1, 1.6],\n",
+ " [5.4, 3. , 4.5, 1.5],\n",
+ " [6. , 3.4, 4.5, 1.6],\n",
+ " [6.7, 3.1, 4.7, 1.5],\n",
+ " [6.3, 2.3, 4.4, 1.3],\n",
+ " [5.6, 3. , 4.1, 1.3],\n",
+ " [5.5, 2.5, 4. , 1.3],\n",
+ " [5.5, 2.6, 4.4, 1.2],\n",
+ " [6.1, 3. , 4.6, 1.4],\n",
+ " [5.8, 2.6, 4. , 1.2],\n",
+ " [5. , 2.3, 3.3, 1. ],\n",
+ " [5.6, 2.7, 4.2, 1.3],\n",
+ " [5.7, 3. , 4.2, 1.2],\n",
+ " [5.7, 2.9, 4.2, 1.3],\n",
+ " [6.2, 2.9, 4.3, 1.3],\n",
+ " [5.1, 2.5, 3. , 1.1],\n",
+ " [5.7, 2.8, 4.1, 1.3],\n",
+ " [6.3, 3.3, 6. , 2.5],\n",
+ " [5.8, 2.7, 5.1, 1.9],\n",
+ " [7.1, 3. , 5.9, 2.1],\n",
+ " [6.3, 2.9, 5.6, 1.8],\n",
+ " [6.5, 3. , 5.8, 2.2],\n",
+ " [7.6, 3. , 6.6, 2.1],\n",
+ " [4.9, 2.5, 4.5, 1.7],\n",
+ " [7.3, 2.9, 6.3, 1.8],\n",
+ " [6.7, 2.5, 5.8, 1.8],\n",
+ " [7.2, 3.6, 6.1, 2.5],\n",
+ " [6.5, 3.2, 5.1, 2. ],\n",
+ " [6.4, 2.7, 5.3, 1.9],\n",
+ " [6.8, 3. , 5.5, 2.1],\n",
+ " [5.7, 2.5, 5. , 2. ],\n",
+ " [5.8, 2.8, 5.1, 2.4],\n",
+ " [6.4, 3.2, 5.3, 2.3],\n",
+ " [6.5, 3. , 5.5, 1.8],\n",
+ " [7.7, 3.8, 6.7, 2.2],\n",
+ " [7.7, 2.6, 6.9, 2.3],\n",
+ " [6. , 2.2, 5. , 1.5],\n",
+ " [6.9, 3.2, 5.7, 2.3],\n",
+ " [5.6, 2.8, 4.9, 2. ],\n",
+ " [7.7, 2.8, 6.7, 2. ],\n",
+ " [6.3, 2.7, 4.9, 1.8],\n",
+ " [6.7, 3.3, 5.7, 2.1],\n",
+ " [7.2, 3.2, 6. , 1.8],\n",
+ " [6.2, 2.8, 4.8, 1.8],\n",
+ " [6.1, 3. , 4.9, 1.8],\n",
+ " [6.4, 2.8, 5.6, 2.1],\n",
+ " [7.2, 3. , 5.8, 1.6],\n",
+ " [7.4, 2.8, 6.1, 1.9],\n",
+ " [7.9, 3.8, 6.4, 2. ],\n",
+ " [6.4, 2.8, 5.6, 2.2],\n",
+ " [6.3, 2.8, 5.1, 1.5],\n",
+ " [6.1, 2.6, 5.6, 1.4],\n",
+ " [7.7, 3. , 6.1, 2.3],\n",
+ " [6.3, 3.4, 5.6, 2.4],\n",
+ " [6.4, 3.1, 5.5, 1.8],\n",
+ " [6. , 3. , 4.8, 1.8],\n",
+ " [6.9, 3.1, 5.4, 2.1],\n",
+ " [6.7, 3.1, 5.6, 2.4],\n",
+ " [6.9, 3.1, 5.1, 2.3],\n",
+ " [5.8, 2.7, 5.1, 1.9],\n",
+ " [6.8, 3.2, 5.9, 2.3],\n",
+ " [6.7, 3.3, 5.7, 2.5],\n",
+ " [6.7, 3. , 5.2, 2.3],\n",
+ " [6.3, 2.5, 5. , 1.9],\n",
+ " [6.5, 3. , 5.2, 2. ],\n",
+ " [6.2, 3.4, 5.4, 2.3],\n",
+ " [5.9, 3. , 5.1, 1.8]]),\n",
+ " 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+ " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+ " 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
+ " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
+ " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),\n",
+ " 'frame': None,\n",
+ " 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Results | \n",
+ " Reach | \n",
+ " Impressions | \n",
+ " Video_plays | \n",
+ " Link_clicks | \n",
+ " Engagement | \n",
+ " Amount_Spent | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1534 | \n",
+ " 1534 | \n",
+ " 1535 | \n",
+ " 1448 | \n",
+ " 0 | \n",
+ " 62 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 8593 | \n",
+ " 8593 | \n",
+ " 10599 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 140 | \n",
+ " 571 | \n",
+ " 572 | \n",
+ " 457 | \n",
+ " 0 | \n",
+ " 140 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 13 | \n",
+ " 1396 | \n",
+ " 1479 | \n",
+ " 0 | \n",
+ " 13 | \n",
+ " 47 | \n",
+ " 23 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 37616 | \n",
+ " 5092 | \n",
+ " 40135 | \n",
+ " 39568 | \n",
+ " 0 | \n",
+ " 35711 | \n",
+ " 133 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " Results Reach Impressions Video_plays Link_clicks Engagement \\\n",
+ "0 1534 1534 1535 1448 0 62 \n",
+ "1 8593 8593 10599 0 2 2 \n",
+ "2 140 571 572 457 0 140 \n",
+ "3 13 1396 1479 0 13 47 \n",
+ "4 37616 5092 40135 39568 0 35711 \n",
+ "\n",
+ " Amount_Spent \n",
+ "0 2 \n",
+ "1 20 \n",
+ "2 5 \n",
+ "3 23 \n",
+ "4 133 "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = {'Results': [1534,8593,140,13,37616,1060,694,64,17744],\n",
+ " 'Reach': [1534,8593,571,1396,5092,6933,2008,2825,6154],\n",
+ " 'Impressions': [1535,10599,572,1479,40135,11468,2435,5087,21332],\n",
+ " 'Video_plays': [1448,0,457,0,39568,0,1225,0,20905],\n",
+ " 'Link_clicks': [0,2,0,13,0,100,1,49,0],\n",
+ " 'Engagement': [62,2,140,47,35711,1060,694,145,15604],\n",
+ " 'Amount_Spent': [2,20,5,23,133,89,37,85,76]}\n",
+ "\n",
+ "df = pd.DataFrame(data)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "b7bba8a6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.multioutput import MultiOutputRegressor\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import mean_squared_error, r2_score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "764ebe28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# X contains the features (Amount spent)\n",
+ "X = df['Amount_Spent']\n",
+ "\n",
+ "# y contains the target variable (Results, Reach, Impressions, Video Plays, Link clicks, and Post engagement)\n",
+ "y = df[['Results', 'Reach', 'Impressions', 'Video_plays', 'Link_clicks', 'Engagement']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "fa4f048c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MultiOutputRegressor(estimator=LinearRegression())"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Split the data into training and testing sets\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+ "\n",
+ "# Create a Linear Regression model and wrap it in MultiOutputRegressor\n",
+ "model = MultiOutputRegressor(LinearRegression())\n",
+ "\n",
+ "# Train the model with the training data\n",
+ "model.fit(X_train.values.reshape(-1, 1), y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "53fda5c4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Predict the columns using the test data\n",
+ "y_pred = model.predict(X_test.values.reshape(-1, 1))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "2fcd8845",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mean Squared Error: 100374744.3348547\n",
+ "R-squared: -3679.080219755561\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Evaluate the model's performance using metrics like Mean Squared Error (MSE) and R-squared (R2)\n",
+ "mse = mean_squared_error(y_test, y_pred)\n",
+ "r2 = r2_score(y_test, y_pred)\n",
+ "\n",
+ "print(\"Mean Squared Error:\", mse)\n",
+ "print(\"R-squared:\", r2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "7c2584f6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Predicted values: [ 726.14777024 1945.43688854 2292.37143629 1016.27642398 8.87683812\n",
+ " 240.56563231]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Predict all target columns for a new value of 'Amount_Spent' (one row, one feature)\n",
+ "X_new = [[20]]\n",
+ "predicted_values = model.predict(X_new)\n",
+ "print(\"Predicted values:\", predicted_values[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "545f8034",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Results | \n",
+ " Reach | \n",
+ " Impressions | \n",
+ " Video_plays | \n",
+ " Link_clicks | \n",
+ " Engagement | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 726.14777 | \n",
+ " 1945.436889 | \n",
+ " 2292.371436 | \n",
+ " 1016.276424 | \n",
+ " 8.876838 | \n",
+ " 240.565632 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Results Reach Impressions Video_plays Link_clicks Engagement\n",
+ "0 726.14777 1945.436889 2292.371436 1016.276424 8.876838 240.565632"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Convert the predicted values into a new DataFrame\n",
+ "predicted_df = pd.DataFrame(data=predicted_values, columns=y.columns)\n",
+ "predicted_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bf9c18ec",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Numeral_Regression.ipynb b/Numeral_Regression.ipynb
new file mode 100644
index 0000000..e27fdc6
--- /dev/null
+++ b/Numeral_Regression.ipynb
@@ -0,0 +1,292 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "1732817d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "2c67c4f6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Results | \n",
+ " Reach | \n",
+ " Impressions | \n",
+ " Video_plays | \n",
+ " Link_clicks | \n",
+ " Engagement | \n",
+ " Amount_Spent | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1534 | \n",
+ " 1534 | \n",
+ " 1535 | \n",
+ " 1448 | \n",
+ " 0 | \n",
+ " 62 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 8593 | \n",
+ " 8593 | \n",
+ " 10599 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 140 | \n",
+ " 571 | \n",
+ " 572 | \n",
+ " 457 | \n",
+ " 0 | \n",
+ " 140 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 13 | \n",
+ " 1396 | \n",
+ " 1479 | \n",
+ " 0 | \n",
+ " 13 | \n",
+ " 47 | \n",
+ " 23 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 37616 | \n",
+ " 5092 | \n",
+ " 40135 | \n",
+ " 39568 | \n",
+ " 0 | \n",
+ " 35711 | \n",
+ " 133 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Results Reach Impressions Video_plays Link_clicks Engagement \\\n",
+ "0 1534 1534 1535 1448 0 62 \n",
+ "1 8593 8593 10599 0 2 2 \n",
+ "2 140 571 572 457 0 140 \n",
+ "3 13 1396 1479 0 13 47 \n",
+ "4 37616 5092 40135 39568 0 35711 \n",
+ "\n",
+ " Amount_Spent \n",
+ "0 2 \n",
+ "1 20 \n",
+ "2 5 \n",
+ "3 23 \n",
+ "4 133 "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Raw metrics, one list per column; every list has nine entries\n",
+ "data = dict(\n",
+ "    Results=[1534, 8593, 140, 13, 37616, 1060, 694, 64, 17744],\n",
+ "    Reach=[1534, 8593, 571, 1396, 5092, 6933, 2008, 2825, 6154],\n",
+ "    Impressions=[1535, 10599, 572, 1479, 40135, 11468, 2435, 5087, 21332],\n",
+ "    Video_plays=[1448, 0, 457, 0, 39568, 0, 1225, 0, 20905],\n",
+ "    Link_clicks=[0, 2, 0, 13, 0, 100, 1, 49, 0],\n",
+ "    Engagement=[62, 2, 140, 47, 35711, 1060, 694, 145, 15604],\n",
+ "    Amount_Spent=[2, 20, 5, 23, 133, 89, 37, 85, 76],\n",
+ ")\n",
+ "\n",
+ "df = pd.DataFrame(data)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "096de0cb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import mean_squared_error, r2_score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "00517f34",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Features: every column except the target\n",
+ "# (Results, Reach, Impressions, Video_plays, Link_clicks, Engagement)\n",
+ "X = df.drop(columns=['Amount_Spent'])\n",
+ "\n",
+ "# Target: the amount spent\n",
+ "y = df['Amount_Spent']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "a56a0001",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, y, test_size=0.2, random_state=42\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "e54736e3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Fit a linear regression model on the training split\n",
+ "model = LinearRegression()\n",
+ "model.fit(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "2b5aa068",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Predict the target for the held-out test set\n",
+ "y_pred = model.predict(X_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "f9eb2f9e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mean Squared Error: 10546.18825415638\n",
+ "R-squared: -8.984556927011957\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Evaluate model performance on the held-out rows.\n",
+ "# A negative R-squared means the fit is worse than always predicting the mean of y_test.\n",
+ "mse = mean_squared_error(y_true=y_test, y_pred=y_pred)\n",
+ "r2 = r2_score(y_true=y_test, y_pred=y_pred)\n",
+ "\n",
+ "print(\"Mean Squared Error:\", mse)\n",
+ "print(\"R-squared:\", r2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "0430cf6c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Predicted amount spent: -34.41443487262584\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Predict the amount spent for new data.\n",
+ "# The row is wrapped in a DataFrame that carries the training column names,\n",
+ "# so scikit-learn does not warn that X lacks valid feature names.\n",
+ "X_new = pd.DataFrame([[100, 2000, 5000, 1000, 50, 150]], columns=X.columns)\n",
+ "predicted_amount_spent = model.predict(X_new)\n",
+ "print(\"Predicted amount spent:\", predicted_amount_spent[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d17c4a06",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}