diff --git a/Confidence_Interval.ipynb b/Confidence_Interval.ipynb new file mode 100644 index 0000000..f2044f1 --- /dev/null +++ b/Confidence_Interval.ipynb @@ -0,0 +1,192 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "410cdd47", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f769b682", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.01390952774409444" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# T-Multiplier\n", + "tstar = 1.96\n", + "# P hat value\n", + "p = .85\n", + "# Number of observations\n", + "n = 659\n", + "\n", + "# Calculate Standard Error\n", + "se = np.sqrt((p * (1 - p))/n)\n", + "se" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d77c95f1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.8227373256215749, 0.8772626743784251)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Lower confidence band\n", + "lcb = p - tstar * se\n", + "# Upper confidence band\n", + "ucb = p + tstar * se\n", + "# Show confidence bands\n", + "(lcb, ucb)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1d08b43b", + "metadata": {}, + "outputs": [], + "source": [ + "# Same process, using statsmodels library\n", + "import statsmodels.api as sm" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "41cb97c9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.8227378265796143, 0.8772621734203857)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get confidence bands\n", + "# n = observations\n", + "# p = result of a survey \n", + "sm.stats.proportion_confint(n * p, n)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "4234b441", + "metadata": {}, + "outputs": [], + "source": [ + "# Try to import dataset\n", + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"Cartwheeldata.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d03c3d4f", + "metadata": {}, + "outputs": [], + "source": [ + "# Mean of a column\n", + "mean = df[\"CWDistance\"].mean()\n", + "# Standard deviation of a column\n", + "sd = df[\"CWDistance\"].std()\n", + "# Rows of the dataframe\n", + "n = len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c52dddd2", + "metadata": {}, + "outputs": [], + "source": [ + "tstar = 2.064\n", + "\n", + "se = sd/np.sqrt(n)\n", + "\n", + "se" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2dfbab7d", + "metadata": {}, + "outputs": [], + "source": [ + "lcb = mean - tstar * se\n", + "ucb = mean + tstar * se\n", + "(lcb, ucb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "649c18b1", + "metadata": {}, + "outputs": [], + "source": [ + "#..OR use statsmodels instead\n", + "sm.stats.DescrStatsW(df[\"#ColumnName\"]).zconfint_mean()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Label Prediction (Binary Example).ipynb b/Label Prediction (Binary Example).ipynb new file mode 100644 index 0000000..8cd9edc --- /dev/null +++ b/Label Prediction (Binary Example).ipynb @@ -0,0 +1,814 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ee34a7c4", + "metadata": {}, + "source": [ + "## Import Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + 
"id": "1a23a10f", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import sklearn\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "markdown", + "id": "3333920d", + "metadata": {}, + "source": [ + "## Dataset\n", + "You can find the dataset used in this notebook [here](https://www.kaggle.com/datasets/elakiricoder/gender-classification-dataset)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5aea2295", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
long_hairforehead_width_cmforehead_height_cmnose_widenose_longlips_thindistance_nose_to_lip_longgender
0111.86.11011Male
1014.05.40010Female
2011.86.31111Male
3014.46.10111Male
4113.55.90000Female
\n", + "
" + ], + "text/plain": [ + " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", + "0 1 11.8 6.1 1 0 \n", + "1 0 14.0 5.4 0 0 \n", + "2 0 11.8 6.3 1 1 \n", + "3 0 14.4 6.1 0 1 \n", + "4 1 13.5 5.9 0 0 \n", + "\n", + " lips_thin distance_nose_to_lip_long gender \n", + "0 1 1 Male \n", + "1 1 0 Female \n", + "2 1 1 Male \n", + "3 1 1 Male \n", + "4 0 0 Female " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load dataset\n", + "df = pd.read_csv(r'D:\\archive\\gender_classification_v7.csv', encoding='utf-8')\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "58b8ed5e", + "metadata": {}, + "source": [ + "## Data Pre-processing\n", + "For this example I skipped the Descriptive Statistics, and went to minor adjustments." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d93ff56d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "long_hair int64\n", + "forehead_width_cm float64\n", + "forehead_height_cm float64\n", + "nose_wide int64\n", + "nose_long int64\n", + "lips_thin int64\n", + "distance_nose_to_lip_long int64\n", + "gender object\n", + "dtype: object" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check Data types of dataframe columns\n", + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "19ae1cf5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
long_hairforehead_width_cmforehead_height_cmnose_widenose_longlips_thindistance_nose_to_lip_longgender
0111.86.110110
1014.05.400101
2011.86.311110
3014.46.101110
4113.55.900001
\n", + "
" + ], + "text/plain": [ + " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", + "0 1 11.8 6.1 1 0 \n", + "1 0 14.0 5.4 0 0 \n", + "2 0 11.8 6.3 1 1 \n", + "3 0 14.4 6.1 0 1 \n", + "4 1 13.5 5.9 0 0 \n", + "\n", + " lips_thin distance_nose_to_lip_long gender \n", + "0 1 1 0 \n", + "1 1 0 1 \n", + "2 1 1 0 \n", + "3 1 1 0 \n", + "4 0 0 1 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Convert Gender labels into integer values, for classification\n", + "df['gender']=df['gender'].replace('Male',0)\n", + "df['gender']=df['gender'].replace('Female',1)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b573f11e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "long_hair int64\n", + "forehead_width_cm float64\n", + "forehead_height_cm float64\n", + "nose_wide int64\n", + "nose_long int64\n", + "lips_thin int64\n", + "distance_nose_to_lip_long int64\n", + "gender int64\n", + "dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Now all is numeric data\n", + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "35388ca3", + "metadata": {}, + "outputs": [], + "source": [ + "# Split dataset into X (Features) and y (Labels)\n", + "\n", + "# X is ALL columns except the last column (usually the label to be predicted)\n", + "X = df.iloc[:,:-1]\n", + "# y is the LABEL column (to be predicted)\n", + "y = df.iloc[:,-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "14c3347e", + "metadata": {}, + "outputs": [], + "source": [ + "# Use sklearn's train_test_split function imported before\n", + "X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)" + ] + }, + { + "cell_type": "markdown", + "id": "fe832e3f", + "metadata": {}, + "source": [ + "## Using 4 Classifiers\n", + "It is suggested to take a 
deeper look at the parameters described in the documentation linked below, to better tune the classifiers.\n", + "- [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)\n", + "- [Decision Tree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html)\n", + "- [SVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)\n", + "- [K-Nearest Neighbors (KNN)](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f83a2e5c", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.svm import SVC\n", + "from sklearn.neighbors import KNeighborsClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "dc4c2062", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logistic Regression Accuracy: 0.9682063587282543\n", + "Decision Tree Accuracy: 0.8792241551689662\n", + "SVM Accuracy: 0.967006598680264\n", + "K-Means Accuracy: 0.9754049190161967\n" + ] + } + ], + "source": [ + "# Logistic Regression\n", + "# Train the model\n", + "LogR = LogisticRegression(random_state=0, multi_class='ovr').fit(X_train, y_train)\n", + "# Predict the test set\n", + "LogR_pred = LogR.predict(X_test)\n", + "\n", + "# Decision Tree\n", + "dtree = DecisionTreeClassifier(max_depth = 2).fit(X_train, y_train)\n", + "dtree_pred = dtree.predict(X_test)\n", + "\n", + "# SVM\n", + "svm = SVC(kernel='linear',C=1).fit(X_train, y_train)\n", + "svm_pred = svm.predict(X_test)\n", + "\n", + "# K-Nearest Neighbors (KNN)\n", + "knn = KNeighborsClassifier(n_neighbors=5).fit(X_train,y_train)\n", + "knn_pred = knn.predict(X_test)\n", + "\n", + "# See Accuracy of each classifier\n", + "print(\"Logistic Regression Accuracy: \"+ 
str(LogR.score(X,y)))\n", + "print(\"Decision Tree Accuracy: \"+ str(dtree.score(X,y)))\n", + "print(\"SVM Accuracy: \"+ str(svm.score(X,y)))\n", + "print(\"K-Means Accuracy: \"+ str(knn.score(X,y)))" + ] + }, + { + "cell_type": "markdown", + "id": "00f72b96", + "metadata": {}, + "source": [ + "## Try on a new dataset\n", + "Use one (or more) of the models above as a predictor on a new dataset. Assuming it has the same columns but different values, we get..." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9c24db9a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
long_hairforehead_width_cmforehead_height_cmnose_widenose_longlips_thindistance_nose_to_lip_long
0114.56.70111
1114.05.90000
2112.96.41001
\n", + "
" + ], + "text/plain": [ + " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", + "0 1 14.5 6.7 0 1 \n", + "1 1 14.0 5.9 0 0 \n", + "2 1 12.9 6.4 1 0 \n", + "\n", + " lips_thin distance_nose_to_lip_long \n", + "0 1 1 \n", + "1 0 0 \n", + "2 0 1 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# For this example we use 3 rows of data to be predicted\n", + "dval = pd.read_csv(r'D:\\archive\\valgend.csv', encoding='utf-8')\n", + "dval.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ad501b6a", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# predict with knn (change to which model you choose)\n", + "knn_pred_new = knn.predict(dval)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8896ab72", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 0], dtype=int64)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# See the result\n", + "knn_pred_new" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7fa9db00", + "metadata": {}, + "outputs": [], + "source": [ + "# Add new column in new dataframe for placing the results, pass the \"result\" from before\n", + "dval[\"pred_gender\"]=knn_pred_new" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "6155a519", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
long_hairforehead_width_cmforehead_height_cmnose_widenose_longlips_thindistance_nose_to_lip_longpred_gender
0114.56.701110
1114.05.900001
2112.96.410010
\n", + "
" + ], + "text/plain": [ + " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", + "0 1 14.5 6.7 0 1 \n", + "1 1 14.0 5.9 0 0 \n", + "2 1 12.9 6.4 1 0 \n", + "\n", + " lips_thin distance_nose_to_lip_long pred_gender \n", + "0 1 1 0 \n", + "1 0 0 1 \n", + "2 0 1 0 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# See data with appended prediction (last column)\n", + "dval.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "c2587a57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
long_hairforehead_width_cmforehead_height_cmnose_widenose_longlips_thindistance_nose_to_lip_longpred_gender
0114.56.70111Male
1114.05.90000Female
2112.96.41001Male
\n", + "
" + ], + "text/plain": [ + " long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n", + "0 1 14.5 6.7 0 1 \n", + "1 1 14.0 5.9 0 0 \n", + "2 1 12.9 6.4 1 0 \n", + "\n", + " lips_thin distance_nose_to_lip_long pred_gender \n", + "0 1 1 Male \n", + "1 0 0 Female \n", + "2 0 1 Male " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Converting back to labels\n", + "dval['pred_gender']=dval['pred_gender'].replace(0,'Male')\n", + "dval['pred_gender']=dval['pred_gender'].replace(1,'Female')\n", + "dval.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Multi-label pred.ipynb b/Multi-label pred.ipynb new file mode 100644 index 0000000..5b32f73 --- /dev/null +++ b/Multi-label pred.ipynb @@ -0,0 +1,794 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "5c3d106c", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt \n", + "import seaborn as sns \n", + "import plotly as py\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7e7ad082", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdSepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCmSpecies
015.13.51.40.2Iris-setosa
124.93.01.40.2Iris-setosa
234.73.21.30.2Iris-setosa
344.63.11.50.2Iris-setosa
455.03.61.40.2Iris-setosa
\n", + "
" + ], + "text/plain": [ + " Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species\n", + "0 1 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 2 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 3 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5 5.0 3.6 1.4 0.2 Iris-setosa" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(r'D:\\archive\\iris.csv', encoding='utf-8')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a85eca81", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdSepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCm
count150.000000150.000000150.000000150.000000150.000000
mean75.5000005.8433333.0540003.7586671.198667
std43.4453680.8280660.4335941.7644200.763161
min1.0000004.3000002.0000001.0000000.100000
25%38.2500005.1000002.8000001.6000000.300000
50%75.5000005.8000003.0000004.3500001.300000
75%112.7500006.4000003.3000005.1000001.800000
max150.0000007.9000004.4000006.9000002.500000
\n", + "
" + ], + "text/plain": [ + " Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm\n", + "count 150.000000 150.000000 150.000000 150.000000 150.000000\n", + "mean 75.500000 5.843333 3.054000 3.758667 1.198667\n", + "std 43.445368 0.828066 0.433594 1.764420 0.763161\n", + "min 1.000000 4.300000 2.000000 1.000000 0.100000\n", + "25% 38.250000 5.100000 2.800000 1.600000 0.300000\n", + "50% 75.500000 5.800000 3.000000 4.350000 1.300000\n", + "75% 112.750000 6.400000 3.300000 5.100000 1.800000\n", + "max 150.000000 7.900000 4.400000 6.900000 2.500000" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "fd80a4a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id int64\n", + "SepalLengthCm float64\n", + "SepalWidthCm float64\n", + "PetalLengthCm float64\n", + "PetalWidthCm float64\n", + "Species object\n", + "dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cc10d9c3", + "metadata": {}, + "outputs": [], + "source": [ + "import sklearn" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d07459e2", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f917c7bd", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "9d3e54c7", + "metadata": {}, + "outputs": [], + "source": [ + "iris = datasets.load_iris()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1c34bd6d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': 
array([[5.1, 3.5, 1.4, 0.2],\n", + " [4.9, 3. , 1.4, 0.2],\n", + " [4.7, 3.2, 1.3, 0.2],\n", + " [4.6, 3.1, 1.5, 0.2],\n", + " [5. , 3.6, 1.4, 0.2],\n", + " [5.4, 3.9, 1.7, 0.4],\n", + " [4.6, 3.4, 1.4, 0.3],\n", + " [5. , 3.4, 1.5, 0.2],\n", + " [4.4, 2.9, 1.4, 0.2],\n", + " [4.9, 3.1, 1.5, 0.1],\n", + " [5.4, 3.7, 1.5, 0.2],\n", + " [4.8, 3.4, 1.6, 0.2],\n", + " [4.8, 3. , 1.4, 0.1],\n", + " [4.3, 3. , 1.1, 0.1],\n", + " [5.8, 4. , 1.2, 0.2],\n", + " [5.7, 4.4, 1.5, 0.4],\n", + " [5.4, 3.9, 1.3, 0.4],\n", + " [5.1, 3.5, 1.4, 0.3],\n", + " [5.7, 3.8, 1.7, 0.3],\n", + " [5.1, 3.8, 1.5, 0.3],\n", + " [5.4, 3.4, 1.7, 0.2],\n", + " [5.1, 3.7, 1.5, 0.4],\n", + " [4.6, 3.6, 1. , 0.2],\n", + " [5.1, 3.3, 1.7, 0.5],\n", + " [4.8, 3.4, 1.9, 0.2],\n", + " [5. , 3. , 1.6, 0.2],\n", + " [5. , 3.4, 1.6, 0.4],\n", + " [5.2, 3.5, 1.5, 0.2],\n", + " [5.2, 3.4, 1.4, 0.2],\n", + " [4.7, 3.2, 1.6, 0.2],\n", + " [4.8, 3.1, 1.6, 0.2],\n", + " [5.4, 3.4, 1.5, 0.4],\n", + " [5.2, 4.1, 1.5, 0.1],\n", + " [5.5, 4.2, 1.4, 0.2],\n", + " [4.9, 3.1, 1.5, 0.2],\n", + " [5. , 3.2, 1.2, 0.2],\n", + " [5.5, 3.5, 1.3, 0.2],\n", + " [4.9, 3.6, 1.4, 0.1],\n", + " [4.4, 3. , 1.3, 0.2],\n", + " [5.1, 3.4, 1.5, 0.2],\n", + " [5. , 3.5, 1.3, 0.3],\n", + " [4.5, 2.3, 1.3, 0.3],\n", + " [4.4, 3.2, 1.3, 0.2],\n", + " [5. , 3.5, 1.6, 0.6],\n", + " [5.1, 3.8, 1.9, 0.4],\n", + " [4.8, 3. , 1.4, 0.3],\n", + " [5.1, 3.8, 1.6, 0.2],\n", + " [4.6, 3.2, 1.4, 0.2],\n", + " [5.3, 3.7, 1.5, 0.2],\n", + " [5. , 3.3, 1.4, 0.2],\n", + " [7. , 3.2, 4.7, 1.4],\n", + " [6.4, 3.2, 4.5, 1.5],\n", + " [6.9, 3.1, 4.9, 1.5],\n", + " [5.5, 2.3, 4. , 1.3],\n", + " [6.5, 2.8, 4.6, 1.5],\n", + " [5.7, 2.8, 4.5, 1.3],\n", + " [6.3, 3.3, 4.7, 1.6],\n", + " [4.9, 2.4, 3.3, 1. ],\n", + " [6.6, 2.9, 4.6, 1.3],\n", + " [5.2, 2.7, 3.9, 1.4],\n", + " [5. , 2. , 3.5, 1. ],\n", + " [5.9, 3. , 4.2, 1.5],\n", + " [6. , 2.2, 4. , 1. ],\n", + " [6.1, 2.9, 4.7, 1.4],\n", + " [5.6, 2.9, 3.6, 1.3],\n", + " [6.7, 3.1, 4.4, 1.4],\n", + " [5.6, 3. 
, 4.5, 1.5],\n", + " [5.8, 2.7, 4.1, 1. ],\n", + " [6.2, 2.2, 4.5, 1.5],\n", + " [5.6, 2.5, 3.9, 1.1],\n", + " [5.9, 3.2, 4.8, 1.8],\n", + " [6.1, 2.8, 4. , 1.3],\n", + " [6.3, 2.5, 4.9, 1.5],\n", + " [6.1, 2.8, 4.7, 1.2],\n", + " [6.4, 2.9, 4.3, 1.3],\n", + " [6.6, 3. , 4.4, 1.4],\n", + " [6.8, 2.8, 4.8, 1.4],\n", + " [6.7, 3. , 5. , 1.7],\n", + " [6. , 2.9, 4.5, 1.5],\n", + " [5.7, 2.6, 3.5, 1. ],\n", + " [5.5, 2.4, 3.8, 1.1],\n", + " [5.5, 2.4, 3.7, 1. ],\n", + " [5.8, 2.7, 3.9, 1.2],\n", + " [6. , 2.7, 5.1, 1.6],\n", + " [5.4, 3. , 4.5, 1.5],\n", + " [6. , 3.4, 4.5, 1.6],\n", + " [6.7, 3.1, 4.7, 1.5],\n", + " [6.3, 2.3, 4.4, 1.3],\n", + " [5.6, 3. , 4.1, 1.3],\n", + " [5.5, 2.5, 4. , 1.3],\n", + " [5.5, 2.6, 4.4, 1.2],\n", + " [6.1, 3. , 4.6, 1.4],\n", + " [5.8, 2.6, 4. , 1.2],\n", + " [5. , 2.3, 3.3, 1. ],\n", + " [5.6, 2.7, 4.2, 1.3],\n", + " [5.7, 3. , 4.2, 1.2],\n", + " [5.7, 2.9, 4.2, 1.3],\n", + " [6.2, 2.9, 4.3, 1.3],\n", + " [5.1, 2.5, 3. , 1.1],\n", + " [5.7, 2.8, 4.1, 1.3],\n", + " [6.3, 3.3, 6. , 2.5],\n", + " [5.8, 2.7, 5.1, 1.9],\n", + " [7.1, 3. , 5.9, 2.1],\n", + " [6.3, 2.9, 5.6, 1.8],\n", + " [6.5, 3. , 5.8, 2.2],\n", + " [7.6, 3. , 6.6, 2.1],\n", + " [4.9, 2.5, 4.5, 1.7],\n", + " [7.3, 2.9, 6.3, 1.8],\n", + " [6.7, 2.5, 5.8, 1.8],\n", + " [7.2, 3.6, 6.1, 2.5],\n", + " [6.5, 3.2, 5.1, 2. ],\n", + " [6.4, 2.7, 5.3, 1.9],\n", + " [6.8, 3. , 5.5, 2.1],\n", + " [5.7, 2.5, 5. , 2. ],\n", + " [5.8, 2.8, 5.1, 2.4],\n", + " [6.4, 3.2, 5.3, 2.3],\n", + " [6.5, 3. , 5.5, 1.8],\n", + " [7.7, 3.8, 6.7, 2.2],\n", + " [7.7, 2.6, 6.9, 2.3],\n", + " [6. , 2.2, 5. , 1.5],\n", + " [6.9, 3.2, 5.7, 2.3],\n", + " [5.6, 2.8, 4.9, 2. ],\n", + " [7.7, 2.8, 6.7, 2. ],\n", + " [6.3, 2.7, 4.9, 1.8],\n", + " [6.7, 3.3, 5.7, 2.1],\n", + " [7.2, 3.2, 6. , 1.8],\n", + " [6.2, 2.8, 4.8, 1.8],\n", + " [6.1, 3. , 4.9, 1.8],\n", + " [6.4, 2.8, 5.6, 2.1],\n", + " [7.2, 3. , 5.8, 1.6],\n", + " [7.4, 2.8, 6.1, 1.9],\n", + " [7.9, 3.8, 6.4, 2. 
],\n", + " [6.4, 2.8, 5.6, 2.2],\n", + " [6.3, 2.8, 5.1, 1.5],\n", + " [6.1, 2.6, 5.6, 1.4],\n", + " [7.7, 3. , 6.1, 2.3],\n", + " [6.3, 3.4, 5.6, 2.4],\n", + " [6.4, 3.1, 5.5, 1.8],\n", + " [6. , 3. , 4.8, 1.8],\n", + " [6.9, 3.1, 5.4, 2.1],\n", + " [6.7, 3.1, 5.6, 2.4],\n", + " [6.9, 3.1, 5.1, 2.3],\n", + " [5.8, 2.7, 5.1, 1.9],\n", + " [6.8, 3.2, 5.9, 2.3],\n", + " [6.7, 3.3, 5.7, 2.5],\n", + " [6.7, 3. , 5.2, 2.3],\n", + " [6.3, 2.5, 5. , 1.9],\n", + " [6.5, 3. , 5.2, 2. ],\n", + " [6.2, 3.4, 5.4, 2.3],\n", + " [5.9, 3. , 5.1, 1.8]]),\n", + " 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", + " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", + " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),\n", + " 'frame': None,\n", + " 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ResultsReachImpressionsVideo_playsLink_clicksEngagementAmount_Spent
015341534153514480622
1859385931059902220
214057157245701405
313139614790134723
43761650924013539568035711133
\n", + "" + ], + "text/plain": [ + " Results Reach Impressions Video_plays Link_clicks Engagement \\\n", + "0 1534 1534 1535 1448 0 62 \n", + "1 8593 8593 10599 0 2 2 \n", + "2 140 571 572 457 0 140 \n", + "3 13 1396 1479 0 13 47 \n", + "4 37616 5092 40135 39568 0 35711 \n", + "\n", + " Amount_Spent \n", + "0 2 \n", + "1 20 \n", + "2 5 \n", + "3 23 \n", + "4 133 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = {'Results': [1534,8593,140,13,37616,1060,694,64,17744],\n", + " 'Reach': [1534,8593,571,1396,5092,6933,2008,2825,6154],\n", + " 'Impressions': [1535,10599,572,1479,40135,11468,2435,5087,21332],\n", + " 'Video_plays': [1448,0,457,0,39568,0,1225,0,20905],\n", + " 'Link_clicks': [0,2,0,13,0,100,1,49,0],\n", + " 'Engagement': [62,2,140,47,35711,1060,694,145,15604],\n", + " 'Amount_Spent': [2,20,5,23,133,89,37,85,76]}\n", + "\n", + "df = pd.DataFrame(data)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b7bba8a6", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.multioutput import MultiOutputRegressor\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import mean_squared_error, r2_score" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "764ebe28", + "metadata": {}, + "outputs": [], + "source": [ + "# X contains the features (Amount spent)\n", + "X = df['Amount_Spent']\n", + "\n", + "# y contains the target variable (Results, Reach, Impressions, Video Plays, Link clicks, and Post engagement)\n", + "y = df[['Results', 'Reach', 'Impressions', 'Video_plays', 'Link_clicks', 'Engagement']]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fa4f048c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MultiOutputRegressor(estimator=LinearRegression())" + ] + }, + "execution_count": 9, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "# Split the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Create a Linear Regression model and wrap it in MultiOutputRegressor\n", + "model = MultiOutputRegressor(LinearRegression())\n", + "\n", + "# Train the model with the training data\n", + "model.fit(X_train.values.reshape(-1, 1), y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "53fda5c4", + "metadata": {}, + "outputs": [], + "source": [ + "# Predict the columns using the test data\n", + "y_pred = model.predict(X_test.values.reshape(-1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2fcd8845", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Squared Error: 100374744.3348547\n", + "R-squared: -3679.080219755561\n" + ] + } + ], + "source": [ + "# Evaluate the model's performance using metrics like Mean Squared Error (MSE) and R-squared (R2)\n", + "mse = mean_squared_error(y_test, y_pred)\n", + "r2 = r2_score(y_test, y_pred)\n", + "\n", + "print(\"Mean Squared Error:\", mse)\n", + "print(\"R-squared:\", r2)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "7c2584f6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted values: [ 726.14777024 1945.43688854 2292.37143629 1016.27642398 8.87683812\n", + " 240.56563231]\n" + ] + } + ], + "source": [ + "# Predict the columns for a new value of 'Amount spent'\n", + "X_new = [[20]]\n", + "predicted_values = model.predict([[20]])\n", + "print(\"Predicted values:\", predicted_values[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "545f8034", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ResultsReachImpressionsVideo_playsLink_clicksEngagement
0726.147771945.4368892292.3714361016.2764248.876838240.565632
\n", + "
" + ], + "text/plain": [ + " Results Reach Impressions Video_plays Link_clicks Engagement\n", + "0 726.14777 1945.436889 2292.371436 1016.276424 8.876838 240.565632" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Convert the predicted values into a new DataFrame\n", + "predicted_df = pd.DataFrame(data=predicted_values, columns=y.columns)\n", + "predicted_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf9c18ec", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Numeral_Regression.ipynb b/Numeral_Regression.ipynb new file mode 100644 index 0000000..e27fdc6 --- /dev/null +++ b/Numeral_Regression.ipynb @@ -0,0 +1,292 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "1732817d", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2c67c4f6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ResultsReachImpressionsVideo_playsLink_clicksEngagementAmount_Spent
015341534153514480622
1859385931059902220
214057157245701405
313139614790134723
43761650924013539568035711133
\n", + "
" + ], + "text/plain": [ + " Results Reach Impressions Video_plays Link_clicks Engagement \\\n", + "0 1534 1534 1535 1448 0 62 \n", + "1 8593 8593 10599 0 2 2 \n", + "2 140 571 572 457 0 140 \n", + "3 13 1396 1479 0 13 47 \n", + "4 37616 5092 40135 39568 0 35711 \n", + "\n", + " Amount_Spent \n", + "0 2 \n", + "1 20 \n", + "2 5 \n", + "3 23 \n", + "4 133 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = {'Results': [1534,8593,140,13,37616,1060,694,64,17744],\n", + " 'Reach': [1534,8593,571,1396,5092,6933,2008,2825,6154],\n", + " 'Impressions': [1535,10599,572,1479,40135,11468,2435,5087,21332],\n", + " 'Video_plays': [1448,0,457,0,39568,0,1225,0,20905],\n", + " 'Link_clicks': [0,2,0,13,0,100,1,49,0],\n", + " 'Engagement': [62,2,140,47,35711,1060,694,145,15604],\n", + " 'Amount_Spent': [2,20,5,23,133,89,37,85,76]}\n", + "\n", + "df = pd.DataFrame(data)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "096de0cb", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.metrics import mean_squared_error, r2_score" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "00517f34", + "metadata": {}, + "outputs": [], + "source": [ + "# X contains the features (Results, Reach, Impressions, Video Plays, Link clicks, and Post engagement)\n", + "X = df[['Results', 'Reach', 'Impressions', 'Video_plays', 'Link_clicks', 'Engagement']]\n", + "\n", + "# y contains the target variable (Amount spent)\n", + "y = df['Amount_Spent']" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a56a0001", + "metadata": {}, + "outputs": [], + "source": [ + "# Split dataset\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": 
"e54736e3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Linear Regression model\n", + "model = LinearRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2b5aa068", + "metadata": {}, + "outputs": [], + "source": [ + "# Predict model using X_test\n", + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f9eb2f9e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Squared Error: 10546.18825415638\n", + "R-squared: -8.984556927011957\n" + ] + } + ], + "source": [ + "# Evaluate model performance\n", + "mse = mean_squared_error(y_test, y_pred)\n", + "r2 = r2_score(y_test, y_pred)\n", + "\n", + "print(\"Mean Squared Error:\", mse)\n", + "print(\"R-squared:\", r2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0430cf6c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted amount spent: -34.41443487262584\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\sang.yogi\\Anaconda3\\lib\\site-packages\\sklearn\\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "# Predict the amount spent for new data\n", + "X_new = [[100, 2000, 5000, 1000, 50, 150]]\n", + "predicted_amount_spent = model.predict(X_new)\n", + "print(\"Predicted amount spent:\", predicted_amount_spent[0])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d17c4a06", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}