mirror of
https://github.com/youronlydimwit/Data_ScienceUse_Cases.git
synced 2025-12-17 15:49:59 +01:00
Add files via upload
This commit is contained in:
192
Confidence_Interval.ipynb
Normal file
192
Confidence_Interval.ipynb
Normal file
@@ -0,0 +1,192 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "410cdd47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "f769b682",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0.01390952774409444"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# T-Multiplier\n",
|
||||
"tstar = 1.96\n",
|
||||
"# P hat value\n",
|
||||
"p = .85\n",
|
||||
"# Number of observations\n",
|
||||
"n = 659\n",
|
||||
"\n",
|
||||
"# Calculate Standard Error\n",
|
||||
"se = np.sqrt((p * (1 - p))/n)\n",
|
||||
"se"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "d77c95f1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(0.8227373256215749, 0.8772626743784251)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Lower confidence band\n",
|
||||
"lcb = p - tstar * se\n",
|
||||
"# Upper confidence band\n",
|
||||
"ucb = p + tstar * se\n",
|
||||
"# Show confidence bands\n",
|
||||
"(lcb, ucb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "1d08b43b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Same process, using statsmodels library\n",
|
||||
"import statsmodels.api as sm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "41cb97c9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(0.8227378265796143, 0.8772621734203857)"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get confidence bands\n",
|
||||
"# n = observations\n",
|
||||
"# p = result of a survey \n",
|
||||
"sm.stats.proportion_confint(n * p, n)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4234b441",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Try to import dataset\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\"Cartwheeldata.csv\")\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d03c3d4f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Mean of a column\n",
|
||||
"mean = df[\"CWDistance\"].mean()\n",
|
||||
"# Standard deviation of a column\n",
|
||||
"sd = df[\"CWDistance\"].std()\n",
|
||||
"# Rows of the dataframe\n",
|
||||
"n = len(df)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c52dddd2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tstar = 2.064\n",
|
||||
"\n",
|
||||
"se = sd/np.sqrt(n)\n",
|
||||
"\n",
|
||||
"se"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2dfbab7d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lcb = mean - tstar * se\n",
|
||||
"ucb = mean + tstar * se\n",
|
||||
"(lcb, ucb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "649c18b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#..OR use statsmodels instead\n",
|
||||
"sm.stats.DescrStatsW(df[\"#ColumnName\"]).zconfint_mean()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
814
Label Prediction (Binary Example).ipynb
Normal file
814
Label Prediction (Binary Example).ipynb
Normal file
@@ -0,0 +1,814 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ee34a7c4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import Libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1a23a10f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import sklearn\n",
|
||||
"from sklearn.model_selection import train_test_split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3333920d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dataset\n",
|
||||
"For our dataset, you can find it [here.](https://www.kaggle.com/datasets/elakiricoder/gender-classification-dataset)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "5aea2295",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>long_hair</th>\n",
|
||||
" <th>forehead_width_cm</th>\n",
|
||||
" <th>forehead_height_cm</th>\n",
|
||||
" <th>nose_wide</th>\n",
|
||||
" <th>nose_long</th>\n",
|
||||
" <th>lips_thin</th>\n",
|
||||
" <th>distance_nose_to_lip_long</th>\n",
|
||||
" <th>gender</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>11.8</td>\n",
|
||||
" <td>6.1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>14.0</td>\n",
|
||||
" <td>5.4</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>11.8</td>\n",
|
||||
" <td>6.3</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>14.4</td>\n",
|
||||
" <td>6.1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>13.5</td>\n",
|
||||
" <td>5.9</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
||||
"0 1 11.8 6.1 1 0 \n",
|
||||
"1 0 14.0 5.4 0 0 \n",
|
||||
"2 0 11.8 6.3 1 1 \n",
|
||||
"3 0 14.4 6.1 0 1 \n",
|
||||
"4 1 13.5 5.9 0 0 \n",
|
||||
"\n",
|
||||
" lips_thin distance_nose_to_lip_long gender \n",
|
||||
"0 1 1 Male \n",
|
||||
"1 1 0 Female \n",
|
||||
"2 1 1 Male \n",
|
||||
"3 1 1 Male \n",
|
||||
"4 0 0 Female "
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load dataset\n",
|
||||
"df = pd.read_csv(r'D:\\archive\\gender_classification_v7.csv', encoding='utf-8')\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58b8ed5e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data Pre-processing\n",
|
||||
"For this example I skipped the Descriptive Statistics, and went to minor adjustments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "d93ff56d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"long_hair int64\n",
|
||||
"forehead_width_cm float64\n",
|
||||
"forehead_height_cm float64\n",
|
||||
"nose_wide int64\n",
|
||||
"nose_long int64\n",
|
||||
"lips_thin int64\n",
|
||||
"distance_nose_to_lip_long int64\n",
|
||||
"gender object\n",
|
||||
"dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Check Data types of dataframe columns\n",
|
||||
"df.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "19ae1cf5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>long_hair</th>\n",
|
||||
" <th>forehead_width_cm</th>\n",
|
||||
" <th>forehead_height_cm</th>\n",
|
||||
" <th>nose_wide</th>\n",
|
||||
" <th>nose_long</th>\n",
|
||||
" <th>lips_thin</th>\n",
|
||||
" <th>distance_nose_to_lip_long</th>\n",
|
||||
" <th>gender</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>11.8</td>\n",
|
||||
" <td>6.1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>14.0</td>\n",
|
||||
" <td>5.4</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>11.8</td>\n",
|
||||
" <td>6.3</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>14.4</td>\n",
|
||||
" <td>6.1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>13.5</td>\n",
|
||||
" <td>5.9</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
||||
"0 1 11.8 6.1 1 0 \n",
|
||||
"1 0 14.0 5.4 0 0 \n",
|
||||
"2 0 11.8 6.3 1 1 \n",
|
||||
"3 0 14.4 6.1 0 1 \n",
|
||||
"4 1 13.5 5.9 0 0 \n",
|
||||
"\n",
|
||||
" lips_thin distance_nose_to_lip_long gender \n",
|
||||
"0 1 1 0 \n",
|
||||
"1 1 0 1 \n",
|
||||
"2 1 1 0 \n",
|
||||
"3 1 1 0 \n",
|
||||
"4 0 0 1 "
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Convert Gender labels into integer values, for classification\n",
|
||||
"df['gender']=df['gender'].replace('Male',0)\n",
|
||||
"df['gender']=df['gender'].replace('Female',1)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "b573f11e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"long_hair int64\n",
|
||||
"forehead_width_cm float64\n",
|
||||
"forehead_height_cm float64\n",
|
||||
"nose_wide int64\n",
|
||||
"nose_long int64\n",
|
||||
"lips_thin int64\n",
|
||||
"distance_nose_to_lip_long int64\n",
|
||||
"gender int64\n",
|
||||
"dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Now all is numeric data\n",
|
||||
"df.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "35388ca3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split dataset into X (Features) and y (Labels)\n",
|
||||
"\n",
|
||||
"# X is ALL columns except the last column (usually the label to be predicted)\n",
|
||||
"X = df.iloc[:,:-1]\n",
|
||||
"# y is the LABEL column (to be predicted)\n",
|
||||
"y = df.iloc[:,-1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "14c3347e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use sklearn's train_test_split function imported before\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fe832e3f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using 4 Classifiers\n",
|
||||
"It is sugggested to take a deeper look of the parameters provided in documentations below, for better tweaking of the classifiers.\n",
|
||||
"- [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)\n",
|
||||
"- [Decision Tree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html)\n",
|
||||
"- [SVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)\n",
|
||||
"- [K-Means / KNN](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "f83a2e5c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"from sklearn.neighbors import KNeighborsClassifier"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "dc4c2062",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Logistic Regression Accuracy: 0.9682063587282543\n",
|
||||
"Decision Tree Accuracy: 0.8792241551689662\n",
|
||||
"SVM Accuracy: 0.967006598680264\n",
|
||||
"K-Means Accuracy: 0.9754049190161967\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Logistic Regression\n",
|
||||
"# Train the model\n",
|
||||
"LogR = LogisticRegression(random_state=0, multi_class='ovr').fit(X_train, y_train)\n",
|
||||
"# Predict the test set\n",
|
||||
"LogR_pred = LogR.predict(X_test)\n",
|
||||
"\n",
|
||||
"# Decision Tree\n",
|
||||
"dtree = DecisionTreeClassifier(max_depth = 2).fit(X_train, y_train)\n",
|
||||
"dtree_pred = dtree.predict(X_test)\n",
|
||||
"\n",
|
||||
"# SVM\n",
|
||||
"svm = SVC(kernel='linear',C=1).fit(X_train, y_train)\n",
|
||||
"svm_pred = svm.predict(X_test)\n",
|
||||
"\n",
|
||||
"# K-Means\n",
|
||||
"knn = KNeighborsClassifier(n_neighbors=5).fit(X_train,y_train)\n",
|
||||
"knn_pred = knn.predict(X_test)\n",
|
||||
"\n",
|
||||
"# See Accuracy of each classifier\n",
|
||||
"print(\"Logistic Regression Accuracy: \"+ str(LogR.score(X,y)))\n",
|
||||
"print(\"Decision Tree Accuracy: \"+ str(dtree.score(X,y)))\n",
|
||||
"print(\"SVM Accuracy: \"+ str(svm.score(X,y)))\n",
|
||||
"print(\"K-Means Accuracy: \"+ str(knn.score(X,y)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "00f72b96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Try on a new dataset\n",
|
||||
"Use one (or many) model above as predictor in a new dataset. Assuming we have the same columns but different values, we get.."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "9c24db9a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>long_hair</th>\n",
|
||||
" <th>forehead_width_cm</th>\n",
|
||||
" <th>forehead_height_cm</th>\n",
|
||||
" <th>nose_wide</th>\n",
|
||||
" <th>nose_long</th>\n",
|
||||
" <th>lips_thin</th>\n",
|
||||
" <th>distance_nose_to_lip_long</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>14.5</td>\n",
|
||||
" <td>6.7</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>14.0</td>\n",
|
||||
" <td>5.9</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>12.9</td>\n",
|
||||
" <td>6.4</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
||||
"0 1 14.5 6.7 0 1 \n",
|
||||
"1 1 14.0 5.9 0 0 \n",
|
||||
"2 1 12.9 6.4 1 0 \n",
|
||||
"\n",
|
||||
" lips_thin distance_nose_to_lip_long \n",
|
||||
"0 1 1 \n",
|
||||
"1 0 0 \n",
|
||||
"2 0 1 "
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# For this example we use 3 rows of data to be predicted\n",
|
||||
"dval = pd.read_csv(r'D:\\archive\\valgend.csv', encoding='utf-8')\n",
|
||||
"dval.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "ad501b6a",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# predict with knn (change to which model you choose)\n",
|
||||
"knn_pred_new = knn.predict(dval)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "8896ab72",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([0, 1, 0], dtype=int64)"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# See the result\n",
|
||||
"knn_pred_new"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "7fa9db00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Add new column in new dataframe for placing the results, pass the \"result\" from before\n",
|
||||
"dval[\"pred_gender\"]=knn_pred_new"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "6155a519",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>long_hair</th>\n",
|
||||
" <th>forehead_width_cm</th>\n",
|
||||
" <th>forehead_height_cm</th>\n",
|
||||
" <th>nose_wide</th>\n",
|
||||
" <th>nose_long</th>\n",
|
||||
" <th>lips_thin</th>\n",
|
||||
" <th>distance_nose_to_lip_long</th>\n",
|
||||
" <th>pred_gender</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>14.5</td>\n",
|
||||
" <td>6.7</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>14.0</td>\n",
|
||||
" <td>5.9</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>12.9</td>\n",
|
||||
" <td>6.4</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
||||
"0 1 14.5 6.7 0 1 \n",
|
||||
"1 1 14.0 5.9 0 0 \n",
|
||||
"2 1 12.9 6.4 1 0 \n",
|
||||
"\n",
|
||||
" lips_thin distance_nose_to_lip_long pred_gender \n",
|
||||
"0 1 1 0 \n",
|
||||
"1 0 0 1 \n",
|
||||
"2 0 1 0 "
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# See data with appended prediction (last column)\n",
|
||||
"dval.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "c2587a57",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>long_hair</th>\n",
|
||||
" <th>forehead_width_cm</th>\n",
|
||||
" <th>forehead_height_cm</th>\n",
|
||||
" <th>nose_wide</th>\n",
|
||||
" <th>nose_long</th>\n",
|
||||
" <th>lips_thin</th>\n",
|
||||
" <th>distance_nose_to_lip_long</th>\n",
|
||||
" <th>pred_gender</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>14.5</td>\n",
|
||||
" <td>6.7</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>14.0</td>\n",
|
||||
" <td>5.9</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>12.9</td>\n",
|
||||
" <td>6.4</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Male</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
||||
"0 1 14.5 6.7 0 1 \n",
|
||||
"1 1 14.0 5.9 0 0 \n",
|
||||
"2 1 12.9 6.4 1 0 \n",
|
||||
"\n",
|
||||
" lips_thin distance_nose_to_lip_long pred_gender \n",
|
||||
"0 1 1 Male \n",
|
||||
"1 0 0 Female \n",
|
||||
"2 0 1 Male "
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Converting back to labels\n",
|
||||
"dval['pred_gender']=dval['pred_gender'].replace(0,'Male')\n",
|
||||
"dval['pred_gender']=dval['pred_gender'].replace(1,'Female')\n",
|
||||
"dval.head()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
794
Multi-label pred.ipynb
Normal file
794
Multi-label pred.ipynb
Normal file
@@ -0,0 +1,794 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5c3d106c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt \n",
|
||||
"import seaborn as sns \n",
|
||||
"import plotly as py\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7e7ad082",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Id</th>\n",
|
||||
" <th>SepalLengthCm</th>\n",
|
||||
" <th>SepalWidthCm</th>\n",
|
||||
" <th>PetalLengthCm</th>\n",
|
||||
" <th>PetalWidthCm</th>\n",
|
||||
" <th>Species</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>5.1</td>\n",
|
||||
" <td>3.5</td>\n",
|
||||
" <td>1.4</td>\n",
|
||||
" <td>0.2</td>\n",
|
||||
" <td>Iris-setosa</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>4.9</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>1.4</td>\n",
|
||||
" <td>0.2</td>\n",
|
||||
" <td>Iris-setosa</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>4.7</td>\n",
|
||||
" <td>3.2</td>\n",
|
||||
" <td>1.3</td>\n",
|
||||
" <td>0.2</td>\n",
|
||||
" <td>Iris-setosa</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>4.6</td>\n",
|
||||
" <td>3.1</td>\n",
|
||||
" <td>1.5</td>\n",
|
||||
" <td>0.2</td>\n",
|
||||
" <td>Iris-setosa</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>3.6</td>\n",
|
||||
" <td>1.4</td>\n",
|
||||
" <td>0.2</td>\n",
|
||||
" <td>Iris-setosa</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species\n",
|
||||
"0 1 5.1 3.5 1.4 0.2 Iris-setosa\n",
|
||||
"1 2 4.9 3.0 1.4 0.2 Iris-setosa\n",
|
||||
"2 3 4.7 3.2 1.3 0.2 Iris-setosa\n",
|
||||
"3 4 4.6 3.1 1.5 0.2 Iris-setosa\n",
|
||||
"4 5 5.0 3.6 1.4 0.2 Iris-setosa"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df = pd.read_csv(r'D:\\archive\\iris.csv', encoding='utf-8')\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a85eca81",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Id</th>\n",
|
||||
" <th>SepalLengthCm</th>\n",
|
||||
" <th>SepalWidthCm</th>\n",
|
||||
" <th>PetalLengthCm</th>\n",
|
||||
" <th>PetalWidthCm</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>count</th>\n",
|
||||
" <td>150.000000</td>\n",
|
||||
" <td>150.000000</td>\n",
|
||||
" <td>150.000000</td>\n",
|
||||
" <td>150.000000</td>\n",
|
||||
" <td>150.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>mean</th>\n",
|
||||
" <td>75.500000</td>\n",
|
||||
" <td>5.843333</td>\n",
|
||||
" <td>3.054000</td>\n",
|
||||
" <td>3.758667</td>\n",
|
||||
" <td>1.198667</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>std</th>\n",
|
||||
" <td>43.445368</td>\n",
|
||||
" <td>0.828066</td>\n",
|
||||
" <td>0.433594</td>\n",
|
||||
" <td>1.764420</td>\n",
|
||||
" <td>0.763161</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>min</th>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" <td>4.300000</td>\n",
|
||||
" <td>2.000000</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" <td>0.100000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25%</th>\n",
|
||||
" <td>38.250000</td>\n",
|
||||
" <td>5.100000</td>\n",
|
||||
" <td>2.800000</td>\n",
|
||||
" <td>1.600000</td>\n",
|
||||
" <td>0.300000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50%</th>\n",
|
||||
" <td>75.500000</td>\n",
|
||||
" <td>5.800000</td>\n",
|
||||
" <td>3.000000</td>\n",
|
||||
" <td>4.350000</td>\n",
|
||||
" <td>1.300000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>75%</th>\n",
|
||||
" <td>112.750000</td>\n",
|
||||
" <td>6.400000</td>\n",
|
||||
" <td>3.300000</td>\n",
|
||||
" <td>5.100000</td>\n",
|
||||
" <td>1.800000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>max</th>\n",
|
||||
" <td>150.000000</td>\n",
|
||||
" <td>7.900000</td>\n",
|
||||
" <td>4.400000</td>\n",
|
||||
" <td>6.900000</td>\n",
|
||||
" <td>2.500000</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm\n",
|
||||
"count 150.000000 150.000000 150.000000 150.000000 150.000000\n",
|
||||
"mean 75.500000 5.843333 3.054000 3.758667 1.198667\n",
|
||||
"std 43.445368 0.828066 0.433594 1.764420 0.763161\n",
|
||||
"min 1.000000 4.300000 2.000000 1.000000 0.100000\n",
|
||||
"25% 38.250000 5.100000 2.800000 1.600000 0.300000\n",
|
||||
"50% 75.500000 5.800000 3.000000 4.350000 1.300000\n",
|
||||
"75% 112.750000 6.400000 3.300000 5.100000 1.800000\n",
|
||||
"max 150.000000 7.900000 4.400000 6.900000 2.500000"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.describe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "fd80a4a8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Id int64\n",
|
||||
"SepalLengthCm float64\n",
|
||||
"SepalWidthCm float64\n",
|
||||
"PetalLengthCm float64\n",
|
||||
"PetalWidthCm float64\n",
|
||||
"Species object\n",
|
||||
"dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "cc10d9c3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sklearn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "d07459e2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics import confusion_matrix\n",
|
||||
"from sklearn.model_selection import train_test_split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "f917c7bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn import datasets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9d3e54c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iris = datasets.load_iris()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "1c34bd6d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'data': array([[5.1, 3.5, 1.4, 0.2],\n",
|
||||
" [4.9, 3. , 1.4, 0.2],\n",
|
||||
" [4.7, 3.2, 1.3, 0.2],\n",
|
||||
" [4.6, 3.1, 1.5, 0.2],\n",
|
||||
" [5. , 3.6, 1.4, 0.2],\n",
|
||||
" [5.4, 3.9, 1.7, 0.4],\n",
|
||||
" [4.6, 3.4, 1.4, 0.3],\n",
|
||||
" [5. , 3.4, 1.5, 0.2],\n",
|
||||
" [4.4, 2.9, 1.4, 0.2],\n",
|
||||
" [4.9, 3.1, 1.5, 0.1],\n",
|
||||
" [5.4, 3.7, 1.5, 0.2],\n",
|
||||
" [4.8, 3.4, 1.6, 0.2],\n",
|
||||
" [4.8, 3. , 1.4, 0.1],\n",
|
||||
" [4.3, 3. , 1.1, 0.1],\n",
|
||||
" [5.8, 4. , 1.2, 0.2],\n",
|
||||
" [5.7, 4.4, 1.5, 0.4],\n",
|
||||
" [5.4, 3.9, 1.3, 0.4],\n",
|
||||
" [5.1, 3.5, 1.4, 0.3],\n",
|
||||
" [5.7, 3.8, 1.7, 0.3],\n",
|
||||
" [5.1, 3.8, 1.5, 0.3],\n",
|
||||
" [5.4, 3.4, 1.7, 0.2],\n",
|
||||
" [5.1, 3.7, 1.5, 0.4],\n",
|
||||
" [4.6, 3.6, 1. , 0.2],\n",
|
||||
" [5.1, 3.3, 1.7, 0.5],\n",
|
||||
" [4.8, 3.4, 1.9, 0.2],\n",
|
||||
" [5. , 3. , 1.6, 0.2],\n",
|
||||
" [5. , 3.4, 1.6, 0.4],\n",
|
||||
" [5.2, 3.5, 1.5, 0.2],\n",
|
||||
" [5.2, 3.4, 1.4, 0.2],\n",
|
||||
" [4.7, 3.2, 1.6, 0.2],\n",
|
||||
" [4.8, 3.1, 1.6, 0.2],\n",
|
||||
" [5.4, 3.4, 1.5, 0.4],\n",
|
||||
" [5.2, 4.1, 1.5, 0.1],\n",
|
||||
" [5.5, 4.2, 1.4, 0.2],\n",
|
||||
" [4.9, 3.1, 1.5, 0.2],\n",
|
||||
" [5. , 3.2, 1.2, 0.2],\n",
|
||||
" [5.5, 3.5, 1.3, 0.2],\n",
|
||||
" [4.9, 3.6, 1.4, 0.1],\n",
|
||||
" [4.4, 3. , 1.3, 0.2],\n",
|
||||
" [5.1, 3.4, 1.5, 0.2],\n",
|
||||
" [5. , 3.5, 1.3, 0.3],\n",
|
||||
" [4.5, 2.3, 1.3, 0.3],\n",
|
||||
" [4.4, 3.2, 1.3, 0.2],\n",
|
||||
" [5. , 3.5, 1.6, 0.6],\n",
|
||||
" [5.1, 3.8, 1.9, 0.4],\n",
|
||||
" [4.8, 3. , 1.4, 0.3],\n",
|
||||
" [5.1, 3.8, 1.6, 0.2],\n",
|
||||
" [4.6, 3.2, 1.4, 0.2],\n",
|
||||
" [5.3, 3.7, 1.5, 0.2],\n",
|
||||
" [5. , 3.3, 1.4, 0.2],\n",
|
||||
" [7. , 3.2, 4.7, 1.4],\n",
|
||||
" [6.4, 3.2, 4.5, 1.5],\n",
|
||||
" [6.9, 3.1, 4.9, 1.5],\n",
|
||||
" [5.5, 2.3, 4. , 1.3],\n",
|
||||
" [6.5, 2.8, 4.6, 1.5],\n",
|
||||
" [5.7, 2.8, 4.5, 1.3],\n",
|
||||
" [6.3, 3.3, 4.7, 1.6],\n",
|
||||
" [4.9, 2.4, 3.3, 1. ],\n",
|
||||
" [6.6, 2.9, 4.6, 1.3],\n",
|
||||
" [5.2, 2.7, 3.9, 1.4],\n",
|
||||
" [5. , 2. , 3.5, 1. ],\n",
|
||||
" [5.9, 3. , 4.2, 1.5],\n",
|
||||
" [6. , 2.2, 4. , 1. ],\n",
|
||||
" [6.1, 2.9, 4.7, 1.4],\n",
|
||||
" [5.6, 2.9, 3.6, 1.3],\n",
|
||||
" [6.7, 3.1, 4.4, 1.4],\n",
|
||||
" [5.6, 3. , 4.5, 1.5],\n",
|
||||
" [5.8, 2.7, 4.1, 1. ],\n",
|
||||
" [6.2, 2.2, 4.5, 1.5],\n",
|
||||
" [5.6, 2.5, 3.9, 1.1],\n",
|
||||
" [5.9, 3.2, 4.8, 1.8],\n",
|
||||
" [6.1, 2.8, 4. , 1.3],\n",
|
||||
" [6.3, 2.5, 4.9, 1.5],\n",
|
||||
" [6.1, 2.8, 4.7, 1.2],\n",
|
||||
" [6.4, 2.9, 4.3, 1.3],\n",
|
||||
" [6.6, 3. , 4.4, 1.4],\n",
|
||||
" [6.8, 2.8, 4.8, 1.4],\n",
|
||||
" [6.7, 3. , 5. , 1.7],\n",
|
||||
" [6. , 2.9, 4.5, 1.5],\n",
|
||||
" [5.7, 2.6, 3.5, 1. ],\n",
|
||||
" [5.5, 2.4, 3.8, 1.1],\n",
|
||||
" [5.5, 2.4, 3.7, 1. ],\n",
|
||||
" [5.8, 2.7, 3.9, 1.2],\n",
|
||||
" [6. , 2.7, 5.1, 1.6],\n",
|
||||
" [5.4, 3. , 4.5, 1.5],\n",
|
||||
" [6. , 3.4, 4.5, 1.6],\n",
|
||||
" [6.7, 3.1, 4.7, 1.5],\n",
|
||||
" [6.3, 2.3, 4.4, 1.3],\n",
|
||||
" [5.6, 3. , 4.1, 1.3],\n",
|
||||
" [5.5, 2.5, 4. , 1.3],\n",
|
||||
" [5.5, 2.6, 4.4, 1.2],\n",
|
||||
" [6.1, 3. , 4.6, 1.4],\n",
|
||||
" [5.8, 2.6, 4. , 1.2],\n",
|
||||
" [5. , 2.3, 3.3, 1. ],\n",
|
||||
" [5.6, 2.7, 4.2, 1.3],\n",
|
||||
" [5.7, 3. , 4.2, 1.2],\n",
|
||||
" [5.7, 2.9, 4.2, 1.3],\n",
|
||||
" [6.2, 2.9, 4.3, 1.3],\n",
|
||||
" [5.1, 2.5, 3. , 1.1],\n",
|
||||
" [5.7, 2.8, 4.1, 1.3],\n",
|
||||
" [6.3, 3.3, 6. , 2.5],\n",
|
||||
" [5.8, 2.7, 5.1, 1.9],\n",
|
||||
" [7.1, 3. , 5.9, 2.1],\n",
|
||||
" [6.3, 2.9, 5.6, 1.8],\n",
|
||||
" [6.5, 3. , 5.8, 2.2],\n",
|
||||
" [7.6, 3. , 6.6, 2.1],\n",
|
||||
" [4.9, 2.5, 4.5, 1.7],\n",
|
||||
" [7.3, 2.9, 6.3, 1.8],\n",
|
||||
" [6.7, 2.5, 5.8, 1.8],\n",
|
||||
" [7.2, 3.6, 6.1, 2.5],\n",
|
||||
" [6.5, 3.2, 5.1, 2. ],\n",
|
||||
" [6.4, 2.7, 5.3, 1.9],\n",
|
||||
" [6.8, 3. , 5.5, 2.1],\n",
|
||||
" [5.7, 2.5, 5. , 2. ],\n",
|
||||
" [5.8, 2.8, 5.1, 2.4],\n",
|
||||
" [6.4, 3.2, 5.3, 2.3],\n",
|
||||
" [6.5, 3. , 5.5, 1.8],\n",
|
||||
" [7.7, 3.8, 6.7, 2.2],\n",
|
||||
" [7.7, 2.6, 6.9, 2.3],\n",
|
||||
" [6. , 2.2, 5. , 1.5],\n",
|
||||
" [6.9, 3.2, 5.7, 2.3],\n",
|
||||
" [5.6, 2.8, 4.9, 2. ],\n",
|
||||
" [7.7, 2.8, 6.7, 2. ],\n",
|
||||
" [6.3, 2.7, 4.9, 1.8],\n",
|
||||
" [6.7, 3.3, 5.7, 2.1],\n",
|
||||
" [7.2, 3.2, 6. , 1.8],\n",
|
||||
" [6.2, 2.8, 4.8, 1.8],\n",
|
||||
" [6.1, 3. , 4.9, 1.8],\n",
|
||||
" [6.4, 2.8, 5.6, 2.1],\n",
|
||||
" [7.2, 3. , 5.8, 1.6],\n",
|
||||
" [7.4, 2.8, 6.1, 1.9],\n",
|
||||
" [7.9, 3.8, 6.4, 2. ],\n",
|
||||
" [6.4, 2.8, 5.6, 2.2],\n",
|
||||
" [6.3, 2.8, 5.1, 1.5],\n",
|
||||
" [6.1, 2.6, 5.6, 1.4],\n",
|
||||
" [7.7, 3. , 6.1, 2.3],\n",
|
||||
" [6.3, 3.4, 5.6, 2.4],\n",
|
||||
" [6.4, 3.1, 5.5, 1.8],\n",
|
||||
" [6. , 3. , 4.8, 1.8],\n",
|
||||
" [6.9, 3.1, 5.4, 2.1],\n",
|
||||
" [6.7, 3.1, 5.6, 2.4],\n",
|
||||
" [6.9, 3.1, 5.1, 2.3],\n",
|
||||
" [5.8, 2.7, 5.1, 1.9],\n",
|
||||
" [6.8, 3.2, 5.9, 2.3],\n",
|
||||
" [6.7, 3.3, 5.7, 2.5],\n",
|
||||
" [6.7, 3. , 5.2, 2.3],\n",
|
||||
" [6.3, 2.5, 5. , 1.9],\n",
|
||||
" [6.5, 3. , 5.2, 2. ],\n",
|
||||
" [6.2, 3.4, 5.4, 2.3],\n",
|
||||
" [5.9, 3. , 5.1, 1.8]]),\n",
|
||||
" 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
||||
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
||||
" 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
|
||||
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
|
||||
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
|
||||
" 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
|
||||
" 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),\n",
|
||||
" 'frame': None,\n",
|
||||
" 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='<U10'),\n",
|
||||
" 'DESCR': '.. _iris_dataset:\\n\\nIris plants dataset\\n--------------------\\n\\n**Data Set Characteristics:**\\n\\n :Number of Instances: 150 (50 in each of three classes)\\n :Number of Attributes: 4 numeric, predictive attributes and the class\\n :Attribute Information:\\n - sepal length in cm\\n - sepal width in cm\\n - petal length in cm\\n - petal width in cm\\n - class:\\n - Iris-Setosa\\n - Iris-Versicolour\\n - Iris-Virginica\\n \\n :Summary Statistics:\\n\\n ============== ==== ==== ======= ===== ====================\\n Min Max Mean SD Class Correlation\\n ============== ==== ==== ======= ===== ====================\\n sepal length: 4.3 7.9 5.84 0.83 0.7826\\n sepal width: 2.0 4.4 3.05 0.43 -0.4194\\n petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\\n petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\\n ============== ==== ==== ======= ===== ====================\\n\\n :Missing Attribute Values: None\\n :Class Distribution: 33.3% for each of 3 classes.\\n :Creator: R.A. Fisher\\n :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\\n :Date: July, 1988\\n\\nThe famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\\nfrom Fisher\\'s paper. Note that it\\'s the same as in R, but not as in the UCI\\nMachine Learning Repository, which has two wrong data points.\\n\\nThis is perhaps the best known database to be found in the\\npattern recognition literature. Fisher\\'s paper is a classic in the field and\\nis referenced frequently to this day. (See Duda & Hart, for example.) The\\ndata set contains 3 classes of 50 instances each, where each class refers to a\\ntype of iris plant. One class is linearly separable from the other 2; the\\nlatter are NOT linearly separable from each other.\\n\\n.. topic:: References\\n\\n - Fisher, R.A. 
\"The use of multiple measurements in taxonomic problems\"\\n Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\\n Mathematical Statistics\" (John Wiley, NY, 1950).\\n - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\\n (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\\n - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\\n Structure and Classification Rule for Recognition in Partially Exposed\\n Environments\". IEEE Transactions on Pattern Analysis and Machine\\n Intelligence, Vol. PAMI-2, No. 1, 67-71.\\n - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\". IEEE Transactions\\n on Information Theory, May 1972, 431-433.\\n - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al\"s AUTOCLASS II\\n conceptual clustering system finds 3 classes in the data.\\n - Many, many more ...',\n",
|
||||
" 'feature_names': ['sepal length (cm)',\n",
|
||||
" 'sepal width (cm)',\n",
|
||||
" 'petal length (cm)',\n",
|
||||
" 'petal width (cm)'],\n",
|
||||
" 'filename': 'iris.csv',\n",
|
||||
" 'data_module': 'sklearn.datasets.data'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"iris"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "606f7821",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = df.drop(columns=['Id', 'Species'])  # exclude the row Id (rows are ordered by species, so Id leaks the label) and the target\n",
|
||||
"y = df.iloc[:,-1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "4f7dade3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "85c8563e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||||
"from sklearn.naive_bayes import GaussianNB"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "da7a7529",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica'\n",
|
||||
" 'Iris-setosa' 'Iris-versicolor' 'Iris-setosa' 'Iris-versicolor'\n",
|
||||
" 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor'\n",
|
||||
" 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'\n",
|
||||
" 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
|
||||
" 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
|
||||
" 'Iris-virginica' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'\n",
|
||||
" 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'\n",
|
||||
" 'Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-versicolor'\n",
|
||||
" 'Iris-setosa' 'Iris-versicolor']\n",
|
||||
"[[13 0 0]\n",
|
||||
" [ 0 16 0]\n",
|
||||
" [ 0 1 8]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dtree = DecisionTreeClassifier(max_depth = 2).fit(X_train, y_train)\n",
|
||||
"dtree_pred = dtree.predict(X_test)\n",
|
||||
"# Confusion matrix\n",
|
||||
"cf = confusion_matrix(y_test,dtree_pred)\n",
|
||||
"print(dtree_pred)\n",
|
||||
"print(cf)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "17682b48",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0.9736842105263158"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dtree.score(X_test,y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "4f4d6dc0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"svm_mod = SVC(kernel='linear',C=1).fit(X_train, y_train)\n",
|
||||
"svm_pred= svm_mod.predict(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "a2280e55",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"accsvm = svm_mod.score(X_test, y_test)  # score against the true labels, not the model's own predictions\n",
|
||||
"print(accsvm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "798a5c23",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[13 0 0]\n",
|
||||
" [ 0 16 0]\n",
|
||||
" [ 0 0 9]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"svmc = confusion_matrix(y_test, svm_pred)\n",
|
||||
"print(svmc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "ae7a988b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"knn=KNeighborsClassifier(n_neighbors=7).fit(X_train,y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "2fa42653",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"accknn=knn.score(X_test,y_test)\n",
|
||||
"print(accknn)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "f801cbdf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica'\n",
|
||||
" 'Iris-setosa' 'Iris-virginica' 'Iris-setosa' 'Iris-versicolor'\n",
|
||||
" 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-versicolor'\n",
|
||||
" 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa'\n",
|
||||
" 'Iris-versicolor' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
|
||||
" 'Iris-virginica' 'Iris-versicolor' 'Iris-setosa' 'Iris-setosa'\n",
|
||||
" 'Iris-virginica' 'Iris-setosa' 'Iris-setosa' 'Iris-versicolor'\n",
|
||||
" 'Iris-versicolor' 'Iris-setosa' 'Iris-virginica' 'Iris-versicolor'\n",
|
||||
" 'Iris-setosa' 'Iris-virginica' 'Iris-virginica' 'Iris-versicolor'\n",
|
||||
" 'Iris-setosa' 'Iris-versicolor']\n",
|
||||
"[[13 0 0]\n",
|
||||
" [ 0 16 0]\n",
|
||||
" [ 0 0 9]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"knn_pred = knn.predict(X_test)\n",
|
||||
"cm = confusion_matrix(y_test, knn_pred)\n",
|
||||
"print(knn_pred)\n",
|
||||
"print(cm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "17a6e503",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"114 Iris-virginica\n",
|
||||
"62 Iris-versicolor\n",
|
||||
"33 Iris-setosa\n",
|
||||
"107 Iris-virginica\n",
|
||||
"7 Iris-setosa\n",
|
||||
"100 Iris-virginica\n",
|
||||
"40 Iris-setosa\n",
|
||||
"86 Iris-versicolor\n",
|
||||
"76 Iris-versicolor\n",
|
||||
"71 Iris-versicolor\n",
|
||||
"134 Iris-virginica\n",
|
||||
"51 Iris-versicolor\n",
|
||||
"73 Iris-versicolor\n",
|
||||
"54 Iris-versicolor\n",
|
||||
"63 Iris-versicolor\n",
|
||||
"37 Iris-setosa\n",
|
||||
"78 Iris-versicolor\n",
|
||||
"90 Iris-versicolor\n",
|
||||
"45 Iris-setosa\n",
|
||||
"16 Iris-setosa\n",
|
||||
"121 Iris-virginica\n",
|
||||
"66 Iris-versicolor\n",
|
||||
"24 Iris-setosa\n",
|
||||
"8 Iris-setosa\n",
|
||||
"126 Iris-virginica\n",
|
||||
"22 Iris-setosa\n",
|
||||
"44 Iris-setosa\n",
|
||||
"97 Iris-versicolor\n",
|
||||
"93 Iris-versicolor\n",
|
||||
"26 Iris-setosa\n",
|
||||
"137 Iris-virginica\n",
|
||||
"84 Iris-versicolor\n",
|
||||
"27 Iris-setosa\n",
|
||||
"127 Iris-virginica\n",
|
||||
"132 Iris-virginica\n",
|
||||
"59 Iris-versicolor\n",
|
||||
"18 Iris-setosa\n",
|
||||
"83 Iris-versicolor\n",
|
||||
"Name: Species, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"y_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "72e4ee91",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
345
Multi-output_Regression.ipynb
Normal file
345
Multi-output_Regression.ipynb
Normal file
@@ -0,0 +1,345 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "05e4b4c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "ef0e3a1e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Results</th>\n",
|
||||
" <th>Reach</th>\n",
|
||||
" <th>Impressions</th>\n",
|
||||
" <th>Video_plays</th>\n",
|
||||
" <th>Link_clicks</th>\n",
|
||||
" <th>Engagement</th>\n",
|
||||
" <th>Amount_Spent</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1534</td>\n",
|
||||
" <td>1534</td>\n",
|
||||
" <td>1535</td>\n",
|
||||
" <td>1448</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>62</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>8593</td>\n",
|
||||
" <td>8593</td>\n",
|
||||
" <td>10599</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>140</td>\n",
|
||||
" <td>571</td>\n",
|
||||
" <td>572</td>\n",
|
||||
" <td>457</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>140</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>13</td>\n",
|
||||
" <td>1396</td>\n",
|
||||
" <td>1479</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>13</td>\n",
|
||||
" <td>47</td>\n",
|
||||
" <td>23</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>37616</td>\n",
|
||||
" <td>5092</td>\n",
|
||||
" <td>40135</td>\n",
|
||||
" <td>39568</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>35711</td>\n",
|
||||
" <td>133</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Results Reach Impressions Video_plays Link_clicks Engagement \\\n",
|
||||
"0 1534 1534 1535 1448 0 62 \n",
|
||||
"1 8593 8593 10599 0 2 2 \n",
|
||||
"2 140 571 572 457 0 140 \n",
|
||||
"3 13 1396 1479 0 13 47 \n",
|
||||
"4 37616 5092 40135 39568 0 35711 \n",
|
||||
"\n",
|
||||
" Amount_Spent \n",
|
||||
"0 2 \n",
|
||||
"1 20 \n",
|
||||
"2 5 \n",
|
||||
"3 23 \n",
|
||||
"4 133 "
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data = {'Results': [1534,8593,140,13,37616,1060,694,64,17744],\n",
|
||||
" 'Reach': [1534,8593,571,1396,5092,6933,2008,2825,6154],\n",
|
||||
" 'Impressions': [1535,10599,572,1479,40135,11468,2435,5087,21332],\n",
|
||||
" 'Video_plays': [1448,0,457,0,39568,0,1225,0,20905],\n",
|
||||
" 'Link_clicks': [0,2,0,13,0,100,1,49,0],\n",
|
||||
" 'Engagement': [62,2,140,47,35711,1060,694,145,15604],\n",
|
||||
" 'Amount_Spent': [2,20,5,23,133,89,37,85,76]}\n",
|
||||
"\n",
|
||||
"df = pd.DataFrame(data)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "b7bba8a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.multioutput import MultiOutputRegressor\n",
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"from sklearn.metrics import mean_squared_error, r2_score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "764ebe28",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# X contains the features (Amount spent)\n",
|
||||
"X = df['Amount_Spent']\n",
|
||||
"\n",
|
||||
"# y contains the target variables (Results, Reach, Impressions, Video_plays, Link_clicks, and Engagement)\n",
|
||||
"y = df[['Results', 'Reach', 'Impressions', 'Video_plays', 'Link_clicks', 'Engagement']]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "fa4f048c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"MultiOutputRegressor(estimator=LinearRegression())"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Split the data into training and testing sets\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
||||
"\n",
|
||||
"# Create a Linear Regression model and wrap it in MultiOutputRegressor\n",
|
||||
"model = MultiOutputRegressor(LinearRegression())\n",
|
||||
"\n",
|
||||
"# Train the model with the training data\n",
|
||||
"model.fit(X_train.values.reshape(-1, 1), y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "53fda5c4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Predict the columns using the test data\n",
|
||||
"y_pred = model.predict(X_test.values.reshape(-1, 1))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "2fcd8845",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Mean Squared Error: 100374744.3348547\n",
|
||||
"R-squared: -3679.080219755561\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Evaluate the model's performance using metrics like Mean Squared Error (MSE) and R-squared (R2)\n",
|
||||
"mse = mean_squared_error(y_test, y_pred)\n",
|
||||
"r2 = r2_score(y_test, y_pred)\n",
|
||||
"\n",
|
||||
"print(\"Mean Squared Error:\", mse)\n",
|
||||
"print(\"R-squared:\", r2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "7c2584f6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Predicted values: [ 726.14777024 1945.43688854 2292.37143629 1016.27642398 8.87683812\n",
|
||||
" 240.56563231]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Predict the columns for a new value of 'Amount spent'\n",
|
||||
"X_new = [[20]]\n",
|
||||
"predicted_values = model.predict(X_new)\n",
|
||||
"print(\"Predicted values:\", predicted_values[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "545f8034",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Results</th>\n",
|
||||
" <th>Reach</th>\n",
|
||||
" <th>Impressions</th>\n",
|
||||
" <th>Video_plays</th>\n",
|
||||
" <th>Link_clicks</th>\n",
|
||||
" <th>Engagement</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>726.14777</td>\n",
|
||||
" <td>1945.436889</td>\n",
|
||||
" <td>2292.371436</td>\n",
|
||||
" <td>1016.276424</td>\n",
|
||||
" <td>8.876838</td>\n",
|
||||
" <td>240.565632</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Results Reach Impressions Video_plays Link_clicks Engagement\n",
|
||||
"0 726.14777 1945.436889 2292.371436 1016.276424 8.876838 240.565632"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Convert the predicted values into a new DataFrame\n",
|
||||
"predicted_df = pd.DataFrame(data=predicted_values, columns=y.columns)\n",
|
||||
"predicted_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bf9c18ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
292
Numeral_Regression.ipynb
Normal file
292
Numeral_Regression.ipynb
Normal file
@@ -0,0 +1,292 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1732817d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "2c67c4f6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Results</th>\n",
|
||||
" <th>Reach</th>\n",
|
||||
" <th>Impressions</th>\n",
|
||||
" <th>Video_plays</th>\n",
|
||||
" <th>Link_clicks</th>\n",
|
||||
" <th>Engagement</th>\n",
|
||||
" <th>Amount_Spent</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1534</td>\n",
|
||||
" <td>1534</td>\n",
|
||||
" <td>1535</td>\n",
|
||||
" <td>1448</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>62</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>8593</td>\n",
|
||||
" <td>8593</td>\n",
|
||||
" <td>10599</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>140</td>\n",
|
||||
" <td>571</td>\n",
|
||||
" <td>572</td>\n",
|
||||
" <td>457</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>140</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>13</td>\n",
|
||||
" <td>1396</td>\n",
|
||||
" <td>1479</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>13</td>\n",
|
||||
" <td>47</td>\n",
|
||||
" <td>23</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>37616</td>\n",
|
||||
" <td>5092</td>\n",
|
||||
" <td>40135</td>\n",
|
||||
" <td>39568</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>35711</td>\n",
|
||||
" <td>133</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Results Reach Impressions Video_plays Link_clicks Engagement \\\n",
|
||||
"0 1534 1534 1535 1448 0 62 \n",
|
||||
"1 8593 8593 10599 0 2 2 \n",
|
||||
"2 140 571 572 457 0 140 \n",
|
||||
"3 13 1396 1479 0 13 47 \n",
|
||||
"4 37616 5092 40135 39568 0 35711 \n",
|
||||
"\n",
|
||||
" Amount_Spent \n",
|
||||
"0 2 \n",
|
||||
"1 20 \n",
|
||||
"2 5 \n",
|
||||
"3 23 \n",
|
||||
"4 133 "
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data = {'Results': [1534,8593,140,13,37616,1060,694,64,17744],\n",
|
||||
" 'Reach': [1534,8593,571,1396,5092,6933,2008,2825,6154],\n",
|
||||
" 'Impressions': [1535,10599,572,1479,40135,11468,2435,5087,21332],\n",
|
||||
" 'Video_plays': [1448,0,457,0,39568,0,1225,0,20905],\n",
|
||||
" 'Link_clicks': [0,2,0,13,0,100,1,49,0],\n",
|
||||
" 'Engagement': [62,2,140,47,35711,1060,694,145,15604],\n",
|
||||
" 'Amount_Spent': [2,20,5,23,133,89,37,85,76]}\n",
|
||||
"\n",
|
||||
"df = pd.DataFrame(data)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "096de0cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"from sklearn.metrics import mean_squared_error, r2_score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "00517f34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# X contains the feature columns (Results, Reach, Impressions, Video_plays, Link_clicks, Engagement)\n",
|
||||
"X = df[['Results', 'Reach', 'Impressions', 'Video_plays', 'Link_clicks', 'Engagement']]\n",
|
||||
"\n",
|
||||
"# y contains the target variable (Amount_Spent)\n",
|
||||
"y = df['Amount_Spent']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a56a0001",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split the dataset: hold out 20% for testing, with a fixed random_state for a repeatable split\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "e54736e3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"LinearRegression()"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Linear Regression model\n",
|
||||
"model = LinearRegression()\n",
|
||||
"model.fit(X_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2b5aa068",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use the fitted model to predict the target for the held-out test features (X_test)\n",
|
||||
"y_pred = model.predict(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "f9eb2f9e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Mean Squared Error: 10546.18825415638\n",
|
||||
"R-squared: -8.984556927011957\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Evaluate model performance\n",
|
||||
"mse = mean_squared_error(y_test, y_pred)\n",
|
||||
"r2 = r2_score(y_test, y_pred)\n",
|
||||
"\n",
|
||||
"print(\"Mean Squared Error:\", mse)\n",
|
||||
"print(\"R-squared:\", r2)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "0430cf6c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Predicted amount spent: -34.41443487262584\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\sang.yogi\\Anaconda3\\lib\\site-packages\\sklearn\\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Predict the amount spent for one new observation (values listed in the same column order as X)\n",
|
||||
"X_new = [[100, 2000, 5000, 1000, 50, 150]]\n",
|
||||
"predicted_amount_spent = model.predict(X_new)\n",
|
||||
"print(\"Predicted amount spent:\", predicted_amount_spent[0])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d17c4a06",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user