Files
Data_ScienceUse_Cases/Multi-output_Regression.ipynb
2023-09-05 10:19:56 +07:00

346 lines
9.3 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "05e4b4c5",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ef0e3a1e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Results</th>\n",
" <th>Reach</th>\n",
" <th>Impressions</th>\n",
" <th>Video_plays</th>\n",
" <th>Link_clicks</th>\n",
" <th>Engagement</th>\n",
" <th>Amount_Spent</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1534</td>\n",
" <td>1534</td>\n",
" <td>1535</td>\n",
" <td>1448</td>\n",
" <td>0</td>\n",
" <td>62</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8593</td>\n",
" <td>8593</td>\n",
" <td>10599</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>140</td>\n",
" <td>571</td>\n",
" <td>572</td>\n",
" <td>457</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>1396</td>\n",
" <td>1479</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>37616</td>\n",
" <td>5092</td>\n",
" <td>40135</td>\n",
" <td>39568</td>\n",
" <td>0</td>\n",
" <td>35711</td>\n",
" <td>133</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Results Reach Impressions Video_plays Link_clicks Engagement \\\n",
"0 1534 1534 1535 1448 0 62 \n",
"1 8593 8593 10599 0 2 2 \n",
"2 140 571 572 457 0 140 \n",
"3 13 1396 1479 0 13 47 \n",
"4 37616 5092 40135 39568 0 35711 \n",
"\n",
" Amount_Spent \n",
"0 2 \n",
"1 20 \n",
"2 5 \n",
"3 23 \n",
"4 133 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Each key becomes a DataFrame column; every list holds nine values, one per row.\n",
"data = {\n",
"    'Results': [1534, 8593, 140, 13, 37616, 1060, 694, 64, 17744],\n",
"    'Reach': [1534, 8593, 571, 1396, 5092, 6933, 2008, 2825, 6154],\n",
"    'Impressions': [1535, 10599, 572, 1479, 40135, 11468, 2435, 5087, 21332],\n",
"    'Video_plays': [1448, 0, 457, 0, 39568, 0, 1225, 0, 20905],\n",
"    'Link_clicks': [0, 2, 0, 13, 0, 100, 1, 49, 0],\n",
"    'Engagement': [62, 2, 140, 47, 35711, 1060, 694, 145, 15604],\n",
"    'Amount_Spent': [2, 20, 5, 23, 133, 89, 37, 85, 76],\n",
"}\n",
"\n",
"df = pd.DataFrame(data=data)\n",
"\n",
"# Preview the first five rows.\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b7bba8a6",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.multioutput import MultiOutputRegressor\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error, r2_score"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "764ebe28",
"metadata": {},
"outputs": [],
"source": [
"# Feature: the amount spent, selected as a 1-D Series.\n",
"X = df.loc[:, 'Amount_Spent']\n",
"\n",
"# Targets: the six metric columns the model has to predict from the amount spent.\n",
"y = df.loc[:, ['Results', 'Reach', 'Impressions', 'Video_plays', 'Link_clicks', 'Engagement']]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "fa4f048c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MultiOutputRegressor(estimator=LinearRegression())"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=42\n",
")\n",
"\n",
"# MultiOutputRegressor fits one separate LinearRegression per target column.\n",
"model = MultiOutputRegressor(estimator=LinearRegression())\n",
"\n",
"# The estimator needs a 2-D feature matrix, so the single feature is reshaped into one column.\n",
"model.fit(X_train.to_numpy().reshape(-1, 1), y_train)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "53fda5c4",
"metadata": {},
"outputs": [],
"source": [
"# Predict every target column for the held-out rows (feature reshaped into a single column).\n",
"y_pred = model.predict(X_test.to_numpy().reshape(-1, 1))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2fcd8845",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean Squared Error: 100374744.3348547\n",
"R-squared: -3679.080219755561\n"
]
}
],
"source": [
"# Score the held-out predictions with Mean Squared Error (MSE) and R-squared (R2).\n",
"# With scikit-learn's defaults both scores are averaged uniformly over the target columns.\n",
"r2 = r2_score(y_true=y_test, y_pred=y_pred)\n",
"mse = mean_squared_error(y_true=y_test, y_pred=y_pred)\n",
"\n",
"print(\"Mean Squared Error:\", mse)\n",
"print(\"R-squared:\", r2)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "7c2584f6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted values: [ 726.14777024 1945.43688854 2292.37143629 1016.27642398 8.87683812\n",
" 240.56563231]\n"
]
}
],
"source": [
"# Predict the target columns for a new value of 'Amount spent'.\n",
"# The input is a nested list because predict expects a 2-D array: one row per sample, one column per feature.\n",
"X_new = [[20]]\n",
"\n",
"# Pass X_new itself (not a repeated literal) so that editing the value above changes the prediction.\n",
"predicted_values = model.predict(X_new)\n",
"print(\"Predicted values:\", predicted_values[0])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "545f8034",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Results</th>\n",
" <th>Reach</th>\n",
" <th>Impressions</th>\n",
" <th>Video_plays</th>\n",
" <th>Link_clicks</th>\n",
" <th>Engagement</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>726.14777</td>\n",
" <td>1945.436889</td>\n",
" <td>2292.371436</td>\n",
" <td>1016.276424</td>\n",
" <td>8.876838</td>\n",
" <td>240.565632</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Results Reach Impressions Video_plays Link_clicks Engagement\n",
"0 726.14777 1945.436889 2292.371436 1016.276424 8.876838 240.565632"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Wrap the predictions in a DataFrame labelled with the target column names.\n",
"predicted_df = pd.DataFrame(predicted_values, columns=y.columns)\n",
"predicted_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf9c18ec",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}