Files
Data_ScienceUse_Cases/Numeral_Regression.ipynb
2023-09-05 10:19:56 +07:00

293 lines
7.5 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "1732817d",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2c67c4f6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Results</th>\n",
" <th>Reach</th>\n",
" <th>Impressions</th>\n",
" <th>Video_plays</th>\n",
" <th>Link_clicks</th>\n",
" <th>Engagement</th>\n",
" <th>Amount_Spent</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1534</td>\n",
" <td>1534</td>\n",
" <td>1535</td>\n",
" <td>1448</td>\n",
" <td>0</td>\n",
" <td>62</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8593</td>\n",
" <td>8593</td>\n",
" <td>10599</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>140</td>\n",
" <td>571</td>\n",
" <td>572</td>\n",
" <td>457</td>\n",
" <td>0</td>\n",
" <td>140</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>1396</td>\n",
" <td>1479</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>37616</td>\n",
" <td>5092</td>\n",
" <td>40135</td>\n",
" <td>39568</td>\n",
" <td>0</td>\n",
" <td>35711</td>\n",
" <td>133</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Results Reach Impressions Video_plays Link_clicks Engagement \\\n",
"0 1534 1534 1535 1448 0 62 \n",
"1 8593 8593 10599 0 2 2 \n",
"2 140 571 572 457 0 140 \n",
"3 13 1396 1479 0 13 47 \n",
"4 37616 5092 40135 39568 0 35711 \n",
"\n",
" Amount_Spent \n",
"0 2 \n",
"1 20 \n",
"2 5 \n",
"3 23 \n",
"4 133 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inline sample data: one list per column, nine observations each.\n",
"# Amount_Spent is the column that is later used as the regression target.\n",
"data = {\n",
"    'Results': [1534, 8593, 140, 13, 37616, 1060, 694, 64, 17744],\n",
"    'Reach': [1534, 8593, 571, 1396, 5092, 6933, 2008, 2825, 6154],\n",
"    'Impressions': [1535, 10599, 572, 1479, 40135, 11468, 2435, 5087, 21332],\n",
"    'Video_plays': [1448, 0, 457, 0, 39568, 0, 1225, 0, 20905],\n",
"    'Link_clicks': [0, 2, 0, 13, 0, 100, 1, 49, 0],\n",
"    'Engagement': [62, 2, 140, 47, 35711, 1060, 694, 145, 15604],\n",
"    'Amount_Spent': [2, 20, 5, 23, 133, 89, 37, 85, 76],\n",
"}\n",
"\n",
"# Dict keys become the column names; preview the first five rows.\n",
"df = pd.DataFrame(data)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "096de0cb",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error, r2_score"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "00517f34",
"metadata": {},
"outputs": [],
"source": [
"# Feature matrix: the six predictor columns, selected by name.\n",
"feature_columns = [\n",
"    'Results',\n",
"    'Reach',\n",
"    'Impressions',\n",
"    'Video_plays',\n",
"    'Link_clicks',\n",
"    'Engagement',\n",
"]\n",
"X = df[feature_columns]\n",
"\n",
"# Target vector: the Amount_Spent column.\n",
"y = df['Amount_Spent']"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a56a0001",
"metadata": {},
"outputs": [],
"source": [
"# Hold out 20% of the rows for evaluation; the fixed random_state keeps\n",
"# the shuffle (and therefore the split) reproducible across runs.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e54736e3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression()"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a linear regression estimator and fit it on the training split.\n",
"# fit() returns the estimator itself, so construction and fitting can be chained.\n",
"model = LinearRegression().fit(X_train, y_train)\n",
"\n",
"# Show the fitted estimator as the cell output.\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2b5aa068",
"metadata": {},
"outputs": [],
"source": [
"# Run the fitted model on the held-out rows; y_pred is compared\n",
"# against y_test when the model is evaluated.\n",
"y_pred = model.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "f9eb2f9e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean Squared Error: 10546.18825415638\n",
"R-squared: -8.984556927011957\n"
]
}
],
"source": [
"# Score the held-out predictions, reporting each metric as soon as it is computed.\n",
"mse = mean_squared_error(y_test, y_pred)\n",
"print(\"Mean Squared Error:\", mse)\n",
"\n",
"# Note: R-squared can be negative, meaning the fit is worse than\n",
"# always predicting the mean of y_test.\n",
"r2 = r2_score(y_test, y_pred)\n",
"print(\"R-squared:\", r2)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0430cf6c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted amount spent: -34.41443487262584\n"
]
}
],
"source": [
"# Predict the amount spent for one new observation.\n",
"#\n",
"# The model was fitted on a DataFrame, so the new row is wrapped in a DataFrame\n",
"# that reuses the training column names (X.columns). Passing a bare nested list\n",
"# makes scikit-learn warn that X does not have valid feature names and leaves\n",
"# the value-to-column mapping purely positional.\n",
"#\n",
"# Values are given in X's column order:\n",
"# Results, Reach, Impressions, Video_plays, Link_clicks, Engagement.\n",
"X_new = pd.DataFrame([[100, 2000, 5000, 1000, 50, 150]], columns=X.columns)\n",
"\n",
"# predict() returns one value per input row; there is a single row here.\n",
"# NOTE(review): the result is negative, which is not a meaningful spend -- the\n",
"# model is fitted on very few rows, so treat this number as illustrative only.\n",
"predicted_amount_spent = model.predict(X_new)\n",
"print(\"Predicted amount spent:\", predicted_amount_spent[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d17c4a06",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}