mirror of
https://github.com/youronlydimwit/Data_ScienceUse_Cases.git
synced 2025-12-17 15:49:59 +01:00
293 lines
7.5 KiB
Plaintext
293 lines
7.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "1732817d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": 2,
 "id": "2c67c4f6",
 "metadata": {},
 "outputs": [
  {
   "data": {
    "text/html": [
     "<div>\n",
     "<style scoped>\n",
     " .dataframe tbody tr th:only-of-type {\n",
     " vertical-align: middle;\n",
     " }\n",
     "\n",
     " .dataframe tbody tr th {\n",
     " vertical-align: top;\n",
     " }\n",
     "\n",
     " .dataframe thead th {\n",
     " text-align: right;\n",
     " }\n",
     "</style>\n",
     "<table border=\"1\" class=\"dataframe\">\n",
     " <thead>\n",
     " <tr style=\"text-align: right;\">\n",
     " <th></th>\n",
     " <th>Results</th>\n",
     " <th>Reach</th>\n",
     " <th>Impressions</th>\n",
     " <th>Video_plays</th>\n",
     " <th>Link_clicks</th>\n",
     " <th>Engagement</th>\n",
     " <th>Amount_Spent</th>\n",
     " </tr>\n",
     " </thead>\n",
     " <tbody>\n",
     " <tr>\n",
     " <th>0</th>\n",
     " <td>1534</td>\n",
     " <td>1534</td>\n",
     " <td>1535</td>\n",
     " <td>1448</td>\n",
     " <td>0</td>\n",
     " <td>62</td>\n",
     " <td>2</td>\n",
     " </tr>\n",
     " <tr>\n",
     " <th>1</th>\n",
     " <td>8593</td>\n",
     " <td>8593</td>\n",
     " <td>10599</td>\n",
     " <td>0</td>\n",
     " <td>2</td>\n",
     " <td>2</td>\n",
     " <td>20</td>\n",
     " </tr>\n",
     " <tr>\n",
     " <th>2</th>\n",
     " <td>140</td>\n",
     " <td>571</td>\n",
     " <td>572</td>\n",
     " <td>457</td>\n",
     " <td>0</td>\n",
     " <td>140</td>\n",
     " <td>5</td>\n",
     " </tr>\n",
     " <tr>\n",
     " <th>3</th>\n",
     " <td>13</td>\n",
     " <td>1396</td>\n",
     " <td>1479</td>\n",
     " <td>0</td>\n",
     " <td>13</td>\n",
     " <td>47</td>\n",
     " <td>23</td>\n",
     " </tr>\n",
     " <tr>\n",
     " <th>4</th>\n",
     " <td>37616</td>\n",
     " <td>5092</td>\n",
     " <td>40135</td>\n",
     " <td>39568</td>\n",
     " <td>0</td>\n",
     " <td>35711</td>\n",
     " <td>133</td>\n",
     " </tr>\n",
     " </tbody>\n",
     "</table>\n",
     "</div>"
    ],
    "text/plain": [
     " Results Reach Impressions Video_plays Link_clicks Engagement \\\n",
     "0 1534 1534 1535 1448 0 62 \n",
     "1 8593 8593 10599 0 2 2 \n",
     "2 140 571 572 457 0 140 \n",
     "3 13 1396 1479 0 13 47 \n",
     "4 37616 5092 40135 39568 0 35711 \n",
     "\n",
     " Amount_Spent \n",
     "0 2 \n",
     "1 20 \n",
     "2 5 \n",
     "3 23 \n",
     "4 133 "
    ]
   },
   "execution_count": 2,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
 "source": [
  "# Raw metrics, one list per column; position i across the lists forms row i.\n",
  "data = {\n",
  "    'Results': [1534, 8593, 140, 13, 37616, 1060, 694, 64, 17744],\n",
  "    'Reach': [1534, 8593, 571, 1396, 5092, 6933, 2008, 2825, 6154],\n",
  "    'Impressions': [1535, 10599, 572, 1479, 40135, 11468, 2435, 5087, 21332],\n",
  "    'Video_plays': [1448, 0, 457, 0, 39568, 0, 1225, 0, 20905],\n",
  "    'Link_clicks': [0, 2, 0, 13, 0, 100, 1, 49, 0],\n",
  "    'Engagement': [62, 2, 140, 47, 35711, 1060, 694, 145, 15604],\n",
  "    'Amount_Spent': [2, 20, 5, 23, 133, 89, 37, 85, 76],\n",
  "}\n",
  "\n",
  "# Tabulate the metrics and preview the first five rows.\n",
  "df = pd.DataFrame(data)\n",
  "df.head()"
 ]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "096de0cb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.linear_model import LinearRegression\n",
|
|
"from sklearn.metrics import mean_squared_error, r2_score"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": 4,
 "id": "00517f34",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Target: the amount spent, which the model is asked to predict.\n",
  "y = df['Amount_Spent']\n",
  "\n",
  "# Features: the six remaining metric columns, kept in their original order.\n",
  "X = df.loc[:, ['Results', 'Reach', 'Impressions', 'Video_plays', 'Link_clicks', 'Engagement']]"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 5,
 "id": "a56a0001",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.\n",
  "X_train, X_test, y_train, y_test = train_test_split(\n",
  "    X,\n",
  "    y,\n",
  "    test_size=0.2,\n",
  "    random_state=42,\n",
  ")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 6,
 "id": "e54736e3",
 "metadata": {},
 "outputs": [
  {
   "data": {
    "text/plain": [
     "LinearRegression()"
    ]
   },
   "execution_count": 6,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
 "source": [
  "# Fit an ordinary least-squares model on the training rows.\n",
  "# fit() returns the estimator itself, so the call can be chained onto the constructor.\n",
  "model = LinearRegression().fit(X_train, y_train)\n",
  "model"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 7,
 "id": "2b5aa068",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Score the held-out rows with the fitted model.\n",
  "y_pred = model.predict(X=X_test)"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 8,
 "id": "f9eb2f9e",
 "metadata": {},
 "outputs": [
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "Mean Squared Error: 10546.18825415638\n",
    "R-squared: -8.984556927011957\n"
   ]
  }
 ],
 "source": [
  "# Compare the held-out targets with the predictions.\n",
  "# R-squared can be negative: that means the model fits the held-out rows worse\n",
  "# than a constant predictor that always returns the mean of y_test.\n",
  "mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)\n",
  "\n",
  "print(\"Mean Squared Error:\", mse)\n",
  "print(\"R-squared:\", r2)"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 10,
 "id": "0430cf6c",
 "metadata": {},
 "outputs": [
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "Predicted amount spent: -34.41443487262584\n"
   ]
  }
 ],
 "source": [
  "# Predict the amount spent for one new, unseen observation.\n",
  "# The model was fitted on a DataFrame, so the new row is wrapped in a DataFrame\n",
  "# that reuses the training columns (X.columns). Passing a bare list of lists\n",
  "# makes scikit-learn warn that X does not have valid feature names.\n",
  "# Values are given in the column order of X:\n",
  "# Results, Reach, Impressions, Video_plays, Link_clicks, Engagement.\n",
  "X_new = pd.DataFrame([[100, 2000, 5000, 1000, 50, 150]], columns=X.columns)\n",
  "predicted_amount_spent = model.predict(X_new)\n",
  "print(\"Predicted amount spent:\", predicted_amount_spent[0])"
 ]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d17c4a06",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|