Data_ScienceUse_Cases/Confidence_Interval.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "410cdd47",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f769b682",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.01390952774409444"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# T-Multiplier\n",
    "tstar = 1.96\n",
    "# P hat value\n",
    "p = .85\n",
    "# Number of observations\n",
    "n = 659\n",
    "\n",
    "# Calculate Standard Error\n",
    "se = np.sqrt((p * (1 - p))/n)\n",
    "se"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d77c95f1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.8227373256215749, 0.8772626743784251)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lower confidence band\n",
    "lcb = p - tstar * se\n",
    "# Upper confidence band\n",
    "ucb = p + tstar * se\n",
    "# Show confidence bands\n",
    "(lcb, ucb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1d08b43b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same process, using statsmodels library\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "41cb97c9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.8227378265796143, 0.8772621734203857)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Get confidence bands\n",
    "# n = observations\n",
    "# p = result of a survey \n",
    "sm.stats.proportion_confint(n * p, n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4234b441",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Try to import dataset\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv(\"Cartwheeldata.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d03c3d4f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mean of a column\n",
    "mean = df[\"CWDistance\"].mean()\n",
    "# Standard deviation of a column\n",
    "sd = df[\"CWDistance\"].std()\n",
    "# Rows of the dataframe\n",
    "n = len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c52dddd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "tstar = 2.064\n",
    "\n",
    "se = sd/np.sqrt(n)\n",
    "\n",
    "se"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2dfbab7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "lcb = mean - tstar * se\n",
    "ucb = mean + tstar * se\n",
    "(lcb, ucb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "649c18b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "#..OR use statsmodels instead\n",
    "sm.stats.DescrStatsW(df[\"#ColumnName\"]).zconfint_mean()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}