{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "410cdd47",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f769b682",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.01390952774409444"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Multiplier for a 95% confidence level (1.96 is the standard-normal z* value,\n",
    "# not a t value; the name tstar is kept for consistency with the cells below)\n",
    "tstar = 1.96\n",
    "# Sample proportion (p-hat)\n",
    "p = .85\n",
    "# Number of observations\n",
    "n = 659\n",
    "\n",
    "# Standard error of a sample proportion: sqrt(p * (1 - p) / n)\n",
    "se = np.sqrt((p * (1 - p))/n)\n",
    "se"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d77c95f1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.8227373256215749, 0.8772626743784251)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lower confidence bound: estimate minus margin of error\n",
    "lcb = p - tstar * se\n",
    "# Upper confidence bound: estimate plus margin of error\n",
    "ucb = p + tstar * se\n",
    "# Show the confidence interval as (lower, upper)\n",
    "(lcb, ucb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1d08b43b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same process, using statsmodels library\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "41cb97c9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.8227378265796143, 0.8772621734203857)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Confidence interval for a proportion via statsmodels\n",
    "#   first argument  = number of successes (n * p)\n",
    "#   second argument = number of observations (n)\n",
    "# The result agrees with the hand calculation above to about six decimals;\n",
    "# the small gap is presumably because statsmodels uses the exact normal\n",
    "# quantile rather than the rounded 1.96.\n",
    "sm.stats.proportion_confint(n * p, n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4234b441",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset (the CSV is read relative to the working directory)\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.read_csv(\"Cartwheeldata.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d03c3d4f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample mean of the CWDistance column\n",
    "mean = df[\"CWDistance\"].mean()\n",
    "# Sample standard deviation of the CWDistance column\n",
    "sd = df[\"CWDistance\"].std()\n",
    "# Number of rows in the dataframe\n",
    "# NOTE: this overwrites the n = 659 used by the proportion cells above, so\n",
    "# re-running those cells after this one gives a different interval.\n",
    "n = len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c52dddd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# t multiplier for a 95% confidence level; 2.064 is the value for 24 degrees\n",
    "# of freedom, i.e. it assumes the dataset has n = 25 rows (TODO confirm\n",
    "# against len(df) and update if the data changes)\n",
    "tstar = 2.064\n",
    "\n",
    "# Standard error of the sample mean: sd / sqrt(n)\n",
    "se = sd/np.sqrt(n)\n",
    "\n",
    "se"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2dfbab7d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confidence interval for the mean: estimate +/- multiplier * standard error\n",
    "lcb = mean - tstar * se\n",
    "ucb = mean + tstar * se\n",
    "(lcb, ucb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "649c18b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "#..OR use statsmodels instead\n",
    "# Uses the same CWDistance column as the hand calculation above.\n",
    "# zconfint_mean() is based on the normal (z) multiplier, so its interval is\n",
    "# slightly narrower than the t-based one computed by hand; tconfint_mean()\n",
    "# on the same object gives the t-based interval.\n",
    "sm.stats.DescrStatsW(df[\"CWDistance\"]).zconfint_mean()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}