# -*- coding: utf-8 -*-
"""
Created on Mon May 31 16:48:38 2021

@author: Sim
"""

import pandas as pd
import numpy as np

# Load the data set; the columns used below are 'gender', 'height'
# and 'religion'.
df = pd.read_csv(r'D:\HTEX\Pythonbk\codesdata\bmi.csv')

# Absolute frequency of each gender category.
ftab0 = df['gender'].value_counts()
print("Freq Tab 0 \n", ftab0)

# Relative frequencies (proportions), sorted from least to most frequent.
ftab1 = df['gender'].value_counts(normalize=True, sort=True, ascending=True)
print("Freq Tab 1 \n", ftab1)

# Grouped frequency table of height with open-ended first and last bins.
# np.inf is used instead of the np.Inf alias, which NumPy 2.0 removed.
# sort=False keeps the bins in interval order rather than ordering by count.
ftab2 = df['height'].value_counts(bins=[-np.inf, 150, 155, 160, 165, 170, 175, np.inf],
                                  sort=False)
print("Freq Tab 2 \n", ftab2)

###############################################################

# One-way frequency table of gender: one row per gender, with a single
# column labelled 'count'.
ftab3 = pd.crosstab(df['gender'], 'count')
print(ftab3)

# Same tabulation with the roles swapped: a single row labelled 'count',
# with one column per gender.
ftab4 = pd.crosstab('count', df['gender'])
print(ftab4)


################# Cross Tables ##############################

# Two-way contingency table: gender in rows, religion in columns.
ctab1 = pd.crosstab(index=df['gender'], columns=df['religion'])
print(ctab1)

# Same table with row and column totals added under the 'All' label.
ctab2 = pd.crosstab(index=df['gender'], columns=df['religion'], margins=True)
print(ctab2)

# Proportions of the grand total, as computed by pandas. The result is
# printed so it is visible when the file is run as a script, not only
# when the expression is evaluated interactively.
print(pd.crosstab(index=df['gender'], columns=df['religion'], margins=True, normalize='all'))

# The same proportions by hand: every cell divided by the grand total.
ctab3 = ctab2 / ctab2.loc['All', 'All']
print(ctab3)

# Proportions within each column (religion), as computed by pandas.
print(pd.crosstab(index=df['gender'], columns=df['religion'], margins=True, normalize='columns'))

# The same by hand: each column divided by its own total from the 'All'
# row; the totals are aligned to the table's column labels.
ctab4 = ctab2.div(ctab2.loc['All'], axis='columns')
print(ctab4)

################## mean/var from freq ##############

# One representative height per bin of ftab2: 147.5, 152.5, ..., 177.5.
# These are the midpoints of the 5-wide interior bins; the two open-ended
# bins are represented by 147.5 and 177.5.
x = np.linspace(147.5, 177.5, 7)

# Total number of observations across all bins.
total = ftab2.sum()

# Frequency-weighted mean of the representative values.
wmean = (x * ftab2).sum() / total
print(wmean)

# Frequency-weighted variance with an (n - 1) denominator.
sq_dev = (x - wmean) ** 2
wvar = (sq_dev * ftab2).sum() / (total - 1)
print(wvar)

# Mean and sample variance from the raw heights, for comparison.
print(df['height'].mean(), df['height'].var(ddof=1))





