# -*- coding: utf-8 -*-
"""
Created on Tue Jan  4 12:14:45 2022

@author: Sim
"""
def test_var_1samp(vv, nn, value, alternative='two-sided'):
    import scipy.stats as st
    
    df = nn - 1 
    chi0 = (df*vv)/value
   
    if alternative == 'greater':
        pval = 1 - st.chi2.cdf(chi0, df)
    elif alternative == 'less':
        pval = st.chi2.cdf(chi0,df)
    else:
        pval = st.chi2.cdf(chi0,df)
        if pval < 0.5:
            pval = pval * 2
        else:
            pval = (1-pval)*2
    
    return(chi0, pval)

def ci_var_1samp(vv, nn, level=95):
    import scipy.stats as st
    
    df = nn - 1
    lbd = df*vv/st.chi2.ppf(1-(100-level)/100/2,df)
    ubd = df*vv/st.chi2.ppf((100-level)/100/2,df)
    return((lbd,ubd))
    
def test_var_2samp(var1, n1, var2, n2, alternative='two-sided'):
    import scipy.stats as st
    
    df1 = n1 - 1; df2 = n2 - 1 
    F0 = var1/var2
   
    if alternative == 'greater':
        pval = 1 - st.f.cdf(F0, df1, df2)
    elif alternative == 'less':
        pval = st.f.cdf(F0,df2, df2)
    else:
        pval = st.f.cdf(F0,df1,df2)
        if pval < 0.5:
            pval = pval * 2
        else:
            pval = (1-pval)*2
    
    return(F0, pval)

def ci_var_2samp(var1, n1, var2, n2, level=95):
    import scipy.stats as st
    
    F0 = var1/var2
    df1 = n1 - 1; df2 = n2 - 1 
    
    lbd = F0/st.f.ppf(1-(100-level)/100/2, df1, df2)
    ubd = F0/st.f.ppf((100-level)/100/2, df1, df2)
    return((lbd,ubd))

import numpy as np, pandas as pd

df = pd.read_csv(r'D:\HTEX\Pythonbk\codesdata\bmi.csv')
df['bmi'] = df['weight']/(df['height']/100)**2

vbmi = np.var(df['bmi'], ddof=1)
lbd, ubd = ci_var_1samp(vbmi, df.shape[0])
print(vbmi, lbd, ubd)
vbmi = np.var(df['bmi'], ddof=1)
lbd, ubd = ci_var_1samp(vbmi, df.shape[0], level=99)
print(vbmi, lbd, ubd)

#-----------------------------------------------------------
mbmi = df.groupby('gender').get_group('M')['bmi']
fbmi = df.groupby('gender').get_group('F')['bmi']
F, p = test_var_2samp(np.var(mbmi, ddof=1), len(mbmi), 
                      np.var(fbmi, ddof=1), len(fbmi))
print(F,p)
#------------------------------------------------------
lbd, ubd = ci_var_2samp(np.var(mbmi, ddof=1), len(mbmi), 
                        np.var(fbmi, ddof=1), len(fbmi))
print(lbd, ubd)
#----------------------------------------------------------
ybmi = df.groupby('marriage').get_group('Y')['bmi']
nbmi = df.groupby('marriage').get_group('N')['bmi']
F, p = test_var_2samp(np.var(ybmi, ddof=1), len(ybmi), 
                      np.var(nbmi, ddof=1), len(nbmi))
print(F,p)

lbd, ubd = ci_var_2samp(np.var(ybmi, ddof=1), len(ybmi), 
                        np.var(nbmi, ddof=1), len(nbmi))
print(lbd, ubd)
