# -*- coding: utf-8 -*-
"""
Created on Tue Dec  7 12:59:20 2021

@author: Sim
"""
import pandas as pd
import numpy as np
import scipy.stats as st
import statsmodels.stats.multicomp as mc

wdf = pd.DataFrame([ [1, 90, 97, 98, 95], [2, 88, 92, 90, 93],
                    [3, 93, 97, 97, 94] ], 
                    columns=['trt', 'obs1', 'obs2', 'obs3','obs4'])

F1, p1 = st.f_oneway(wdf['obs1'], wdf['obs2'], wdf['obs3'], wdf['obs4'])
print(F1, p1)

ldf = pd.DataFrame({'trt':['1', '2', '3']*4,
          'score': [90, 88, 93, 97, 92, 97, 98, 90, 97, 95, 93, 94]})
          

comp = mc.MultiComparison(ldf['score'], ldf['trt'])

hsd = comp.tukeyhsd()
print('Tukey HSD:\n', hsd.summary())

def contrast(dfl, ctrt, indep):  # assumes long format data 
  mm = dfl.groupby([indep]).mean()  # group mean
  vv = dfl.groupby([indep]).var(ddof=1) # group std
  nn = dfl.groupby([indep]).count()  # group nobs
  N = np.sum(nn)       # total nobs
  dfE = N-len(mm)      # 자유도
  SSE = np.sum((nn-1)*vv) 
  MSE = SSE/dfE
  statist = np.sum(mm*ctrt) / np.sqrt(MSE*np.sum(ctrt**2/nn))
  pvalue = (1-st.t.cdf(np.abs(statist), dfE))*2 # for two-sided test
  return([statist[0], pvalue[0]])

#-----------------  
ctrt = np.array([1,-2,1]).reshape(-1,1)  
# groupby에 의해 생성된 값이 열벡터로 내부계산에서 차원 일치를 위해 열로
t1, p1 = contrast(ldf, ctrt, 'trt')
print(t1, p1)

