# -*- coding: utf-8 -*-
"""
Created on Tue Dec  7 14:22:33 2021

@author: Sim
"""
import pandas as pd, numpy as np
from statsmodels.formula.api import ols
import statsmodels.api as sm
import scipy.stats as st, matplotlib.pyplot as plt
import statsmodels.stats.multicomp as mc

df = pd.DataFrame({'noAd':[13.891, 10.172, 13.891, 12.078, 15.203, 10.922, 
                           18.156,  8.094, 12.032, 16.782,  9.125, 14.265],
                   '3by3':[ 9.297,  8.859,  9.843,  4.703, 11.703,  9.078,
                            8.625,  7.437,  6.125, 10.625,  6.125,  7.657],
                   'mtrx':[ 8.610,  9.422,  8.000,  5.485,  9.531,  7.563,
                           7.234 ,  5.844,  7.891,  8.906,  5.266,  5.688],
                   'xoss':[16.141,  7.219,  9.438, 11.719, 18.953, 11.406,
                           13.797, 10.625, 12.032, 11.484,  9.688,   8.39]})
dfT = df.T # long format
col = ['time'+ str(i) for i in range(12)]
dfT['trt'] = dfT.index
dfT.columns = col + ['trt']
dfl = pd.wide_to_long(dfT, stubnames='time', i='trt', j='obs')
dfl = dfl.reset_index()
print(dfl)

res1 = ols('time~trt', data=dfl).fit()
aov_table1 = sm.stats.anova_lm(res1, type=3)
print(aov_table1)
#------------------------------------------------------------------
print(dfl.groupby('trt')['time'].mean())
print(dfl.groupby('trt')['time'].var(ddof=1))
df.boxplot(column=['noAd', '3by3','mtrx', 'xoss'])
#-------------------------------------------------------------------
comp = mc.MultiComparison(dfl['time'],dfl['trt'])
hsd = comp.tukeyhsd(0.05)
print(hsd.summary())
#-------------------------------------------------
fig, ax = plt.subplots(1,1)
hsd.plot_simultaneous(ax=ax)
#-------------------------------------------------------------------                  
def contrast(dfl, ctrt, indep):  # assumes long format data 
  mm = dfl.groupby([indep]).mean()  # group mean
  vv = dfl.groupby([indep]).var(ddof=1) # group std
  nn = dfl.groupby([indep]).count()  # group nobs
  N = np.sum(nn)       # total nobs
  dfE = N-len(mm)      # 자유도
  SSE = np.sum((nn-1)*vv) 
  MSE = SSE/dfE
  statist = np.sum(mm*ctrt) / np.sqrt(MSE*np.sum(ctrt**2/nn))
  pvalue = (1-st.t.cdf(np.abs(statist), dfE))*2 # for two-sided test
  return([statist[0], pvalue[0]])
#------------------------------------------------------------------
ctrt = np.array([-1,-1,3,-1]).reshape(-1,1)  
# groupby에 의해 생성된 값이 열벡터로 내부계산에서 차원 일치를 위해 열로
# 또한 groupy는 값에 의한 오름차순이므로 2by3, mtrx, noAd, xoss 순서
t1, p1 = contrast(dfl[['time', 'trt']], ctrt, 'trt')
print(t1, p1)
#--------------------------------------------------------------------
ctrt = np.array([1,-1,-1,1]).reshape(-1,1)  
# groupby에 의해 생성된 값이 열벡터로 내부계산에서 차원 일치를 위해 열로
t2, p2 = contrast(dfl[['time', 'trt']], ctrt, 'trt')
print(t2, p2)

