# -*- coding: utf-8 -*-
"""
Created on Wed Jan  5 17:27:08 2022

@author: Sim
"""
import pandas as pd, numpy as np 
from statsmodels.formula.api import ols
import statsmodels.api as sm
import matplotlib.pyplot as plt
import statsmodels.stats.multicomp as mc
import scipy.stats as st

drugA = [13,  10,  14,  9, 12, 11,  7]
drugB = [18, 10, 16, 12, 19, 14, 13, 12]
drugC = [8,  9,  4,  6, 13,  8, 12, 10, 2]

#dfw = pd.DataFrame({'drugA': drugA, 'drugB':drugB, 'drugC':drugC})

trt = ['drugA']*len(drugA) + ['drugB']*len(drugB) + ['drugC']*len(drugC)
dfl = pd.DataFrame({'trt': trt, 'response': drugA+drugB+drugC})
#----------------------------------------------------------------
res1 = ols('response~trt', data=dfl).fit()
aov_table1 = sm.stats.anova_lm(res1, type=3)
print(aov_table1)
print(dfl.groupby('trt').mean())
print(dfl.groupby('trt').var(ddof=1))
#----------------------------------------------------------------
plt.boxplot([drugA, drugB, drugC])
plt.show()
#-----------------------------------------------------------
comp = mc.MultiComparison(dfl['response'],dfl['trt'])
hsd = comp.tukeyhsd(0.05)
print(hsd.summary())
#-------------------------------------------------
fig, ax = plt.subplots(1,1)
hsd.plot_simultaneous(ax=ax)
#-------------------------------------------------
bonf, a, b = comp.allpairtest(st.ttest_ind, method='bonf')
print(bonf)
#------------------------------------------------------

def contrast2(dfl, ctrt, indep, alternative='two-sided', alpha=0.05):  
  mm = dfl.groupby([indep]).mean()  # group mean
  vv = dfl.groupby([indep]).var(ddof=1) # group std
  nn = dfl.groupby([indep]).count()  # group nobs
  N = np.sum(nn)       # total nobs
  dfE = N-len(mm)      # 자유도
  SSE = np.sum((nn-1)*vv) 
  MSE = SSE/dfE
  
  se = np.sqrt(MSE*np.sum(ctrt**2/nn))
  cntrt = np.sum(mm*ctrt)
  lci = cntrt - st.t.ppf(1-alpha/2, dfE)*se
  uci = cntrt + st.t.ppf(1-alpha/2, dfE)*se
  
  statist = cntrt / se
  if alternative == 'greater':
    pvalue = 1-st.t.cdf(np.abs(statist), dfE)
  elif alternative == 'less':
    pvalue = st.t.cdf(np.abs(statist), dfE)
  else:
    pvalue = (1-st.t.cdf(np.abs(statist), dfE))*2 
  return([statist[0], pvalue[0], (lci[0], uci[0])])
#---------------------------------------------------------------------  
ctrt = np.array([1, 1, -2]).reshape(-1,1)  
# groupby에 의해 생성된 값이 열벡터로 내부계산에서 차원 일치를 위해 열로
res1 = contrast2(dfl, ctrt, 'trt')
print(res1)
#---------------------------------------------------------------------
t0, p = st.ttest_ind(drugA, drugB)
print(t0, p)
ctrt2 = np.array([1, -1, 0]).reshape(-1,1)  
res2 = contrast2(dfl, ctrt2, 'trt')
print(res2)

