# -*- coding: utf-8 -*-
"""
Created on Sun Dec 26 12:20:40 2021

@author: Sim
"""
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
import scipy.stats as st

# Load the data set; the rest of the script uses its 'GDP' and 'happiness'
# columns.
df = pd.read_csv(r'D:\HTEX\Pythonbk\codesdata\WorldHappinessData2015s.csv')
# sort_values returns a new DataFrame instead of sorting in place, so the
# result has to be assigned back; otherwise the call has no effect.
# Sorting by GDP lets the fitted line below be drawn from left to right.
df = df.sort_values(by='GDP')

# Scatter plot of the observations (GDP on x, happiness on y).
plt.plot(df['GDP'], df['happiness'], '*')
#----------------------------------------------
# Simple linear regression of happiness on GDP (intercept included by the
# formula interface).
lm1 = ols('happiness ~ GDP', data=df).fit()

print(lm1.params)
pred1 = lm1.fittedvalues
# Overlay the fitted values on the scatter plot as a line.
plt.plot(df['GDP'], pred1)
#------------------------------------------------
print(lm1.summary())

# Pearson correlation between happiness and GDP, selected by column name.
# Picking element [0, 1] of the full correlation matrix silently depends on
# the column order and fails when the frame holds a non-numeric column.
corr = df['happiness'].corr(df['GDP'])
R2 = lm1.rsquared
# Print r, R^2 and r^2 side by side so the two squared values can be compared.
print('상관계수 r:', corr, 'R^2: ', R2, 'r^2', corr**2)
#--------------------------------------------------
# ANOVA table for the fitted model, followed by the model F statistic.
aovtable = sm.stats.anova_lm(lm1)
print(aovtable)
print('F-값: ', lm1.fvalue)
#----------------------------------------------------
# Error sum of squares computed directly from the residuals.
resid1 = lm1.resid
print('SSE by 잔차: ', (resid1**2).sum())
#---------------------------------------------------
n = df.shape[0]  # number of observations
# Column means, restricted to the two analysed columns and looked up by
# label below; integer keys on a label-indexed Series are deprecated.
ave = df[['happiness', 'GDP']].mean()

# Corrected sums of squares: S = sum(v^2) - n * mean(v)^2.
Sxx = (df['GDP']**2).sum() - n*ave['GDP']**2
Syy = (df['happiness']**2).sum() - n*ave['happiness']**2
# Slope recovered from the correlation: b1 = r * sqrt(Syy) / sqrt(Sxx).
b1_cor = corr*(Syy**.5)/(Sxx**.5)
# Slope estimated by OLS, selected by the regressor's name.
b1_ols = lm1.params['GDP']
print('두개의 b1 계산 비교: ', b1_cor, b1_ols)
#----------------------------------------
# Predicted happiness at three new GDP values.
pred2 = lm1.predict(pd.DataFrame({'GDP': [0.7, 1, 1.3]}))
print('예측값 2: ', pred2)
#------------------------------------------------
# 99% confidence intervals for the coefficients (alpha = 0.01).
print(lm1.conf_int(alpha=0.01))
#-------------------------------------------
# t statistic for the GDP slope against the hypothesised value 2.
# Coefficients are selected by label; integer keys on a label-indexed
# Series are deprecated.
t0 = (lm1.params['GDP'] - 2)/lm1.bse['GDP']
print(t0)
# Upper-tail p-value P(T > t0). The survival function avoids the precision
# loss of 1 - cdf for small p-values; df_resid is the residual degrees of
# freedom (nobs - 2 for this one-regressor model with intercept).
pval = st.t.sf(t0, lm1.df_resid)
print(pval)
#=========================

