# -*- coding: utf-8 -*-
"""
Created on Fri Jan  7 10:24:24 2022

@author: Sim
"""
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd

# Page whose <XMP> element holds the whitespace-delimited data table.
link = "http://jupiter.hallym.ac.kr/ftpdata/data/crime.html"

# The context manager closes the connection even if read() raises.
with urlopen(link) as urif:
    myfile = urif.read()
myfile = myfile.decode('euc-kr')  # original note: "ansi" is an alternative

soup = BeautifulSoup(myfile, "html.parser")
xx = soup.xmp.text  # the data lives inside the <XMP> tag

# Split into lines and drop blank ones (e.g. the empty lines right after
# <XMP> and right before </XMP>) instead of assuming that exactly the first
# and last lines are empty; a stray blank line would otherwise become an
# all-None row.
xx = [line for line in xx.split('\n') if line.strip()]

# The first remaining line carries the variable (column) names.
colnames = xx.pop(0).split()

df = pd.DataFrame([row.split() for row in xx])
df.columns = colnames

# Every parsed value is a str; convert all columns except the first one
# (left untouched as the row label) to float.
numeric_cols = colnames[1:]
df = df.astype({name: float for name in numeric_cols})

# Correlate only the converted columns: with pandas >= 2.0, DataFrame.corr()
# no longer silently drops non-numeric columns and would raise on the
# string-valued first column.
print(df[numeric_cols].corr())