카테고리 없음
데이터 추출 해버리기 5 (DB 데이터 추가) - 작성중
웅수몬
2023. 2. 13. 09:06
import pymysql
import json
import pandas as pd
import numpy as np
import datetime
# 코로나 백신 데이터 가져오기
from data_scraping.corona_vaccine_data_scraping import get_vaccine_scraping
# 코로나 데이터 가져오기
from data_scraping.corona_data_scraping import get_corona_scraping
# 코로나 관련 api 가져오기
from data_scraping.corona_api import get_level_api, get_text_api, get_exchange_api
# 대사관 데이터 가져오기
from data_scraping.embassy_data_scraping import get_embassy_data
# 안전 데이터, 범죄 데이터 가져오기
from data_scraping.safety_scraping import get_safety_data
from data_scraping.terror_scraping import get_terror_data
# Safety_Score 데이터 가공 함수
from score import SafetyScore
class DB_Update:
def __init__(self):
print(str(datetime.datetime.now()) + ": db update - background")
# 기능 1. corona vaccine data update, 주기 : 24시간
def update_Corona_Vaccine_Data(self):
# 1. scraping
vaccine_data = get_vaccine_scraping()
# 2. iso 컬럼 추가
with open('./json_file/country_ISO.json', 'r') as f:
iso_list = json.load(f)
for i in range(len(vaccine_data)):
vaccine_data[i]["iso_code"] = vaccine_data[i]["country"]
for j in range(len(iso_list)):
if vaccine_data[i]["country"] == iso_list[j]["Name"]:
vaccine_data[i]["iso_code"] = iso_list[j]["Code"]
# 3. db 연결
conn = pymysql.connect(host="localhost", user="coalarm", password="v4SxXqsLz", db="coalarm", charset="utf8")
cur = conn.cursor()
cur.execute('TRUNCATE TABLE Corona_Vaccine_Data') # 테이블 레코드 비우기
# 4. 해당 테이블에 데이터 추가
for i in range(len(vaccine_data)):
cur.execute('INSERT INTO Corona_Vaccine_Data VALUES("{0}", "{1}", "{2}", "{3}")'.format(\
vaccine_data[i]["country"], \
vaccine_data[i]["iso_code"], \
float(vaccine_data[i]["partly"]),\
float(vaccine_data[i]["fully"])))
conn.commit()
conn.close()
print(str(datetime.datetime.now()) + ": corona_vaccine_date table update complete")
# 기능 2. corona data update, 주기 : 1시간
def update_Corona_Data(self):
# 1. scraping
get_corona_data = get_corona_scraping()
# 2. 중복 나라 제거
country_list = []
corona_data = []
for i in range(len(get_corona_data)):
if get_corona_data[i]["country_name"] in country_list:
continue
country_list.append(get_corona_data[i]["country_name"])
corona_data.append(get_corona_data[i])
# 3. iso, continent 컬럼 추가
with open('./json_file/country_ISO.json', 'r') as f:
iso_data = json.load(f)
for i in range(len(corona_data)):
corona_data[i]['country_iso_alp2'] = "없음"
for j in iso_data:
if corona_data[i]['country_name'] == j['Name']:
corona_data[i]['country_iso_alp2'] = j['Code']
with open('./json_file/continent.json', 'r') as f:
continent = json.load(f)
for i in range(len(corona_data)):
corona_data[i]["continent"] = "없음"
for j in continent:
if corona_data[i]["country_iso_alp2"] == j["iso_code"]:
corona_data[i]["continent"] = j["continent"]
# 4. db 연결
conn = pymysql.connect(host="localhost", user="coalarm", password="v4SxXqsLz", db="coalarm", charset="utf8")
cur = conn.cursor()
cur.execute('TRUNCATE TABLE Corona_Data') # 테이블 레코드 비우기
# 5. 해당 테이블에 데이터 추가
for i in range(len(corona_data)):
cur.execute('INSERT INTO Corona_Data VALUES("{0}", "{1}", "{2}", "{3}", "{4}", "{5}", "{6}", "{7}", "{8}", "{9}", "{10}", "{11}")'.format(\
corona_data[i]["country_name"], \
corona_data[i]["country_iso_alp2"], \
corona_data[i]["continent"], \
int(corona_data[i]["total_confirmed"]), \
int(corona_data[i]["new_confirmed"]), \
int(corona_data[i]["total_deaths"]), \
int(corona_data[i]["new_deaths"]), \
int(corona_data[i]["total_recovered"]), \
int(corona_data[i]["new_recovered"]), \
float(corona_data[i]["recovery"]), \
float(corona_data[i]["fatality"]), \
float(corona_data[i]["incidence"])))
conn.commit()
conn.close()
print(str(datetime.datetime.now()) + ": corona_data table update complete")
# 기능 3. api date update, 주기 : 24시간
def update_Api_Data(self):
# api data
# 1. api 호출
text_data = get_text_api()
get_level_data = get_level_api()
# 2. 중복 나라 제거
iso_list = []
level_data = []
for i in range(len(get_level_data)):
if get_level_data[i]["country_iso_alp2"] in iso_list:
continue
iso_list.append(get_level_data[i]["country_iso_alp2"])
level_data.append(get_level_data[i]) # level_data key : ["country_nm", "alarm_lvl", "country_iso_alp2", "country_eng_nm"]
# 3. 기반 데이터(country_kr_ISO.json)에 api 데이터 병합
with open('./json_file/country_kr_ISO.json', 'r') as f:
json_country_kr = json.load(f) # json_country_kr key : ["country_kr", "iso_code"]
dict_list = []
for i in json_country_kr:
i["alarm_lvl"] = -1
i["country_eng_nm"] = "없음"
for r in level_data:
if i["iso_code"] == r["country_iso_alp2"]:
if r['alarm_lvl'] == None:
i['alarm_lvl'] = 5
else:
i["alarm_lvl"] = r["alarm_lvl"]
i["country_eng_nm"] = r["country_eng_nm"]
dict_list.append({'country_name' : i["country_eng_nm"], 'country_iso_alp2' : i["iso_code"], \
'alarm_lvl' : i["alarm_lvl"], "country_kr" : i["country_kr"]})
for i in dict_list:
i["notice"] = None
for j in text_data:
if i['country_iso_alp2'] == j['country_iso_alp2']:
i["notice"] = j["notice"].replace("'", "`").replace('"', "`") # 따옴표들 백틱으로 변경
# 4. db 연결
conn = pymysql.connect(host="localhost", user="coalarm", password="v4SxXqsLz", db="coalarm", charset="utf8")
cur = conn.cursor()
cur.execute('TRUNCATE TABLE Api_Data') # 테이블 레코드 비우기
# 5. 해당 테이블에 데이터 추가
for i in range(len(dict_list)):
cur.execute('INSERT INTO Api_Data VALUES("{0}", "{1}", "{2}", "{3}", "{4}")'.format(\
dict_list[i]["country_name"], \
dict_list[i]["country_iso_alp2"], \
dict_list[i]["country_kr"], \
float(dict_list[i]["alarm_lvl"]),\
dict_list[i]["notice"]))
conn.commit()
conn.close()
print(str(datetime.datetime.now()) + ": api_data table update complete")
# exchange_data
# 1. api 호출
exchange_data = get_exchange_api()
if len(exchange_data) != 0:
# 2. db 연결
conn = pymysql.connect(host="localhost", user="coalarm", password="v4SxXqsLz", db="coalarm", charset="utf8")
cur = conn.cursor()
cur.execute('TRUNCATE TABLE Exchange_Data') # 테이블 레코드 비우기
# 3. 해당 테이블에 데이터 추가
for i in range(len(exchange_data)):
cur.execute('INSERT INTO Exchange_Data VALUES("{0}", "{1}", "{2}")'.format(\
exchange_data[i]["cur_nm"], \
exchange_data[i]["cur_unit"], \
exchange_data[i]["deal_bas_r"]))
conn.commit()
conn.close()
print(str(datetime.datetime.now()) + ": exchange table update complete")
else:
print(str(datetime.datetime.now()) + ": 주말엔 exchange api가 안와요")
# 기능 4. embassy data update, 주기 : 24시간
def update_Embassy_Data(self):
# 1. scraping
embassy_data = get_embassy_data()
# return column : ['country_eng_nm', 'country_iso_alp2', 'country_nm', 'embassy_kor_nm', 'url']
# 3. db 연결
conn = pymysql.connect(host="localhost", user="coalarm", password="v4SxXqsLz", db="coalarm", charset="utf8")
cur = conn.cursor()
cur.execute('TRUNCATE TABLE Embassy_Data') # 테이블 레코드 비우기
# 4. 해당 테이블에 데이터 추가
for i in range(len(embassy_data)):
cur.execute('INSERT INTO Embassy_Data VALUES("{0}", "{1}", "{2}")'.format(\
embassy_data[i]["country_iso_alp2"], \
embassy_data[i]["embassy_kor_nm"], \
embassy_data[i]["url"]))
conn.commit()
conn.close()
print(str(datetime.datetime.now()) + ": embassy_data table update complete")
# 기능 5. safety data update, 주기 : 24시간
def update_Safety_Data(self):
# 1. scraping
safety_data = get_safety_data() # return : ['Country', 'Safety_index', 'Numbeo_index', 'Homicide_rate']
terror_data = get_terror_data() # return : ['Ranking', 'Country', 'Last', 'Previous']
# 2. 기반 데이터(country_ISO.json)에 api 데이터 병합
with open('./json_file/country_ISO.json', 'r') as f:
country_iso = json.load(f) # json_country key : ["Code", "Name"]
for i in safety_data:
i["iso_code"] = "없음"
for j in country_iso:
if i["Country"] == j["Name"]:
i["iso_code"] = j["Code"]
for i in terror_data:
i["iso_code"] = "없음"
for j in country_iso:
if i["Country"] == j["Name"]:
i["iso_code"] = j["Code"]
df_safety_data = pd.DataFrame(safety_data)
df_terror_data = pd.DataFrame(terror_data)
data = pd.merge(df_safety_data, df_terror_data, how = 'outer', on = "iso_code").fillna(-1)
data = data.to_dict(orient = "records")
# return : {'Safety_index', 'Numbeo_index', 'Homicide_rate', 'iso_code', 'Last', 'Previous'}
# 3. db 연결
conn = pymysql.connect(host="localhost", user="coalarm", password="v4SxXqsLz", db="coalarm", charset="utf8")
cur = conn.cursor()
cur.execute('TRUNCATE TABLE Safety_Data') # 테이블 레코드 비우기
# 4. 해당 테이블에 데이터 추가
for i in range(len(data)):
cur.execute('INSERT INTO Safety_Data VALUES("{0}", "{1}", "{2}", "{3}", "{4}", "{5}")'.format(\
data[i]["iso_code"], \
float(data[i]["Safety_index"]),\
float(data[i]["Numbeo_index"]),\
float(data[i]["Homicide_rate"]),\
float(data[i]["Last"]),\
float(data[i]["Previous"])))
conn.commit()
conn.close()
print(str(datetime.datetime.now()) + ": safety_data table update complete")
# 기능 6. Safety_Score update, 주기 : 24시간
def update_Safety_Score(self):
# 1. 기반 데이터 호출
with open('./json_file/new_continent.json', 'r') as f:
df_continent = pd.DataFrame(json.load(f)) # json_country key : ["iso_code", "continent"]
with open('./json_file/country_kr_ISO.json', 'r') as f:
json_country_kr = json.load(f) # json_country_kr key : ["country_kr", "iso_code"]
# 2. db 연결
conn = pymysql.connect(host='localhost', user="coalarm", password="v4SxXqsLz", db="coalarm", charset="utf8")
cur = conn.cursor()
cur.execute('TRUNCATE TABLE Safety_Score') # 테이블 레코드 비우기
'''
필요한 input 값
[
{
'iso_code': (value),
'total_caeses_per_1million_population' : (value),
'recovered': (value),
'critical': (value),
'fully_vaccinated': (value),
'lvl': (value),
'homicide_rate': (value),
'safety_index': (value),
'numbeo_index': (value),
'last_terrorism': (value),
'previous_terrorism': (value),
},
...
]
(value)가 nan, -1 인 값은 대륙(서유럽 등의 소분류) 평균 적용
이후에도 (value)가 nan, -1 인 값은 전 세계 평균 적용
'''
# 3. db 데이터 꺼내와서 가공
cur.execute("select v.iso_code, v.fully_vaccinated, s.homicide_rate, a.caution, c.total_caeses_per_1million_population, c.recovered_ratio, c.critical_ratio, \
s.safety_index, s.numbeo_index, s.last_terrorism, s.previous_terrorism \
from Corona_Vaccine_Data v \
join Safety_Data s using(iso_code) \
join Api_Data a using(iso_code) \
join Corona_Data c using(iso_code)")
row_headers=[x[0] for x in cur.description]
rv = cur.fetchall()
recommend_data=[]
for result in rv:
recommend_data.append(dict(zip(row_headers,result)))
# -1 -> nan
df_recommend_data = pd.DataFrame(recommend_data).replace(-1, np.NaN)
df_recommend_data["caution"] = df_recommend_data["caution"].apply(lambda x : x if x != 5 else 1.5)
df_recommend_data = pd.merge(df_continent, df_recommend_data, how = 'left', on = "iso_code").groupby("continent").apply(lambda x: x.fillna(x.mean()))
df_recommend_data = df_recommend_data.drop(["continent"], axis=1).reset_index()
# df_recommend_data = df_recommend_data.dropna(axis=0)
df_recommend_data = df_recommend_data.fillna(df_recommend_data.mean())
df_recommend_data = df_recommend_data.to_dict(orient = "records")
#print(len(df_recommend_data), type(df_recommend_data))
df_score = SafetyScore(df_recommend_data)
score = []
for i in range(len(df_score)):
dict_score = {}
dict_score['iso_code'] = df_score['iso_code'][i]
dict_score['score'] = round(float(df_score['score'][i]), 2)
dict_score["country_kr"] = dict_score["iso_code"]
for j in json_country_kr:
if dict_score["iso_code"] == j["iso_code"]:
dict_score["country_kr"] = j["country_kr"]
score.append(dict_score)
# 4. 해당 테이블에 데이터 추가
for i in range(len(score)):
cur.execute("INSERT INTO Safety_Score VALUES('{0}', '{1}', '{2}')".format(\
score[i]["iso_code"], \
score[i]["country_kr"], \
score[i]["score"]))
conn.commit()
conn.close()
print(str(datetime.datetime.now()) + ": Safety Score table update complete")