This commit is contained in:
dsyoon
2021-02-16 04:29:48 +09:00
parent e78aad9259
commit 890418a3ae
28 changed files with 54202 additions and 1 deletions

View File

@@ -0,0 +1,77 @@
import os
import shutil
import datetime
from stockpredictor.crawler.toJsonFile.FnGuideCrawler import FnGuideCrawler
from stockpredictor.crawler.toJsonFile.MetaCrawler import MetaCrawler
from stockpredictor.crawler.toJsonFile.StockCrawler import StockCrawler
# Date stamp of this run; not referenced anywhere else in this script.
today = datetime.datetime.now().strftime("%Y-%m-%d")
# Project root relative to the working directory the script is started from.
PROJECT_HOME = "../../.."


def _refresh_json(base_name, crawl):
    """Re-crawl ``resources/<base_name>.json`` through a temporary working copy.

    The current file (if any) is copied to ``<base_name>.temp.json``, ``crawl``
    is called with the temporary path, and on success the temporary file
    replaces the real one in a single ``os.replace`` call.  If ``crawl``
    raises, the real file is left untouched.

    Args:
        base_name: File name without extension, e.g. ``'meta_1'``.
        crawl: Callable taking the temporary file path and updating it in place.
    """
    inFileName = PROJECT_HOME + '/resources/' + base_name + '.json'
    outFileName = PROJECT_HOME + '/resources/' + base_name + '.temp.json'
    if os.path.isfile(inFileName):
        shutil.copy(inFileName, outFileName)
    crawl(outFileName)
    # os.replace overwrites the destination, so there is no moment at which
    # the data file does not exist (unlike remove-then-move).
    os.replace(outFileName, inFileName)


crawler = FnGuideCrawler()
print("[KOSPI 상장기업 재무제표 다운로드]")
_refresh_json('fnguide', crawler.crawl_fnguide)

crawler = MetaCrawler()
print("[환율 (USD, JPY, EUR, CNY), 원유 (WTI), 국제금]")
_refresh_json('meta_1', crawler.crawl_stocks)

print("[투자자별 매매동향(Trading_Trend)]")
_refresh_json('meta_2', crawler.crawl_trading_trend)

print("[증시자금동향 (신용잔고, 펀드자금 잔고)]")
_refresh_json('meta_3', crawler.crawl_money_trend)

print("[국내 시장금리]")
_refresh_json('meta_4', crawler.crawl_interest_rates)

# Stock prices follow a different flow: the raw crawl goes to the temporary
# file, and get_stocks_avg() writes the final file (with moving averages)
# from it.
print("[종목 다운로드]")
inFileName = PROJECT_HOME + '/resources/stock.json'
outFileName = PROJECT_HOME + '/resources/stock.temp.json'
if os.path.isfile(inFileName):
    shutil.copy(inFileName, outFileName)
crawler = StockCrawler()
crawler.crawl_stocks(outFileName)
crawler.get_stocks_avg(outFileName, inFileName)
if os.path.isfile(outFileName):
    os.remove(outFileName)

print ("done...")

View File

@@ -0,0 +1,113 @@
from bs4 import BeautifulSoup
from pandas import DataFrame, Series
import requests as re
import pandas as pd
import json
import requests
class FnGuideCrawler:
    """Download FnGuide financial-highlight tables for KRX-listed companies.

    The company list comes from the KRX corporate-list download; one JSON
    object per company is written to a JSON-lines file.
    """

    # Browser-like User-Agent sent with the KRX listing request.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}

    def getStockInfo(self):
        """Return a DataFrame with columns ``name`` and ``code`` for listed companies.

        Codes are formatted as zero-padded six-character strings.
        """
        listing_html = requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header).text
        code_df = pd.read_html(listing_html)[0]
        # Stock codes are six digits long, so pad them with leading zeros.
        code_df['종목코드'] = code_df['종목코드'].map('{:06d}'.format)
        # Only the company name and the stock code are needed.
        code_df = code_df[['회사명', '종목코드']]
        # Rename the Korean column names to English ones.
        code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})
        return code_df

    @staticmethod
    def _to_number(text):
        """Parse a table cell such as ``'1,234.5'`` to float; return 0 when it is not numeric."""
        cleaned = text.replace(',', '').strip()
        try:
            return float(cleaned)
        except ValueError:
            return 0

    @staticmethod
    def _period_label(text):
        """Turn a header label such as ``'2020/12'`` into ``'2020.12.01'``."""
        return text.replace("/", ".") + ".01"

    # Financial statements of listed companies crawled from FnGuide.
    # http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221294884955&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def get_fnguide_table(self, code):
        """Fetch the annual highlight table of one company.

        Args:
            code: Stock code string; surrounding whitespace is stripped.

        Returns:
            ``{period: {category: value}}`` as produced by
            ``DataFrame.T.to_dict()``, or ``{}`` when the expected table is
            not present in the page.
        """
        response = requests.get('http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s'%(code.strip()))
        html = BeautifulSoup(response.content, 'html.parser')
        body = html.find('body')
        try:
            fn_body = body.find('div', {'class': 'fng_body asp_body'})
            ur_table = fn_body.find('div', {'id': 'div15'})
            table = ur_table.find('div', {'id': 'highlight_D_Y'})
            tbody = table.find('tbody')
            rows = tbody.find_all('tr')
        except AttributeError:
            # A missing element makes find() return None, so the next
            # .find()/.find_all() raises AttributeError: no table for this code.
            return {}

        frame = DataFrame()
        for row in rows:
            # Row label: prefer the accordion span, fall back to the <th>.
            category = row.find('span', {'class': 'txt_acd'})
            if category is None:
                category = row.find('th')
            category = category.text.strip()
            # Row values: one right-aligned <td> per period.
            frame['%s' % (category)] = [
                self._to_number(cell.text)
                for cell in row.find_all('td', {'class': 'r'})
            ]

        # Period labels from the table header.
        thead = table.find('thead')
        header_cells = thead.find('tr', {'class': 'td_gapcolor2'}).find_all('th')
        year_list = []
        for cell in header_cells:
            span = cell.find('span', {'class': 'txt_acd'})
            label = cell.text if span is None else span.text
            year_list.append(self._period_label(label))
        frame.index = year_list
        return frame.T.to_dict()

    def crawl_fnguide(self, inFileName):
        """Write one JSON line ``{"NAME", "CODE", "PRICE"}`` per listed company.

        Args:
            inFileName: Path of the output file; it is overwritten.
        """
        code_df = self.getStockInfo()
        # The context manager closes the file even if a request fails midway.
        with open(inFileName, 'w', encoding='utf-8') as outFp:
            for idx, item in enumerate(code_df.values, start=1):
                item_name = item[0]
                item_code = item[1]
                print(idx, item_name)
                fnGuideData = self.get_fnguide_table(item_code)
                stock = {"NAME": item_name, "CODE": item_code, "PRICE": fnGuideData}
                outFp.write(json.dumps(stock, ensure_ascii=False) + "\n")
        return
if __name__ == "__main__":
    # Manual smoke run: fetch the highlight table of a single stock code.
    FnGuideCrawler().get_fnguide_table('155660')

View File

@@ -0,0 +1,278 @@
import json
import datetime
import requests
import pandas as pd
import os
class MetaCrawler:
    """Crawl market-wide indicators from Naver Finance into JSON-lines files.

    Every public ``crawl_*`` method takes the path of a JSON-lines file,
    reads what is already stored there, fetches only newer rows, and rewrites
    the file.  The file is rewritten only after the crawl has finished, so a
    failed request leaves the previous content in place.
    """

    # Browser-like User-Agent sent with every request.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    # Exclusive upper bound on the page number requested per source.
    limit_page_count = 10000

    def __init__(self):
        return

    @staticmethod
    def _read_json_lines(path):
        """Return the JSON objects stored one per line in ``path`` ([] if the file is missing)."""
        records = []
        if os.path.isfile(path):
            with open(path, 'r', encoding='utf-8') as in_fp:
                for line in in_fp:
                    line = line.strip()
                    if line:
                        records.append(json.loads(line))
        return records

    @staticmethod
    def _write_json_lines(path, records):
        """Overwrite ``path`` with one JSON object per line."""
        with open(path, 'w', encoding='utf-8') as out_fp:
            for record in records:
                out_fp.write(json.dumps(record, ensure_ascii=False) + "\n")

    @staticmethod
    def _series_state(stored, source):
        """Return ``(meta, last_day)`` for one source.

        The stored record with the same ``CODE`` is reused (matching by code
        instead of by position tolerates partial or reordered files).  Without
        a stored record, or with an empty price list, ``last_day`` is
        ``"1900.01.01"`` so that everything is fetched.
        """
        for record in stored:
            if record.get("CODE") == source['CODE']:
                prices = record.setdefault("PRICE", [])
                # PRICE is written newest-first, so the first entry is the latest day.
                last_day = prices[0]['DATE'] if prices else "1900.01.01"
                return record, last_day
        meta = {"NAME": source['NAME'], "CODE": source['CODE'], "PRICE": []}
        return meta, "1900.01.01"

    def _read_tables(self, url, **read_kwargs):
        """Download ``url`` and return the HTML tables parsed by ``pd.read_html``."""
        return pd.read_html(requests.get(url, headers=self.header).text, **read_kwargs)

    def _update_series(self, stored, source, skip_first_row=False, with_rate=True, announce=False):
        """Fetch the rows of ``source`` that are newer than what is stored.

        Args:
            stored: Records previously read from the output file.
            source: Dict with ``NAME``, ``CODE`` and ``URL``.
            skip_first_row: Ignore the first row of every page.
            with_rate: Also store the fourth column as ``rate``.
            announce: Print a progress line after each fully consumed page.

        Returns:
            The series record with ``PRICE`` sorted newest-first.
        """
        meta, last_day = self._series_state(stored, source)
        finish = False
        for page in range(1, self.limit_page_count):
            table = self._read_tables(source['URL'] + '&page=%s' % page)[0]
            # Keep requesting pages until one comes back without data rows.
            if len(table['날짜'].values) <= 1:
                break
            item = None
            for row_no, item in enumerate(table.values):
                if skip_first_row and row_no == 0:
                    continue
                if str(item[0]) == "nan":
                    continue
                if item[0] <= last_day:
                    # Reached a day that is already stored.
                    finish = True
                    break
                quote = {
                    "DATE": item[0],   # date
                    "close": item[1],  # closing price / base exchange rate
                    "diff": item[2],   # change from the previous day
                }
                if with_rate:
                    quote["rate"] = item[3]  # rate of change
                meta["PRICE"].append(quote)
            if finish:
                break
            if announce:
                # str() because the last row of a page may carry a NaN date.
                print(meta["NAME"] + " / " + str(item[0]))
        meta["PRICE"] = sorted(meta["PRICE"], key=lambda x: x['DATE'], reverse=True)
        return meta

    def _crawl_dated_rows(self, url, last_day, fields, empty_threshold, late_marker=False, **read_kwargs):
        """Collect rows newer than ``last_day`` from a paged table with two-digit-year dates.

        Args:
            url: Page URL without the trailing page number.
            last_day: Newest stored date (``"20YY.MM.DD"``).
            fields: Output keys for columns 1..len(fields) of each row.
            empty_threshold: A page with at most this many rows ends the crawl.
            late_marker: When true, the repeated-page marker is updated after
                the ``finish`` check instead of before it.
            **read_kwargs: Extra keyword arguments for ``pd.read_html``.

        Returns:
            List of row dicts in the order they were read.
        """
        rows = []
        previous_day = ""
        finish = False
        for page in range(1, self.limit_page_count):
            table = self._read_tables(url + str(page), **read_kwargs)[0]
            if len(table['날짜'].values) <= empty_threshold:
                break
            # The first row of each page is skipped.
            for item in table.values[1:]:
                if str(item[0]) == "nan":
                    continue
                # Stop at an already stored day, or when the row matches the
                # marker taken from the previous page (same page served again).
                if "20" + item[0] <= last_day or item[0] == previous_day:
                    finish = True
                    break
                row = {"DATE": "20" + item[0]}
                for column, field in enumerate(fields, start=1):
                    row[field] = item[column]
                rows.append(row)
                print("20" + item[0])
            if not late_marker:
                previous_day = table.values[2][0]
            if finish:
                break
            if late_marker:
                previous_day = table.values[2][0]
        return rows

    # Reference: http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221288761509
    def crawl_stocks(self, inFileName):
        """Update exchange rates (USD, JPY, EUR, CNY), WTI crude oil and gold.

        Args:
            inFileName: JSON-lines file with one ``{"NAME", "CODE", "PRICE"}``
                record per series; read if present, then overwritten.
        """
        stored = self._read_json_lines(inFileName)
        exchange = 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd='
        world = 'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd='
        inputs = [
            {'NAME': 'USD', 'CODE': 'FX_USDKRW', 'URL': exchange + 'FX_USDKRW'},  # US dollar
            {'NAME': 'JPY', 'CODE': 'FX_JPYKRW', 'URL': exchange + 'FX_JPYKRW'},  # Japanese yen
            {'NAME': 'EUR', 'CODE': 'FX_EURKRW', 'URL': exchange + 'FX_EURKRW'},  # euro
            {'NAME': 'CNY', 'CODE': 'FX_CNYKRW', 'URL': exchange + 'FX_CNYKRW'},  # Chinese yuan
            {'NAME': 'WTI', 'CODE': 'OIL_CL', 'URL': world + 'OIL_CL&fdtc=2'},    # WTI crude oil
            {'NAME': 'GOLD', 'CODE': 'CMDT_GC', 'URL': world + 'CMDT_GC&fdtc=2'}, # international gold
        ]
        currencies = ('USD', 'JPY', 'EUR', 'CNY')
        results = []
        for source in inputs:
            is_currency = source['NAME'] in currencies
            # Currency pages: the first row is skipped and there is no rate column.
            results.append(self._update_series(
                stored, source, skip_first_row=is_currency, with_rate=not is_currency))
        self._write_json_lines(inFileName, results)
        return

    # Investor trading trend (Trading_Trend) crawl
    # (pri, individuals)
    # (for, foreigners)
    # (ins, institutions total)
    # (ins0, financial investment)
    # (ins1, insurance)
    # (ins2, investment trusts (private))
    # (ins3, banks)
    # (ins4, other financial institutions)
    # (ins5, pension funds etc.)
    # (cor, other corporations)
    # Reference: http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221289696771&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_trading_trend(self, inFileName):
        """Update the per-investor-type daily trading trend.

        Args:
            inFileName: JSON-lines file with one record per day, newest
                first; read if present, then overwritten with the new rows
                followed by the stored ones.
        """
        stored = self._read_json_lines(inFileName)
        today = datetime.datetime.now().strftime("%Y%m%d")
        url = 'http://finance.naver.com/sise/investorDealTrendDay.nhn?bizdate='+today+'&sosok=&page='
        last_day = stored[0]['DATE'] if stored else "1900.01.01"
        fields = ("pri", "for", "ins", "ins0", "ins1", "ins2", "ins3", "ins4", "ins5", "cor")
        fresh = self._crawl_dated_rows(url, last_day, fields, empty_threshold=2)
        self._write_json_lines(inFileName, fresh + stored)
        return

    # Market fund flows (credit balance, fund balances) crawl
    # Reference: http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221290138187&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_money_trend(self, inFileName):
        """Update the daily market fund-flow table.

        Stored keys ``depK_1`` / ``depK_2`` are the cumulative / daily values
        of: 1 customer deposits, 2 credit balance, 3 equity funds, 4 hybrid
        funds, 5 bond funds.

        Args:
            inFileName: JSON-lines file with one record per day, newest
                first; read if present, then overwritten with the new rows
                followed by the stored ones.
        """
        stored = self._read_json_lines(inFileName)
        url = 'http://finance.naver.com/sise/sise_deposit.nhn?&page='
        last_day = stored[0]['DATE'] if stored else "1900.01.01"
        fields = ("dep1_1", "dep1_2", "dep2_1", "dep2_2", "dep3_1",
                  "dep3_2", "dep4_1", "dep4_2", "dep5_1", "dep5_2")
        fresh = self._crawl_dated_rows(
            url, last_day, fields, empty_threshold=10, late_marker=True, encoding='euc-kr')
        self._write_json_lines(inFileName, fresh + stored)
        return

    # Domestic market interest rates crawl
    # Reference: http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221292348073&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_interest_rates(self, inFileName):
        """Update domestic market interest rates (CD 91-day, call, 3-year bonds).

        Args:
            inFileName: JSON-lines file with one ``{"NAME", "CODE", "PRICE"}``
                record per rate; read if present, then overwritten.
        """
        stored = self._read_json_lines(inFileName)
        base = 'http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd='
        inputs = [
            {'NAME': '91일 CD금리', 'CODE': 'IRR_CD91', 'URL': base + 'IRR_CD91'},
            {'NAME': '콜금리', 'CODE': 'IRR_CALL', 'URL': base + 'IRR_CALL'},
            {'NAME': '국고채(3년)', 'CODE': 'IRR_GOVT03Y', 'URL': base + 'IRR_GOVT03Y'},
            {'NAME': '회사채(3년)', 'CODE': 'IRR_CORP03Y', 'URL': base + 'IRR_CORP03Y'},
        ]
        results = [self._update_series(stored, source, announce=True) for source in inputs]
        self._write_json_lines(inFileName, results)
        return

View File

@@ -0,0 +1,695 @@
# https://bigdata-sk.tistory.com/10
import pandas as pd
import re
import json
import os
import requests
class Queue(object):
    """Fixed-capacity FIFO window of numbers, used for moving averages."""

    def __init__(self, max):
        # ``max`` is the window capacity; ``queue`` holds the values oldest-first.
        self.max = max
        self.queue = []

    def dequeue(self):
        """Remove and return the oldest value; return -1 unless the window is full."""
        held = len(self.queue)
        if held == 0:
            return -1
        if held < self.max:
            return -1
        oldest = self.queue[0]
        del self.queue[0]
        return oldest

    def enqueue(self, n):
        """Append ``n``, first dropping the oldest value when the window is full."""
        if len(self.queue) == self.max:
            self.dequeue()
        self.queue.append(n)

    def sum(self):
        """Return the sum of the held values (0 when empty)."""
        total = 0
        for value in self.queue:
            total = total + value
        return total

    def avg(self):
        """Return the mean of the held values rounded with ``round()``."""
        count = len(self.queue)
        return round(self.sum() / count)

    def print(self):
        """Print the current sum followed by the held values."""
        print(self.sum(), self.queue)
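# The this_date value converted to date format can be checked here.
# Date information that has been read needs converting to date format again and again,
# so that step is defined as a function named date_format().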
class StockCrawler:
    """Crawl daily prices of KRX-listed stocks and selected KODEX ETFs from Naver Finance.

    Data is stored as JSON lines, one ``{"NAME", "CODE", "PRICE": [...]}``
    object per stock, where each ``PRICE`` entry has ``DATE`` (``YYYY-MM-DD``),
    ``close``, ``diff``, ``open``, ``high``, ``low`` and ``volume``.
    """

    # Browser-like User-Agent sent with every request.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    historical_prices = None
    special_pattern = None
    fnGuideCrawler = None
    # Exclusive upper bound on the page number fetched by crawl_specific_stock().
    limit_page_count = 40

    # Window lengths of the moving averages stored as avg3 ... avg240.
    _AVG_WINDOWS = (3, 5, 7, 10, 20, 30, 60, 90, 100, 120, 150, 180, 200, 240)
    # Price columns after renaming, in table order (columns 1..6).
    _PRICE_FIELDS = ('close', 'diff', 'open', 'high', 'low', 'volume')
    _DAY_URL = 'http://finance.naver.com/item/sise_day.nhn?code={code}'
    # ETFs crawled by crawl_stocks(), written before the listed companies.
    _ETFS = (
        ('KODEX 코스닥150선물인버스', '251340'),
        ('KODEX 코스닥150 레버리지', '233740'),
        ('KODEX 200선물인버스2X', '252670'),
        ('KODEX 레버리지', '122630'),
        ('KODEX 골드선물(H)', '132030'),
    )
    # ETFs handled by update_stocks(), written after the listed companies.
    _UPDATE_ETFS = (
        ('KODEX 200선물인버스2X', '252670'),
        ('KODEX 레버리지', '122630'),
        ('KODEX 골드선물(H)', '132030'),
    )

    def __init__(self):
        self.historical_prices = dict()
        self.special_pattern = (
            '[', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', ',', '.', '?', '"', ':', ';', '{', '}', '|', '<', '>',
            ']', '+', '-', '/', '=', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
        return

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _read_json_lines(path):
        """Return the JSON objects stored one per line in ``path`` ([] if the file is missing)."""
        records = []
        if os.path.isfile(path):
            with open(path, 'r', encoding='utf-8') as in_fp:
                for line in in_fp:
                    line = line.strip()
                    if line:
                        records.append(json.loads(line))
        return records

    def _new_queues(self):
        """Return a fresh ``[(window, Queue(window)), ...]`` list, one per average."""
        return [(window, Queue(window)) for window in self._AVG_WINDOWS]

    @staticmethod
    def _push_close(queues, close):
        """Feed ``close`` into every window and return ``{'avgN': value}`` in window order."""
        for _, queue in queues:
            queue.enqueue(close)
        return {'avg%d' % window: queue.avg() for window, queue in queues}

    def _price_row(self, columns, values, queues):
        """Build one PRICE entry (with moving averages) from a crawled table row."""
        row = {"DATE": str(values[0]).split(' ')[0]}
        for position in range(1, 7):
            row[columns[position]] = values[position]
        row.update(self._push_close(queues, values[1]))
        return row

    def _fetch_daily_frame(self, item_code, max_pages, stop_date=""):
        """Download the daily price pages of one stock as a cleaned DataFrame.

        Pages ``1 .. max_pages-1`` are requested, newest first.  Fetching
        stops early after a page that is not full (fewer than 10 dated rows),
        after the page that contains ``stop_date``, or when a page repeats
        the previous one (seen when a page number past the end is requested).

        Args:
            item_code: Stock code; surrounding whitespace is stripped.
            max_pages: Exclusive upper bound on the page number.
            stop_date: Date as shown by the site (``YYYY.MM.DD``), or ``""``.

        Returns:
            DataFrame with columns date, close, diff, open, high, low, volume;
            rows containing missing values are dropped.
        """
        url = self._DAY_URL.format(code=item_code.strip())
        frames = []
        previous_dates = None
        for page in range(1, max_pages):
            pg_url = '{url}&page={page}'.format(url=url, page=page)
            table = pd.read_html(requests.get(pg_url, headers=self.header).text)[0]
            dates = [d for d in table['날짜'].values if isinstance(d, str)]
            if frames and dates == previous_dates:
                break
            frames.append(table)
            previous_dates = dates
            if len(dates) != 10 or stop_date in dates:
                break
        # DataFrame.append was removed in pandas 2.0; concat builds the same frame.
        df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        # Drop rows with missing values (separator rows).
        df = df.dropna()
        # Rename the Korean column names to English ones.
        df = df.rename(columns={'날짜': 'date', '종가': 'close', '전일비': 'diff', '시가': 'open', '고가': 'high', '저가': 'low', '거래량': 'volume'})
        # Convert the numeric columns to int.
        df[['close', 'diff', 'open', 'high', 'low', 'volume']] = df[['close', 'diff', 'open', 'high', 'low', 'volume']].astype(int)
        # Convert the 'date' column to datetime.
        df['date'] = pd.to_datetime(df['date'])
        return df

    # --------------------------------------------------------------- public API

    def clean_str(self, string):
        """Replace quotes, backslashes, hyphens and parenthesised text by spaces; strip and lowercase."""
        string = re.sub(r"\\", " ", string)
        string = re.sub(r"\'", " ", string)
        string = re.sub(r"\"", " ", string)
        string = re.sub(r"`", " ", string)
        string = re.sub(r"-", " ", string)
        string = re.sub(r"\(.*?\)", " ", string)
        # NOTE(review): as written this replaces a single space by a single
        # space (no effect); presumably meant to collapse repeated spaces - confirm.
        string = re.sub(r" ", " ", string)
        return string.strip().lower()

    def getStockInfo(self):
        """Return a DataFrame with columns ``name`` and ``code`` for listed companies.

        Codes are formatted as zero-padded six-character strings.
        """
        listing_html = requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header).text
        code_df = pd.read_html(listing_html)[0]
        # Stock codes are six digits long, so pad them with leading zeros.
        code_df['종목코드'] = code_df['종목코드'].map('{:06d}'.format)
        # Only the company name and the stock code are needed.
        code_df = code_df[['회사명', '종목코드']]
        # Rename the Korean column names to English ones.
        code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})
        return code_df

    # Look up the code that belongs to a stock name and build the
    # Naver Finance (http://finance.naver.com) daily-price URL from it.
    def get_url(self, item_name, code_df):
        """Return ``(code, url)`` for the company called ``item_name`` in ``code_df``."""
        code = code_df.query("name=='{}'".format(item_name))['code'].to_string(index=False).strip()
        url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code.strip())
        return code, url

    def date_format(self, d):
        """Return ``d`` as a string with ``-`` replaced by ``.``."""
        return str(d).replace('-', '.')

    def getCodeIndex(self, stocks, item_code):
        """Return the index of the first record in ``stocks`` whose CODE is ``item_code``, else -1."""
        for i, stock in enumerate(stocks):
            if item_code == stock['CODE']:
                return i
        return -1

    def crawl_stocks(self, inFileName):
        """Update the price history of the five KODEX ETFs and all listed companies.

        Args:
            inFileName: JSON-lines file; read if present (its PRICE lists are
                extended), then overwritten record by record.
        """
        stocks = self._read_json_lines(inFileName)
        # First position of every code, so stored history is found by code
        # rather than by line position.
        first_index = {}
        for position, record in enumerate(stocks):
            first_index.setdefault(record['CODE'], position)

        def stored_prices(item_code):
            position = first_index.get(item_code)
            return [] if position is None else stocks[position]["PRICE"]

        with open(inFileName, "w", encoding="utf-8") as outFp:
            for name, code in self._ETFS:
                stock = {"NAME": name, "CODE": code, "PRICE": stored_prices(code)}
                stock = self.crawl_specific_stock(name, code, stock)
                outFp.write(json.dumps(stock, ensure_ascii=False) + "\n")
            code_df = self.getStockInfo()
            for idx, item in enumerate(code_df.values, start=1):
                item_name = item[0]
                item_code = item[1]
                print(idx, item_name, item_code)
                stock = {"NAME": item_name, "CODE": item_code, "PRICE": stored_prices(item_code)}
                stock = self.crawl_specific_stock(item_name, item_code, stock)
                outFp.write(json.dumps(stock, ensure_ascii=False) + "\n")
        return

    def get_stocks_avg(self, inFileName, outFileName):
        """Copy ``inFileName`` to ``outFileName`` adding avg3 ... avg240 to every PRICE entry.

        Each average is the rounded mean of the closes seen so far within the
        window, in the stored order of the PRICE list.
        """
        # The input is opened first so that a missing input cannot truncate the output.
        with open(inFileName, 'r', encoding='utf-8') as inFp, \
                open(outFileName, 'w', encoding='utf-8') as outFp:
            for line in inFp:
                line = line.strip()
                if not line:
                    continue
                jsonData = json.loads(line)
                queues = self._new_queues()
                for item in jsonData["PRICE"]:
                    item.update(self._push_close(queues, item['close']))
                outFp.write(json.dumps(jsonData, ensure_ascii=False) + "\n")
        return

    def crawl_specific_stock(self, code_name, code, stock):
        """Append the days newer than the stored history to ``stock["PRICE"]``.

        Args:
            code_name: Stock name (not used for the request).
            code: Stock code.
            stock: Record with ``NAME``, ``CODE`` and ``PRICE``; modified in place.

        Returns:
            ``stock`` with ``PRICE`` sorted by ``DATE`` ascending.
        """
        prices = stock.setdefault("PRICE", [])
        # Newest stored day regardless of the list's sort order.
        newest_day = max((entry["DATE"] for entry in prices), default="")
        df = self._fetch_daily_frame(
            code, self.limit_page_count, stop_date=newest_day.replace("-", "."))
        for values in df.values:
            day = str(values[0]).split(' ')[0]
            # Rows arrive newest first; everything from here on is already stored.
            if day <= newest_day:
                break
            entry = {"DATE": day}
            for position in range(1, 7):
                entry[df.columns[position]] = values[position]
            prices.append(entry)
        stock["PRICE"] = sorted(prices, key=lambda x: x['DATE'])
        return stock

    def update_stocks(self, inFileName):
        """Rebuild ``inFileName`` with refreshed prices and moving averages.

        Without stored companies, up to 999 pages per listed company are
        crawled.  Otherwise only the first page is fetched per stored
        company: days already stored get their price fields overwritten,
        new days are appended.  Company records are written newest-first,
        followed by the three KODEX ETFs updated via crawl_specific_stock().
        """
        etfs = {code: {"NAME": name, "CODE": code, "PRICE": []}
                for name, code in self._UPDATE_ETFS}
        stocks = []
        for record in self._read_json_lines(inFileName):
            record["PRICE"] = sorted(record["PRICE"], key=lambda x: x['DATE'], reverse=True)
            if record['CODE'] in etfs:
                etfs[record['CODE']] = record
            else:
                stocks.append(record)

        full_crawl = len(stocks) == 0
        if full_crawl:
            limit_page_count = 1000
            stocks = self.getStockInfo().values
        else:
            limit_page_count = 2

        with open(inFileName, 'w', encoding='utf-8') as outFp:
            for idx, item in enumerate(stocks, start=1):
                if full_crawl:
                    item_name, item_code = item[0], item[1]
                else:
                    item_name, item_code = item['NAME'], item['CODE']
                print(idx, item_name)
                stock = {"NAME": item_name, "CODE": item_code, "PRICE": []}
                df = self._fetch_daily_frame(item_code, limit_page_count)
                queues = self._new_queues()
                if full_crawl:
                    for values in df.values:
                        stock["PRICE"].append(self._price_row(df.columns, values, queues))
                else:
                    by_date = {}
                    # Rows read from the existing file.
                    for old in item["PRICE"]:
                        row = {"DATE": str(old["DATE"]).split(' ')[0]}
                        for column, field in zip(df.columns[1:7], self._PRICE_FIELDS):
                            row[column] = old[field]
                        # Every average comes from its own window (avg3 and
                        # avg7 previously reused the 5-day window).
                        row.update(self._push_close(queues, old["close"]))
                        stock["PRICE"].append(row)
                        by_date.setdefault(row["DATE"], row)
                    # Rows freshly collected from the web.
                    for values in df.values:
                        date = str(values[0]).split(' ')[0]
                        known = by_date.get(date)
                        if known is not None:
                            for position in range(1, 7):
                                known[df.columns[position]] = values[position]
                        else:
                            # A new day (or today's row).
                            row = self._price_row(df.columns, values, queues)
                            stock["PRICE"].append(row)
                            by_date[date] = row
                stock["PRICE"] = sorted(stock["PRICE"], key=lambda x: x['DATE'], reverse=True)
                outFp.write(json.dumps(stock, ensure_ascii=False)+"\n")
            for name, code in self._UPDATE_ETFS:
                updated = self.crawl_specific_stock(name, code, etfs[code])
                outFp.write(json.dumps(updated, ensure_ascii=False) + "\n")
        return

    def update_specific_stock(self, code_name, code, stock):
        """Append crawled rows (with moving averages) to ``stock["PRICE"]``.

        Up to 999 pages are fetched when ``PRICE`` is empty, otherwise only
        the first page.

        Returns:
            ``stock`` with ``PRICE`` sorted by ``DATE`` descending.
        """
        print(code_name)
        limit_page_count = 1000 if len(stock["PRICE"]) == 0 else 2
        df = self._fetch_daily_frame(code, limit_page_count)
        queues = self._new_queues()
        # NOTE(review): rows are appended even when their date is already in
        # PRICE, so repeated calls can store a day twice - confirm intent.
        for values in df.values:
            stock["PRICE"].append(self._price_row(df.columns, values, queues))
        stock["PRICE"] = sorted(stock["PRICE"], key=lambda x: x['DATE'], reverse=True)
        return stock

View File

@@ -0,0 +1,63 @@
import os
import shutil
import datetime
from stockpredictor.crawler.toSQLite.MetaCrawler import MetaCrawler
from stockpredictor.crawler.toSQLite.StockCrawler import StockCrawler
from stockpredictor.analysis.Analyzer import Analyzer
# Driver script: refresh every SQLite data file under resources/, run the
# analyzer on the stock data and write today's HTML report.
today = datetime.datetime.now().strftime("%Y-%m-%d")

# DB Browser for SQLite: http://hleecaster.com/python-sqlite3/
PROJECT_HOME = "../../.."
inFnguideFileName = PROJECT_HOME + '/resources/fnguide.db'

# The FnGuide financial-statement download is currently disabled. To re-enable
# it, import FnGuideCrawler and restore the three lines below.
# crawler = FnGuideCrawler()
# print("[KOSPI 상장기업 재무제표 다운로드]")
# crawler.crawl_fnguide(inFnguideFileName)

crawler = MetaCrawler()

# Exchange rates (USD, JPY, EUR, CNY), crude oil (WTI), international gold.
print("\n[환율 (USD, JPY, EUR, CNY), 원유 (WTI), 국제금]")
inFileName = PROJECT_HOME + '/resources/meta_1.db'
crawler.crawl_stocks(inFileName)

# Trading trend by investor type.
print("\n[투자자별 매매동향(Trading_Trend)]")
inFileName = PROJECT_HOME + '/resources/meta_2.db'
crawler.crawl_trading_trend(inFileName)

# Market fund flows (credit balance, fund balances).
print("\n[증시자금동향 (신용잔고, 펀드자금 잔고)]")
inFileName = PROJECT_HOME + '/resources/meta_3.db'
crawler.crawl_money_trend(inFileName)

# Domestic market interest rates.
print("\n[국내 시장금리]")
inFileName = PROJECT_HOME + '/resources/meta_4.db'
crawler.crawl_interest_rates(inFileName)

# Per-stock daily prices (ETFs first, then every listed company).
print("\n[종목 다운로드]")
inFileName = PROJECT_HOME + '/resources/stock.db'
crawler = StockCrawler()
crawler.crawl_etf_stocks(inFileName)
crawler.crawl_stocks(inFileName)

print("\n[종목 분석]")
# S: run through the analysis step
inFileName = PROJECT_HOME + '/resources/stock.db'
analyzer = Analyzer(PROJECT_HOME, inFileName, inFnguideFileName)
analyzer.analyze()

print("\n[종목 결정]")
day = datetime.datetime.today().strftime("%Y%m%d")
outPath = PROJECT_HOME + "/resources/analysis/" + day
# Start from an empty output directory for today's report.
if os.path.isdir(outPath):
    shutil.rmtree(outPath)
# makedirs (not mkdir) so a missing resources/analysis parent is created too.
os.makedirs(outPath)
print("print to Html...")
analyzer.analyzeToHtml(outPath)
# E: run through the analysis step
print ("done...")

View File

@@ -0,0 +1,124 @@
from bs4 import BeautifulSoup
from pandas import DataFrame, Series
import requests as re
import pandas as pd
import json
import sqlite3
import requests
class FnGuideCrawler:
    """Crawl FnGuide financial-highlight tables for KRX-listed companies.

    The listed-company table comes from kind.krx.co.kr, the per-company
    figures from comp.fnguide.com, and the results are stored as JSON text in
    a SQLite table named ``fnguide``.
    """

    # Browser-like User-Agent sent with every HTTP request made by this class.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}

    def getStockInfo(self):
        """Download the KRX listed-company table.

        Returns:
            A DataFrame with the columns ``name`` (company name) and ``code``
            (ticker formatted as a zero-padded six-digit string).
        """
        response = requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header)
        # read_html returns a list of tables; the company list is the first one.
        code_df = pd.read_html(response.text)[0]
        # Tickers are six digits long, so restore the leading zeros.
        code_df.종목코드 = code_df.종목코드.map('{:06d}'.format)
        # Only the company name and the ticker are needed.
        code_df = code_df[['회사명', '종목코드']]
        # Rename the Korean column names to English.
        code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})
        return code_df

    # Financial statements of listed companies crawled from FnGuide.
    # http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221294884955&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def get_fnguide_table(self, code):
        """Fetch the annual financial-highlight table of one company.

        Args:
            code: Ticker string; surrounding whitespace is stripped.

        Returns:
            A nested dict as produced by ``DataFrame.T.to_dict()``: the outer
            keys are period labels (``YYYY.MM.01``) and the inner dicts map
            row captions to floats (0 where a cell is not numeric). An empty
            dict is returned when the expected page elements are missing.
        """
        response = requests.get('http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s' % (code.strip()), headers=self.header)
        html = BeautifulSoup(response.content, 'html.parser')
        body = html.find('body')
        try:
            fn_body = body.find('div', {'class': 'fng_body asp_body'})
            ur_table = fn_body.find('div', {'id': 'div15'})
            table = ur_table.find('div', {'id': 'highlight_D_Y'})
            tbody = table.find('tbody')
            tr = tbody.find_all('tr')
        except AttributeError:
            # A missing element makes find() return None, so the next lookup
            # fails; such pages simply carry no highlight table.
            return {}

        Table = DataFrame()
        for row in tr:
            # Row caption: prefer the accordion span, fall back to the <th>.
            category = row.find('span', {'class': 'txt_acd'})
            if category is None:
                category = row.find('th')
            category = category.text.strip()

            # Row values: numeric cells, with 0 for anything unparsable.
            value_list = []
            for cell in row.find_all('td', {'class': 'r'}):
                temp = cell.text.replace(',', '').strip()
                try:
                    value_list.append(float(temp))
                except ValueError:
                    value_list.append(0)
            Table['%s' % (category)] = value_list

        # Period labels taken from the table header.
        thead = table.find('thead')
        tr_2 = thead.find('tr', {'class': 'td_gapcolor2'}).find_all('th')
        year_list = []
        for th in tr_2:
            try:
                temp_year = th.find('span', {'class': 'txt_acd'}).text
            except AttributeError:
                # No accordion span in this header cell; use its plain text.
                temp_year = th.text
            temp_year = temp_year.replace("/", ".") + ".01"
            year_list.append(temp_year)
        Table.index = year_list
        return Table.T.to_dict()

    def crawl_fnguide(self, inFileName):
        """Crawl every listed company and upsert its table into SQLite.

        Args:
            inFileName: Path of the SQLite database file. The connection is
                opened with ``isolation_level=None``, so each statement is
                committed immediately.
        """
        tableName = 'fnguide'
        conn = sqlite3.connect(inFileName, isolation_level=None)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS "+tableName+" (CODE text PRIMARY KEY, NAME text, PRICE text)")
            code_df = self.getStockInfo()
            for idx, item in enumerate(code_df.values, start=1):
                item_name = item[0]
                item_code = item[1]
                print(idx, item_name)
                fnGuideData = self.get_fnguide_table(item_code)
                text = json.dumps(fnGuideData, ensure_ascii=False)
                cursor.execute('SELECT * FROM '+tableName+' WHERE CODE=?', (item_code, ))
                result = cursor.fetchone()
                if result is None:
                    cursor.execute("INSERT INTO "+tableName+"(CODE, NAME, PRICE) VALUES(?, ?, ?)", (item_code, item_name, text))
                else:
                    cursor.execute("UPDATE "+tableName+" SET PRICE=? WHERE CODE=?", (text, item_code))
            cursor.close()
        finally:
            # Release the database file even when a request or parse fails.
            conn.close()
        return
if __name__ == "__main__":
    # Manual smoke run: fetch the FnGuide table for a single ticker.
    FnGuideCrawler().get_fnguide_table('155660')

View File

@@ -0,0 +1,301 @@
import json
import datetime
import requests
import sqlite3
import pandas as pd
import os
class MetaCrawler:
    """Crawl market-wide indicators from Naver Finance into SQLite files.

    Each public ``crawl_*`` method owns one table (``meta_1`` .. ``meta_4``)
    and is incremental: rows dated on or before the newest stored date are
    not fetched again.
    """

    # Browser-like User-Agent sent with every HTTP request made by this class.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    # Upper bound on the number of result pages requested per source.
    limit_page_count = 10000

    def __init__(self):
        return

    def _load_series(self, cursor, tableName, code):
        """Return ``(prices, lastDay)`` for the row stored under ``code``.

        ``prices`` is the decoded JSON list (empty when nothing is stored) and
        ``lastDay`` the DATE of its first entry, or ``"1900.01.01"`` when the
        list is empty so that every crawled row counts as new.
        """
        cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=?', (code,))
        result = cursor.fetchone()
        prices = [] if result is None else json.loads(result[2])
        # Series are stored newest-first, so entry 0 holds the latest date.
        lastDay = prices[0]['DATE'] if prices else "1900.01.01"
        return prices, lastDay

    def _save_series(self, cursor, tableName, meta):
        """Sort ``meta["PRICE"]`` newest-first and upsert it as JSON text."""
        meta["PRICE"] = sorted(meta["PRICE"], key=lambda x: x['DATE'], reverse=True)
        text = json.dumps(meta["PRICE"], ensure_ascii=False)
        cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=?', (meta["CODE"],))
        if cursor.fetchone() is None:
            cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, PRICE) VALUES(?, ?, ?)", (meta["CODE"], meta["NAME"], text))
        else:
            cursor.execute("UPDATE " + tableName + " SET PRICE=? WHERE CODE=?", (text, meta["CODE"]))

    def _crawl_daily_table(self, inFileName, tableName, columns, url, min_rows, **read_html_kwargs):
        """Crawl a paged Naver table with one row per day into ``tableName``.

        Args:
            inFileName: Path of the SQLite database file.
            tableName: Table to create/update; DATE is its primary key.
            columns: Names of the integer columns, in page column order.
            url: Page URL prefix; the page number is appended to it.
            min_rows: A page with this many rows or fewer ends the crawl.
            **read_html_kwargs: Extra keyword arguments for ``pd.read_html``.
        """
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (DATE text PRIMARY KEY, " + ", ".join(name + " integer" for name in columns) + ")")
            cursor.execute('SELECT * FROM ' + tableName + ' order by DATE desc')
            result = cursor.fetchone()
            lastDay = "1900.01.01" if result is None else result[0]

            insert_sql = "INSERT INTO " + tableName + "(DATE, " + ", ".join(columns) + ") VALUES(" + ", ".join("?" * (len(columns) + 1)) + ")"
            update_sql = "UPDATE " + tableName + " SET " + ", ".join(name + "=?" for name in columns) + " WHERE DATE=?"

            previousDay = ""
            finish = False
            for page in range(1, self.limit_page_count):
                html = pd.read_html(requests.get(url + str(page), headers=self.header).text, **read_html_kwargs)
                # Fetch until the last page.
                if len(html[0].날짜.values) <= min_rows:
                    break
                rows = html[0].values
                # Row 0 is skipped, as in the original loop; presumably a
                # sub-header row -- TODO confirm against the live page.
                for item in rows[1:]:
                    if str(item[0]) == "nan":
                        continue
                    # Stop at already-stored dates, or when the date matches
                    # the row remembered from the previous page (which looks
                    # like the same page being served again).
                    if "20" + item[0] <= lastDay or item[0] == previousDay:
                        finish = True
                        break
                    day = "20" + item[0]
                    values = tuple(item[1:len(columns) + 1])
                    cursor.execute('SELECT * FROM ' + tableName + ' WHERE DATE=?', (day,))
                    if cursor.fetchone() is None:
                        cursor.execute(insert_sql, (day,) + values)
                    else:
                        cursor.execute(update_sql, values + (day,))
                    print(day)
                if finish:
                    break
                previousDay = rows[2][0]
            conn.commit()
            cursor.close()
        finally:
            # Release the database file even when a request or parse fails.
            conn.close()

    # Reference) http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221288761509
    def crawl_stocks(self, inFileName):
        """Crawl exchange rates, WTI crude oil and gold into table ``meta_1``.

        Each instrument is one row (CODE, NAME, PRICE) whose PRICE column is a
        JSON list of daily entries sorted newest-first.

        Args:
            inFileName: Path of the SQLite database file.
        """
        tableName = 'meta_1'
        inputs = [
            {'NAME':'USD', 'CODE':'FX_USDKRW', 'URL':'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW'},  # United States USD
            {'NAME':'JPY', 'CODE':'FX_JPYKRW', 'URL':'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_JPYKRW'},  # Japan JPY
            {'NAME':'EUR', 'CODE':'FX_EURKRW', 'URL':'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_EURKRW'},  # European Union EUR
            {'NAME':'CNY', 'CODE':'FX_CNYKRW', 'URL':'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_CNYKRW'},  # China CNY
            {'NAME':'WTI', 'CODE':'OIL_CL', 'URL':'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2'},  # WTI
            {'NAME':'GOLD', 'CODE':'CMDT_GC', 'URL':'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2'},  # international gold
        ]
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS "+tableName+" (CODE text PRIMARY KEY, NAME text, PRICE text)")
            for source in inputs:
                prices, lastDay = self._load_series(cursor, tableName, source['CODE'])
                meta = {"NAME": source['NAME'], "CODE": source['CODE'], "PRICE": prices}
                is_fx = source['NAME'] in ('USD', 'JPY', 'EUR', 'CNY')
                is_commodity = source['NAME'] in ('WTI', 'GOLD')
                finish = False
                for page in range(1, self.limit_page_count):
                    html = pd.read_html(requests.get(source['URL'] + '&page=%s' % page, headers=self.header).text)
                    # Fetch until the last page.
                    if len(html[0].날짜.values) <= 1:
                        break
                    for j, item in enumerate(html[0].values):
                        if is_fx:
                            # Row 0 of the exchange-rate table is skipped, as
                            # in the original code.
                            if j == 0:
                                continue
                            if item[0] <= lastDay:
                                finish = True
                                break
                            meta["PRICE"].append({
                                "DATE": item[0],   # date
                                "close": item[1],  # base exchange rate
                                "diff": item[2]    # change from previous day
                            })
                        elif is_commodity:
                            if item[0] <= lastDay:
                                finish = True
                                break
                            meta["PRICE"].append({
                                "DATE": item[0],   # date
                                "close": item[1],  # closing price
                                "diff": item[2],   # change from previous day
                                "rate": item[3]    # rate of change
                            })
                    if finish:
                        break
                self._save_series(cursor, tableName, meta)
                conn.commit()
            cursor.close()
        finally:
            # Release the database file even when a request or parse fails.
            conn.close()
        return

    # Crawl the trading trend by investor type (Trading_Trend)
    # (pri, individuals)
    # (for, foreigners)
    # (ins, institutions total)
    # (ins0, financial investment)
    # (ins1, insurance)
    # (ins2, investment trusts (private equity))
    # (ins3, banks)
    # (ins4, other financial institutions)
    # (ins5, pension funds etc.)
    # (cor, other corporations)
    # Reference) http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221289696771&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_trading_trend(self, inFileName):
        """Crawl daily net trading by investor type into table ``meta_2``.

        Args:
            inFileName: Path of the SQLite database file.
        """
        today = datetime.datetime.now().strftime("%Y%m%d")
        url = 'http://finance.naver.com/sise/investorDealTrendDay.nhn?bizdate='+today+'&sosok=&page='
        columns = ('pri', 'for', 'ins', 'ins0', 'ins1', 'ins2', 'ins3', 'ins4', 'ins5', 'cor')
        self._crawl_daily_table(inFileName, 'meta_2', columns, url, 2)
        return

    # Crawl market fund flows (credit balance, fund balances)
    # depN_1 is the cumulative figure and depN_2 the daily figure of:
    # dep1 customer deposits, dep2 credit balance, dep3 equity funds,
    # dep4 hybrid funds, dep5 bond funds.
    # Reference) http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221290138187&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_money_trend(self, inFileName):
        """Crawl daily market fund-flow figures into table ``meta_3``.

        Args:
            inFileName: Path of the SQLite database file.
        """
        url = 'http://finance.naver.com/sise/sise_deposit.nhn?&page='
        columns = ('dep1_1', 'dep1_2', 'dep2_1', 'dep2_2', 'dep3_1', 'dep3_2', 'dep4_1', 'dep4_2', 'dep5_1', 'dep5_2')
        self._crawl_daily_table(inFileName, 'meta_3', columns, url, 10, encoding='euc-kr')
        return

    # Crawl domestic market interest rates
    # Reference) http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221292348073&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_interest_rates(self, inFileName):
        """Crawl domestic market interest rates into table ``meta_4``.

        Each rate is one row (CODE, NAME, PRICE) whose PRICE column is a JSON
        list of daily entries sorted newest-first.

        Args:
            inFileName: Path of the SQLite database file.
        """
        tableName = 'meta_4'
        inputs = [
            {'NAME': '91일 CD금리', 'CODE': 'IRR_CD91', 'URL': 'http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_CD91'},
            {'NAME': '콜금리', 'CODE': 'IRR_CALL', 'URL': 'http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_CALL'},
            {'NAME': '국고채(3년)', 'CODE': 'IRR_GOVT03Y', 'URL': 'http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_GOVT03Y'},
            {'NAME': '회사채(3년)', 'CODE': 'IRR_CORP03Y', 'URL': 'http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_CORP03Y'},
        ]
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text)")
            for source in inputs:
                prices, lastDay = self._load_series(cursor, tableName, source['CODE'])
                meta = {"NAME": source['NAME'], "CODE": source['CODE'], "PRICE": prices}
                finish = False
                for page in range(1, self.limit_page_count):
                    html = pd.read_html(requests.get(source['URL'] + '&page=%s' % page, headers=self.header).text)
                    # Fetch until the last page.
                    if len(html[0].날짜.values) <= 1:
                        break
                    for item in html[0].values:
                        if str(item[0]) == "nan":
                            continue
                        if item[0] <= lastDay:
                            finish = True
                            break
                        meta["PRICE"].append({
                            "DATE": item[0],
                            "close": item[1],   # closing value
                            "diff": item[2],    # change from previous day
                            "rate": item[3]})   # rate of change
                    if finish:
                        break
                    # Progress: last row of the page just processed. str()
                    # keeps a non-string date cell from raising here.
                    print(meta["NAME"] + " / " + str(item[0]))
                self._save_series(cursor, tableName, meta)
                conn.commit()
            cursor.close()
        finally:
            # Release the database file even when a request or parse fails.
            conn.close()
        return

View File

@@ -0,0 +1,352 @@
# https://bigdata-sk.tistory.com/10
import pandas as pd
import re
import json
import sqlite3
import requests
class Queue(object):
    """Fixed-capacity FIFO window used to compute moving averages.

    Once ``max`` items are held, adding another one drops the oldest first.
    """

    def __init__(self, max):
        """Create an empty window holding at most ``max`` items."""
        self.queue = []
        self.max = max

    def dequeue(self):
        """Remove and return the oldest item of a full window.

        Returns -1 without removing anything while the window holds fewer
        than ``max`` items (including when it is empty).
        """
        length = len(self.queue)
        if length == 0 or length < self.max:
            return -1
        return self.queue.pop(0)

    def enqueue(self, n):
        """Append ``n``, evicting the oldest item when the window is full."""
        if len(self.queue) == self.max:
            self.dequeue()
        self.queue.append(n)

    def sum(self):
        """Return the sum of the items currently in the window."""
        # Inside a method body the bare name ``sum`` resolves to the builtin,
        # not to this method.
        return sum(self.queue)

    def avg(self):
        """Return the rounded mean of the window, or 0 when it is empty."""
        length = len(self.queue)
        if length == 0:
            # Avoid dividing by zero before the first enqueue.
            return 0
        return round(self.sum() / length)

    def print(self):
        """Print the current sum followed by the raw window contents."""
        print(self.sum(), self.queue)
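# The value converted to date form (this_date) can then be inspected.
# Date strings read from the pages have to be converted again and again,
# so the conversion is defined as a function named date_format().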
class StockCrawler:
    """Crawl daily prices of KRX stocks and selected ETFs into SQLite.

    Daily quotes come from Naver Finance (``sise_day.nhn``). Each stock is a
    row (CODE, NAME, PRICE) of table ``stock`` whose PRICE column is a JSON
    list of daily entries sorted by ascending date, each carrying moving
    averages of the closing price.
    """

    # Browser-like User-Agent sent with every HTTP request made by this class.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    historical_prices = None
    special_pattern = None
    fnGuideCrawler = None
    # Upper bound on the number of quote pages requested per stock.
    limit_page_count = 10000

    def __init__(self):
        self.historical_prices = dict()
        self.special_pattern = (
            '[', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', ',', '.', '?', '"', ':', ';', '{', '}', '|', '<', '>',
            ']', '+', '-', '/', '=', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
        return

    def clean_str(self, string):
        """Replace quotes, backslashes, hyphens and parenthesised text with
        spaces, then strip and lower-case the result."""
        # NOTE(review): the last pattern replaces a space with a space, which
        # is a no-op; it may originally have collapsed double spaces -- confirm.
        for pattern in (r"\\", r"\'", r"\"", r"`", r"-", r"\(.*?\)", r" "):
            string = re.sub(pattern, " ", string)
        return string.strip().lower()

    def getStockInfo(self):
        """Download the KRX listed-company table.

        Returns:
            A DataFrame with the columns ``name`` (company name) and ``code``
            (ticker formatted as a zero-padded six-digit string).
        """
        code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header).text)[0]
        # Tickers are six digits long, so restore the leading zeros.
        code_df.종목코드 = code_df.종목코드.map('{:06d}'.format)
        # Only the company name and the ticker are needed.
        code_df = code_df[['회사명', '종목코드']]
        # Rename the Korean column names to English.
        code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})
        return code_df

    # Look up the ticker for a company name and build the matching
    # Naver Finance (http://finance.naver.com) daily-quote URL.
    def get_url(self, item_name, code_df):
        """Return ``(code, url)`` for the company called ``item_name``."""
        code = code_df.query("name=='{}'".format(item_name))['code'].to_string(index=False).strip()
        url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code.strip())
        return code, url

    def date_format(self, d):
        """Return ``d`` as a string with ``-`` separators replaced by ``.``."""
        return str(d).replace('-', '.')

    def getCodeIndex(self, stocks, item_code):
        """Return the index of the stock whose CODE equals ``item_code``, or -1."""
        for i, stock in enumerate(stocks):
            if item_code == stock['CODE']:
                return i
        return -1

    def _upsert_stock(self, cursor, tableName, stock):
        """Insert the stock row, or update its PRICE when CODE already exists."""
        text = json.dumps(stock["PRICE"], ensure_ascii=False)
        cursor.execute('SELECT 1 FROM ' + tableName + ' WHERE CODE=?', (stock["CODE"],))
        if cursor.fetchone() is None:
            cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, PRICE) VALUES(?, ?, ?)", (stock["CODE"], stock["NAME"], text))
        else:
            cursor.execute("UPDATE " + tableName + " SET PRICE=? WHERE CODE=?", (text, stock["CODE"]))

    def _crawl_and_store(self, cursor, tableName, stock):
        """Load the stored prices of ``stock``, crawl the new ones, save all."""
        cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=?', (stock["CODE"],))
        result = cursor.fetchone()
        if result is not None:
            stock["PRICE"] = json.loads(result[2])
        self.crawl_specific_stock(stock)
        self._upsert_stock(cursor, tableName, stock)

    def crawl_etf_stocks(self, inFileName):
        """Crawl a fixed list of KODEX ETFs into table ``stock``.

        Args:
            inFileName: Path of the SQLite database file.
        """
        tableName = 'stock'
        stocks = [
            {"NAME": 'KODEX 코스닥150선물인버스', "CODE": "251340", "PRICE": []},
            {"NAME": 'KODEX 코스닥150 레버리지', "CODE": "233740", "PRICE": []},
            {"NAME": 'KODEX 200선물인버스2X', "CODE": "252670", "PRICE": []},
            {"NAME": 'KODEX 레버리지', "CODE": "122630", "PRICE": []},
            {"NAME": 'KODEX 골드선물(H)', "CODE": "132030", "PRICE": []},
        ]
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text)")
            for stock in stocks:
                self._crawl_and_store(cursor, tableName, stock)
                # Commit per stock so finished work survives a later failure.
                conn.commit()
            cursor.close()
        finally:
            conn.close()
        return

    def crawl_stocks(self, inFileName):
        """Crawl every company returned by ``getStockInfo`` into table ``stock``.

        Args:
            inFileName: Path of the SQLite database file.
        """
        tableName = 'stock'
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text)")
            code_df = self.getStockInfo()
            for idx, item in enumerate(code_df.values, start=1):
                item_name = item[0]
                item_code = item[1]
                print(idx, item_name, item_code)
                stock = {"CODE": item_code, "NAME": item_name, "PRICE": []}
                self._crawl_and_store(cursor, tableName, stock)
                # Commit per stock so finished work survives a later failure.
                conn.commit()
            cursor.close()
        finally:
            conn.close()
        return

    def get_data(self, stock):
        """Append daily quotes newer than the stored ones to ``stock["PRICE"]``.

        Pages are requested newest-first until the newest stored date shows
        up, the listing ends, or ``limit_page_count`` is reached. The list is
        left sorted by ascending DATE.

        Args:
            stock: Dict with ``CODE`` and a ``PRICE`` list; modified in place.
        """
        url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=stock['CODE'].strip())
        # PRICE is sorted ascending, so its last entry is the newest known day.
        known_day = stock["PRICE"][-1]["DATE"] if stock["PRICE"] else ""
        # The quote pages write dates with dots instead of hyphens.
        lastDay = known_day.replace("-", ".")

        frames = []
        short_page_seen = False
        previous_dates = None
        for page in range(1, self.limit_page_count):
            pg_url = '{url}&page={page}'.format(url=url, page=page)
            table = pd.read_html(requests.get(pg_url, headers=self.header).text)[0]
            dates = [date for date in table.날짜.values if isinstance(date, str)]
            # The same dates as on the previous page mean the last page is
            # being served again, so there is nothing more to fetch.
            if dates == previous_dates:
                break
            previous_dates = dates
            if lastDay in dates:
                # Caught up with the stored data; older pages are not needed.
                frames.append(table)
                break
            if len(dates) == 10:
                frames.append(table)
            elif not short_page_seen:
                # A page with fewer than ten dated rows is the final page of
                # the listing; keep it once.
                frames.append(table)
                short_page_seen = True
            else:
                break

        if frames:
            df = pd.concat(frames, ignore_index=True)
            # Drop rows containing missing values.
            df = df.dropna()
            # Rename the Korean column names to English.
            df = df.rename(columns={'날짜': 'date', '종가': 'close', '전일비': 'diff', '시가': 'open', '고가': 'high', '저가': 'low',
                                    '거래량': 'volume'})
            numeric = ['close', 'diff', 'open', 'high', 'low', 'volume']
            df[numeric] = df[numeric].astype(int)
            df['date'] = pd.to_datetime(df['date'])
            # Rows are newest-first; stop at the first day already stored.
            for values in df.values:
                day = str(values[0]).split(' ')[0]
                if known_day == day:
                    break
                entry = {"DATE": day}
                for k in range(1, 7):
                    entry[df.columns[k]] = values[k]
                stock["PRICE"].append(entry)
        stock["PRICE"] = sorted(stock["PRICE"], key=lambda x: x['DATE'])
        return

    def get_moving_avg(self, stock):
        """Add ``avgN`` moving averages of ``close`` to every PRICE entry.

        Entries are processed in list order; each average covers up to the
        last N closes seen so far.
        """
        windows = (3, 5, 7, 10, 20, 30, 60, 90, 100, 120, 150, 180, 200, 240)
        queues = [(size, Queue(size)) for size in windows]
        for entry in stock['PRICE']:
            close = entry['close']
            for size, queue in queues:
                queue.enqueue(close)
                entry['avg%d' % size] = queue.avg()
        return

    def crawl_specific_stock(self, stock):
        """Fetch new quotes for ``stock`` and recompute its moving averages."""
        # Collect the data.
        self.get_data(stock)
        # Compute the moving averages.
        self.get_moving_avg(stock)
        return

    def update(self, inFileName, outFileName):
        """Import a full JSON dump (one stock object per line) into the database.

        Example:
            inFileName = PROJECT_HOME + '/resources/stock.json.full'
            outFileName = PROJECT_HOME + '/resources/stock.db'
            crawler = StockCrawler()
            crawler.update(inFileName, outFileName)

        :param inFileName: path of the line-delimited JSON file to read
        :param outFileName: path of the SQLite database file to write
        :return: None
        """
        tableName = 'stock'
        # isolation_level=None: every statement is committed immediately.
        conn = sqlite3.connect(outFileName, isolation_level=None)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text)")
            idx = 0
            with open(inFileName, 'r') as inFp:
                for line in inFp:
                    # Blank lines hold no JSON document; skip them.
                    if not line.strip():
                        continue
                    idx += 1
                    stock = json.loads(line)
                    print(idx, stock["CODE"], stock["NAME"])
                    self._upsert_stock(cursor, tableName, stock)
            cursor.close()
        finally:
            conn.close()
        return