init
This commit is contained in:
80
stockpredictor_back/crawler/toSQLite/Crawler.py
Normal file
80
stockpredictor_back/crawler/toSQLite/Crawler.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import os
|
||||
import shutil
|
||||
import datetime
|
||||
import time
|
||||
from stockpredictor.crawler.toSQLite.FnGuideCrawler import FnGuideCrawler
|
||||
from stockpredictor.crawler.toSQLite.MetaCrawler import MetaCrawler
|
||||
from stockpredictor.crawler.toSQLite.StockCrawler import StockCrawler
|
||||
from stockpredictor.analysis.Analyzer import Analyzer
|
||||
|
||||
# Daily crawl-and-analyse pipeline: refresh the SQLite files under
# <PROJECT_HOME>/resources, run the analyzer, and write an HTML report into a
# folder named after today's date.

today = datetime.datetime.now().strftime("%Y-%m-%d")

# DB Browser for SQLite: http://hleecaster.com/python-sqlite3/

# Project root: four directory levels above this file. abspath() keeps the
# result meaningful when the script is started through a relative path.
PROJECT_HOME = os.path.abspath(__file__)
for _ in range(4):
    PROJECT_HOME = os.path.dirname(PROJECT_HOME)

start = time.time()
inFnguideFileName = PROJECT_HOME + '/resources/fnguide.db'

# Disabled step, kept for reference: financial statements are downloaded only
# about once every three months.
# NOTE(review): crawl_money_trend / crawl_interest_rates are MetaCrawler
# methods, so re-enabling the last two parts needs a MetaCrawler instance.
#
# crawler = FnGuideCrawler()
# print("[KOSPI 상장기업 재무제표 다운로드]")
# crawler.crawl_fnguide(inFnguideFileName)
#
# print("\n[증시자금동향 (신용잔고, 펀드자금 잔고)]")
# inFileName = PROJECT_HOME + '/resources/meta_3.db'
# crawler.crawl_money_trend(inFileName)
#
# print("\n[국내 시장금리]")
# inFileName = PROJECT_HOME + '/resources/meta_4.db'
# crawler.crawl_interest_rates(inFileName)

# Trading trend by investor type.
crawler = MetaCrawler()
print("\n[투자자별 매매동향(Trading_Trend)]")
inFileName = PROJECT_HOME + '/resources/meta_2.db'
crawler.crawl_trading_trend(inFileName)

# Exchange rates (USD, JPY, EUR, CNY), crude oil (WTI) and gold.
print("\n[환율 (USD, JPY, EUR, CNY), 원유 (WTI), 국제금]")
inFileName = PROJECT_HOME + '/resources/meta_1.db'
crawler.crawl_stocks(inFileName)

# Daily prices of the ETF list and of every listed company.
print("\n[종목 다운로드]")
inFileName = PROJECT_HOME + '/resources/stock.db'
crawler = StockCrawler()
crawler.crawl_etf_stocks(inFileName)
crawler.crawl_stocks(inFileName)

# Store the index series read from the TSV files into the stock database.
print("\n[지수 저장]")
kospiFileName = PROJECT_HOME + '/resources/kospi.tsv'
kosdakFileName = PROJECT_HOME + '/resources/kosdak.tsv'
outFileName = PROJECT_HOME + '/resources/stock.db'
crawler = StockCrawler()
crawler.saveIndex("KOSPI", kospiFileName, outFileName)
crawler.saveIndex("KOSDAK", kosdakFileName, outFileName)

# --- analysis stage: start ---
print("\n[종목 분석]")
inFileName = PROJECT_HOME + '/resources/stock.db'
analyzer = Analyzer(PROJECT_HOME, inFileName, inFnguideFileName)
analyzer.analyze()

print("\n[종목 결정]")
day = datetime.datetime.today().strftime("%Y%m%d")
outPath = PROJECT_HOME + "/resources/analysis/" + day
# Always start from an empty output folder for today's report.
if os.path.isdir(outPath):
    shutil.rmtree(outPath)
# makedirs (not mkdir) so a missing "resources/analysis" parent is created too.
os.makedirs(outPath)
print("print to Html...")
analyzer.analyzeToHtml(outPath)
# --- analysis stage: end ---

# The elapsed time must be interpolated with %; passing it as a second
# print() argument would print the raw format string instead.
print("time : %6.2f 초" % (time.time() - start))

print("done...")
|
||||
129
stockpredictor_back/crawler/toSQLite/FnGuideCrawler.py
Normal file
129
stockpredictor_back/crawler/toSQLite/FnGuideCrawler.py
Normal file
@@ -0,0 +1,129 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from pandas import DataFrame, Series
|
||||
import requests as re
|
||||
import pandas as pd
|
||||
import os
|
||||
import json
|
||||
import sqlite3
|
||||
import requests
|
||||
|
||||
class FnGuideCrawler:
    """Download FnGuide financial-highlight tables and store them in SQLite.

    One row per listed company is kept in the ``fnguide`` table; the table
    data is serialised as JSON text in the ``PRICE`` column.
    """

    # Browser-like User-Agent sent with the FnGuide requests.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}

    def getStockInfo(self):
        """Return a DataFrame with columns ``name`` and ``code`` from the KRX list.

        Codes are formatted as zero-padded six-digit strings.
        """
        code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]

        # Stock codes are six digits long, so pad them with leading zeros.
        code_df.종목코드 = code_df.종목코드.map('{:06d}'.format)

        # Only the company name and the stock code are needed.
        code_df = code_df[['회사명', '종목코드']]

        # Rename the Korean column names to English ones.
        code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})

        return code_df

    # Financial statements of listed companies crawled from FnGuide.
    # http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221294884955&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def get_fnguide_table(self, code):
        """Fetch the yearly highlight table of one company.

        Args:
            code: stock code; surrounding whitespace is stripped.

        Returns:
            The result of ``DataFrame.T.to_dict()`` for a frame indexed by
            period label with one column per table category, or ``{}`` when
            the expected page structure is not found.

        Raises:
            requests.RequestException: when the download fails or times out.
        """
        # Use the module's own name (not the misleading "re" alias), send the
        # class User-Agent and bound the wait with a timeout.
        response = requests.get(
            'http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s' % (code.strip()),
            headers=self.header,
            timeout=30,
        )

        html = BeautifulSoup(response.content, 'html.parser')
        body = html.find('body')

        try:
            fn_body = body.find('div', {'class': 'fng_body asp_body'})
            ur_table = fn_body.find('div', {'id': 'div15'})
            table = ur_table.find('div', {'id': 'highlight_D_Y'})
            tbody = table.find('tbody')
            rows = tbody.find_all('tr')
        except AttributeError:
            # A missing element makes find() return None; the next call on it
            # raises AttributeError. Such pages carry no highlight table.
            return {}

        highlight = DataFrame()

        for row in rows:
            # Category label: prefer the <span class="txt_acd">, else the <th>.
            label = row.find('span', {'class': 'txt_acd'})
            if label is None:
                label = row.find('th')
            category = label.text.strip()

            # Cell values: thousands separators removed; unparsable cells -> 0.
            value_list = []
            for cell in row.find_all('td', {'class': 'r'}):
                text = cell.text.replace(',', '').strip()
                try:
                    value_list.append(float(text))
                except ValueError:
                    value_list.append(0)

            highlight['%s' % (category)] = value_list

        # Period labels come from the header row; "/" becomes "." and ".01"
        # is appended to each label.
        thead = table.find('thead')
        header_cells = thead.find('tr', {'class': 'td_gapcolor2'}).find_all('th')

        year_list = []
        for cell in header_cells:
            span = cell.find('span', {'class': 'txt_acd'})
            period = span.text if span is not None else cell.text
            year_list.append(period.replace("/", ".") + ".01")

        highlight.index = year_list

        return highlight.T.to_dict()

    def crawl_fnguide(self, inFileName):
        """Crawl every company from getStockInfo() and upsert it into SQLite.

        Args:
            inFileName: path of the SQLite file; the ``fnguide`` table is
                created when missing.

        A company whose download fails is reported and skipped, so its
        previously stored row is left untouched.
        """
        tableName = 'fnguide'
        # isolation_level=None puts the connection in autocommit mode.
        conn = sqlite3.connect(inFileName, isolation_level=None)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS "+tableName+" (CODE text PRIMARY KEY, NAME text, PRICE text)")

            code_df = self.getStockInfo()
            for idx, item in enumerate(code_df.values, start=1):
                item_name = item[0]
                item_code = item[1]
                print(idx, item_name)

                try:
                    fnGuideData = self.get_fnguide_table(item_code)
                except requests.RequestException as error:
                    # One unreachable page must not abort the whole crawl.
                    print("skip", item_code, error)
                    continue
                text = json.dumps(fnGuideData, ensure_ascii=False)

                cursor.execute('SELECT * FROM '+tableName+' WHERE CODE=?', (item_code, ))
                if cursor.fetchone() is None:
                    cursor.execute("INSERT INTO "+tableName+"(CODE, NAME, PRICE) VALUES(?, ?, ?)", (item_code, item_name, text))
                else:
                    cursor.execute("UPDATE "+tableName+" SET PRICE=? WHERE CODE=?", (text, item_code))

            cursor.close()
        finally:
            # Release the database file even when the crawl fails midway.
            conn.close()
        return
|
||||
|
||||
if __name__ == "__main__":
    # Stand-alone run: crawl the FnGuide tables into resources/fnguide.db.

    # Project root: four directory levels above this file. abspath() keeps the
    # result meaningful when the script is started through a relative path.
    PROJECT_HOME = os.path.abspath(__file__)
    for _ in range(4):
        PROJECT_HOME = os.path.dirname(PROJECT_HOME)

    inFnguideFileName = PROJECT_HOME + '/resources/fnguide.db'
    # A single crawler instance is enough; it was previously created twice.
    crawler = FnGuideCrawler()
    crawler.crawl_fnguide(inFnguideFileName)
|
||||
341
stockpredictor_back/crawler/toSQLite/MetaCrawler.py
Normal file
341
stockpredictor_back/crawler/toSQLite/MetaCrawler.py
Normal file
@@ -0,0 +1,341 @@
|
||||
import json
|
||||
import datetime
|
||||
import requests
|
||||
import sqlite3
|
||||
from time import sleep
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
class MetaCrawler:
    """Crawl market-wide indicators from Naver Finance into SQLite files.

    Public entry points: crawl_stocks, crawl_trading_trend,
    crawl_money_trend and crawl_interest_rates. Each one opens the SQLite
    file it is given, creates its table when missing, downloads only pages
    newer than what is already stored, and commits once at the end.
    """

    # Browser-like User-Agent sent with every HTTP request.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    # Exclusive upper bound on the page numbers requested while paging.
    limit_page_count = 10000
    # Number of download attempts per page before the crawl is aborted.
    max_fetch_attempts = 5

    def _fetch_tables(self, url, **read_html_kwargs):
        """Download *url* and return the list of tables parsed by pandas.

        The download is retried up to ``max_fetch_attempts`` times with a
        growing pause. Previously a failing page was retried forever (or the
        caller dereferenced ``None``); now a RuntimeError is raised instead.
        """
        last_error = None
        for attempt in range(1, self.max_fetch_attempts + 1):
            try:
                response = requests.get(url, headers=self.header, timeout=30)
                tables = pd.read_html(response.text, **read_html_kwargs)
            except Exception as error:
                # Any download or parse failure is retried; the last one is
                # re-raised below as the cause once the attempts are used up.
                print(url)
                last_error = error
                sleep(0.5 * attempt)
                continue
            # Short pause between successful requests.
            sleep(0.5)
            return tables
        raise RuntimeError("giving up on " + url) from last_error

    def _crawl_quote_history(self, inFileName, tableName, inputs, show_progress=False):
        """Update one JSON price history per entry of *inputs*.

        Each entry is a dict with NAME, CODE, URL, FIELDS (names given to the
        columns following the date) and SKIP_FIRST_ROW. The history is stored
        as JSON text in the PRICE column, sorted by DATE, newest first.
        """
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text)")

            for source in inputs:
                cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=?', (source['CODE'],))
                stored = cursor.fetchone()

                prices = []
                lastDay = "1900.01.01"
                if stored is not None:
                    prices = json.loads(stored[2])
                    # Guard against a stored empty history (was an IndexError).
                    if prices:
                        lastDay = prices[0]['DATE']

                finish = False
                for page in range(1, self.limit_page_count):
                    tables = self._fetch_tables(source['URL'] + '&page=%s' % page)

                    # A page with at most one row marks the end of the data.
                    if len(tables[0].날짜.values) <= 1:
                        break

                    item = None
                    for j, item in enumerate(tables[0].values):
                        if source['SKIP_FIRST_ROW'] and j == 0:
                            continue
                        if str(item[0]) == "nan":
                            continue
                        # Everything from here on is already stored.
                        if item[0] <= lastDay:
                            finish = True
                            break
                        entry = {"DATE": item[0]}
                        for offset, field in enumerate(source['FIELDS'], start=1):
                            entry[field] = item[offset]
                        prices.append(entry)
                    if finish:
                        break
                    if show_progress:
                        # str() because the last cell of a page may be NaN.
                        print(source['NAME'] + " / " + str(item[0]))

                prices = sorted(prices, key=lambda x: x['DATE'], reverse=True)
                text = json.dumps(prices, ensure_ascii=False)

                if stored is None:
                    cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, PRICE) VALUES(?, ?, ?)", (source['CODE'], source['NAME'], text))
                else:
                    cursor.execute("UPDATE " + tableName + " SET PRICE=? WHERE CODE=?", (text, source['CODE']))

            conn.commit()
            cursor.close()
        finally:
            # Release the database file even when the crawl fails midway.
            conn.close()

    def _crawl_daily_rows(self, inFileName, tableName, columns, url, min_rows, **read_html_kwargs):
        """Update a table holding one row per day with the integer *columns*.

        *url* is the page URL without the page number. Paging stops at a page
        with at most *min_rows* rows, at a date not newer than the newest
        stored one, or when a page repeats the previous page.
        """
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (DATE text PRIMARY KEY, " + ", ".join(name + " integer" for name in columns) + ")")

            cursor.execute('SELECT * FROM ' + tableName + ' order by DATE desc')
            newest = cursor.fetchone()
            lastDay = "1900.01.01" if newest is None else newest[0]

            insert_sql = "INSERT INTO " + tableName + "(DATE, " + ", ".join(columns) + ") VALUES(" + ", ".join("?" * (len(columns) + 1)) + ")"
            update_sql = "UPDATE " + tableName + " SET " + ", ".join(name + "=?" for name in columns) + " WHERE DATE=?"

            previousDay = ""
            finish = False
            for page in range(1, self.limit_page_count):
                tables = self._fetch_tables(url + str(page), **read_html_kwargs)

                # Too few rows: the data has ended.
                if len(tables[0].날짜.values) <= min_rows:
                    break

                rows = tables[0].values
                # Row 0 is skipped, as before.
                for item in rows[1:]:
                    if str(item[0]) == "nan":
                        continue
                    # The site prints two-digit years; "20" is prepended for storage.
                    if "20" + item[0] <= lastDay or item[0] == previousDay:
                        finish = True
                        break

                    day = "20" + item[0]
                    values = tuple(item[1:len(columns) + 1])

                    cursor.execute('SELECT * FROM ' + tableName + ' WHERE DATE=?', (day,))
                    if cursor.fetchone() is None:
                        cursor.execute(insert_sql, (day,) + values)
                    else:
                        cursor.execute(update_sql, values + (day,))

                    print(day)
                if finish:
                    break
                # Remembered to detect a page that repeats the previous one.
                previousDay = rows[2][0]

            conn.commit()
            cursor.close()
        finally:
            # Release the database file even when the crawl fails midway.
            conn.close()

    # Reference: http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221288761509
    def crawl_stocks(self, inFileName):
        """Crawl exchange rates (USD, JPY, EUR, CNY), WTI and gold into ``meta_1``.

        Args:
            inFileName: path of the SQLite file to update.
        """
        # Exchange-rate pages: the first table row is skipped and only the
        # base rate and day-over-day change are kept.
        fx = {'FIELDS': ('close', 'diff'), 'SKIP_FIRST_ROW': True}
        # Commodity pages: close, change and change rate.
        commodity = {'FIELDS': ('close', 'diff', 'rate'), 'SKIP_FIRST_ROW': False}

        inputs = [
            dict(fx, NAME='USD', CODE='FX_USDKRW', URL='http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW'),  # US dollar
            dict(fx, NAME='JPY', CODE='FX_JPYKRW', URL='http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_JPYKRW'),  # Japanese yen
            dict(fx, NAME='EUR', CODE='FX_EURKRW', URL='http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_EURKRW'),  # Euro
            dict(fx, NAME='CNY', CODE='FX_CNYKRW', URL='http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_CNYKRW'),  # Chinese yuan
            dict(commodity, NAME='WTI', CODE='OIL_CL', URL='http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2'),  # WTI crude
            dict(commodity, NAME='GOLD', CODE='CMDT_GC', URL='http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2'),  # international gold
        ]
        self._crawl_quote_history(inFileName, 'meta_1', inputs)
        return

    # Trading trend by investor type. Column meanings:
    #   pri  individuals              for  foreigners
    #   ins  institutions (total)     ins0 financial investment
    #   ins1 insurance                ins2 investment trusts (private equity)
    #   ins3 banks                    ins4 other financial institutions
    #   ins5 pension funds etc.       cor  other corporations
    # Reference: http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221289696771&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_trading_trend(self, inFileName):
        """Crawl the daily trading trend by investor type into ``meta_2``.

        Args:
            inFileName: path of the SQLite file to update.
        """
        today = datetime.datetime.now().strftime("%Y%m%d")
        url = 'http://finance.naver.com/sise/investorDealTrendDay.nhn?bizdate='+today+'&sosok=&page='
        columns = ('pri', 'for', 'ins', 'ins0', 'ins1', 'ins2', 'ins3', 'ins4', 'ins5', 'cor')
        self._crawl_daily_rows(inFileName, 'meta_2', columns, url, 2)
        return

    # Market money trend (credit balance, fund balances). Per the original
    # notes, each depN pair is cumulative (_1) and same-day (_2) for: customer
    # deposits, credit balance, equity funds, mixed funds and bond funds.
    # Reference: http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221290138187&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_money_trend(self, inFileName):
        """Crawl the daily market money trend into ``meta_3``.

        Args:
            inFileName: path of the SQLite file to update.
        """
        url = 'http://finance.naver.com/sise/sise_deposit.nhn?&page='
        columns = ('dep1_1', 'dep1_2', 'dep2_1', 'dep2_2', 'dep3_1', 'dep3_2', 'dep4_1', 'dep4_2', 'dep5_1', 'dep5_2')
        self._crawl_daily_rows(inFileName, 'meta_3', columns, url, 10, encoding='euc-kr')
        return

    # Domestic market interest rates.
    # Reference: http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221292348073&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_interest_rates(self, inFileName):
        """Crawl CD, call, government-bond and corporate-bond rates into ``meta_4``.

        Args:
            inFileName: path of the SQLite file to update.
        """
        # Each row keeps close, day-over-day change and change rate.
        rate = {'FIELDS': ('close', 'diff', 'rate'), 'SKIP_FIRST_ROW': False}

        inputs = [
            dict(rate, NAME='91일 CD금리', CODE='IRR_CD91', URL='http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_CD91'),
            dict(rate, NAME='콜금리', CODE='IRR_CALL', URL='http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_CALL'),
            dict(rate, NAME='국고채(3년)', CODE='IRR_GOVT03Y', URL='http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_GOVT03Y'),
            dict(rate, NAME='회사채(3년)', CODE='IRR_CORP03Y', URL='http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd=IRR_CORP03Y'),
        ]
        self._crawl_quote_history(inFileName, 'meta_4', inputs, show_progress=True)
        return
|
||||
464
stockpredictor_back/crawler/toSQLite/StockCrawler.py
Normal file
464
stockpredictor_back/crawler/toSQLite/StockCrawler.py
Normal file
@@ -0,0 +1,464 @@
|
||||
# https://bigdata-sk.tistory.com/10
|
||||
|
||||
import pandas as pd
|
||||
import re
|
||||
import json
|
||||
import sqlite3
|
||||
import requests
|
||||
import math
|
||||
import time
|
||||
from time import sleep
|
||||
|
||||
class Queue(object):
    """Bounded FIFO of numbers, used as a sliding window for moving averages.

    Once ``max`` items are held, adding another one drops the oldest first.
    """

    def __init__(self, max):
        """Create an empty window that holds at most *max* items."""
        self.queue = []
        self.max = max

    def dequeue(self):
        """Remove and return the oldest item of a full window.

        Returns -1 without changing anything when the window is empty or not
        yet full.
        """
        length = len(self.queue)
        if length == 0 or length < self.max:
            return -1
        return self.queue.pop(0)

    def enqueue(self, n):
        """Append *n*, dropping the oldest item first when the window is full."""
        if len(self.queue) == self.max:
            self.dequeue()
        self.queue.append(n)

    def sum(self):
        """Return the sum of the items currently held (0 when empty)."""
        # Inside a method the bare name refers to the builtin sum().
        return sum(self.queue)

    def avg(self):
        """Return the rounded mean of the held items, or 0 when empty."""
        if not self.queue:
            # An empty window used to raise ZeroDivisionError.
            return 0
        return round(self.sum() / len(self.queue))

    def print(self):
        """Print the current sum followed by the held items."""
        print(self.sum(), self.queue)
|
||||
|
||||
# 날짜 형식으로 바뀐 this_date값을 확인 가능
|
||||
# 읽어온 날짜 정보를 date형식으로 바꿀 일이 계속 생기므로 이 기능을 함수로 정의해줌.
|
||||
# 함수명은 date_format()
|
||||
|
||||
class StockCrawler:
|
||||
header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
|
||||
|
||||
historical_prices = None
|
||||
special_pattern = None
|
||||
fnGuideCrawler = None
|
||||
|
||||
limit_page_count = 10000
|
||||
|
||||
def __init__(self):
|
||||
self.historical_prices = dict()
|
||||
self.special_pattern = (
|
||||
'[', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', ',', '.', '?', '"', ':', ';', '{', '}', '|', '<', '>',
|
||||
']', '+', '-', '/', '=', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
|
||||
|
||||
return
|
||||
|
||||
def clean_str(self, string):
    """Normalise *string* for matching.

    Backslashes, quotes, backticks and hyphens become spaces, parenthesised
    parts are removed, runs of whitespace collapse to one space, and the
    result is stripped and lower-cased.
    """
    # One character class replaces the five single-character substitutions.
    string = re.sub(r"[\\'\"`-]", " ", string)
    # Drop "(...)" groups (non-greedy, so each pair is removed separately).
    string = re.sub(r"\(.*?\)", " ", string)
    # The last step used to replace a space with a space, which did nothing;
    # collapse the whitespace runs created by the substitutions above.
    string = re.sub(r"\s{2,}", " ", string)

    return string.strip().lower()
|
||||
|
||||
def getStockInfo(self):
    """Return a DataFrame with columns ``name`` and ``code`` from the KRX list.

    The list is downloaded with the class User-Agent and a 30 second timeout;
    codes are formatted as zero-padded six-digit strings.
    """
    response = requests.get(
        'http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13',
        headers=self.header,
        timeout=30,
    )
    listing = pd.read_html(response.text)[0]

    # Stock codes are six digits long, so pad them with leading zeros.
    listing.종목코드 = listing.종목코드.map('{:06d}'.format)

    # Keep only company name and stock code, renamed to English.
    selected = listing[['회사명', '종목코드']]
    return selected.rename(columns={'회사명': 'name', '종목코드': 'code'})
|
||||
|
||||
# 종목 이름을 입력하면 종목에 해당하는 코드를 불러와
|
||||
# 네이버 금융(http://finance.naver.com)에 넣어줌
|
||||
def get_url(self, item_name, code_df):
    """Look up the code of *item_name* and build its Naver daily-price URL.

    Args:
        item_name: company name to look up in the ``name`` column.
        code_df: DataFrame with ``name`` and ``code`` columns.

    Returns:
        Tuple ``(code, url)``.
    """
    # A boolean mask instead of DataFrame.query(): building the query text
    # from the name failed for names containing a single quote.
    matches = code_df.loc[code_df['name'] == item_name, 'code']
    code = matches.to_string(index=False).strip()
    url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code)

    return code, url
|
||||
|
||||
def date_format(self, d):
    """Return *d* as a string with every "-" replaced by "."."""
    return str(d).replace('-', '.')
|
||||
|
||||
def getCodeIndex(self, stocks, item_code):
    """Return the position of the first stock whose CODE equals *item_code*.

    Returns -1 when no entry of *stocks* matches.
    """
    positions = (
        position
        for position, candidate in enumerate(stocks)
        if item_code == candidate['CODE']
    )
    return next(positions, -1)
|
||||
|
||||
def crawl_etf_stocks(self, inFileName):
    """Update the stored price history of a fixed list of ETFs.

    Args:
        inFileName: path of the SQLite file; the ``stock`` table is created
            when missing.

    For each ETF the stored PRICE JSON is loaded, extended through
    crawl_specific_stock() and written back. Progress is committed after
    every ETF so an aborted run keeps what was already crawled.
    """
    tableName = 'stock'

    etfs = (
        ('KODEX 코스닥150선물인버스', "251340"),
        ('KODEX 코스닥150 레버리지', "233740"),
        ('KODEX 200선물인버스2X', "252670"),
        ('KODEX 레버리지', "122630"),
        ('KODEX 인버스', "114800"),
        ('KODEX 중국본토CSI300', "283580"),
        ('KODEX 심천ChiNext(합성)', "256750"),
        ('KINDEX 블룸버그베트남VN30선물레버리지(H)', "371130"),
        ('KODEX 미국S&P바이오(합성)', "185680"),
        ('KODEX 미국S&P에너지(합성)', "218420"),
        ('KODEX 골드선물(H)', "132030"),
        ('KODEX 콩선물(H)', "138920"),
        ('KODEX 3대농산물선물(H)', "271060"),
        ('KODEX 건설', "117700"),
        ('KODEX 헬스케어', "266420"),
        ('KODEX 글로벌4차산업로보틱스(합성)', "276990"),
        ('KODEX 바이오', "244580"),
        ('KODEX 반도체', "091160"),
        ('KODEX 보험', "140700"),
        ('KODEX 필수소비재', "266410"),
        ('KODEX 2차전지산업', "305720"),
        ('KODEX 경기소비재', "266390"),
        ('KODEX 철강', "117680"),
        ('KODEX 에너지화학', "117460"),
        ('KODEX 은행', "091170"),
        ('TIGER 탄소효율그린뉴딜', "376410"),
    )
    stocks = [{"NAME": name, "CODE": code, "PRICE": []} for name, code in etfs]

    conn = sqlite3.connect(inFileName)
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text, MACD text, STOCHASTIC text, ICHIMOKU text, RSI text, BOLINGERBAND text)")

        start_time = time.time()
        for i, stock in enumerate(stocks):
            # The printed time is the duration of the previous ETF.
            print (i, stock["NAME"], stock["CODE"], (time.time()-start_time), "s")
            start_time = time.time()

            cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=?', (stock["CODE"],))
            stored = cursor.fetchone()
            if stored is not None:
                stock["PRICE"] = json.loads(stored[2])

            self.crawl_specific_stock(stock)
            text = json.dumps(stock['PRICE'], ensure_ascii=False)

            # The row cannot have changed since the SELECT above, so its
            # result decides between INSERT and UPDATE.
            if stored is None:
                cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, PRICE) VALUES(?, ?, ?)", (stock["CODE"], stock["NAME"], text))
            else:
                cursor.execute("UPDATE " + tableName + " SET PRICE=? WHERE CODE=?", (text, stock["CODE"]))
            conn.commit()

        cursor.close()
    finally:
        # Release the database file even when the crawl fails midway.
        conn.close()
    return
|
||||
|
||||
def crawl_stocks(self, inFileName):
    """Update the stored price history of every company from getStockInfo().

    Args:
        inFileName: path of the SQLite file; the ``stock`` table is created
            when missing.

    For each company the stored PRICE JSON is loaded, extended through
    crawl_specific_stock() and written back. Progress is committed after
    every company so an aborted run keeps what was already crawled.
    """
    tableName = 'stock'
    conn = sqlite3.connect(inFileName)
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text, MACD text, STOCHASTIC text, ICHIMOKU text, RSI text, BOLINGERBAND text)")

        code_df = self.getStockInfo()

        start_time = time.time()
        for idx, item in enumerate(code_df.values, start=1):
            item_name = item[0]
            item_code = item[1]

            cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=?', (item_code,))
            stored = cursor.fetchone()

            stock = {"CODE": item_code, "NAME": item_name, "PRICE": []}
            if stored is not None:
                stock["PRICE"] = json.loads(stored[2])

            self.crawl_specific_stock(stock)
            text = json.dumps(stock['PRICE'], ensure_ascii=False)

            print(idx, item_name, item_code, (time.time()-start_time),"s")
            start_time = time.time()

            # The row cannot have changed since the SELECT above, so its
            # result decides between INSERT and UPDATE.
            if stored is None:
                cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, PRICE) VALUES(?, ?, ?)", (stock["CODE"], stock["NAME"], text))
            else:
                cursor.execute("UPDATE " + tableName + " SET PRICE=? WHERE CODE=?", (text, stock["CODE"]))
            conn.commit()

        cursor.close()
    finally:
        # Release the database file even when the crawl fails midway.
        conn.close()
    return
|
||||
|
||||
|
||||
def get_data(self, stock):
    """Download new daily prices for *stock* and merge them into stock["PRICE"].

    Args:
        stock: dict with "CODE" and "PRICE"; "PRICE" is a list of dicts with
            a "DATE" key in "YYYY-MM-DD" form. It is extended in place and
            left sorted by DATE in ascending order.

    Raises:
        RuntimeError: when a page cannot be downloaded after several tries.
    """
    url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=stock['CODE'].strip())

    # Newest stored day in the "YYYY.MM.DD" form compared against page dates.
    # A history holding a single entry counts too (it used to be ignored,
    # which re-downloaded everything and duplicated that entry).
    lastDay = ""
    if stock["PRICE"]:
        lastDay = stock["PRICE"][-1]["DATE"].replace("-", ".")

    max_attempts = 5
    pages = []
    date_set = set()
    for page in range(1, self.limit_page_count):
        pg_url = '{url}&page={page}'.format(url=url, page=page)

        # Bounded retry; previously a failing page was retried forever or
        # left the table list as None.
        tables = None
        last_error = None
        for attempt in range(1, max_attempts + 1):
            try:
                tables = pd.read_html(requests.get(pg_url, headers=self.header, timeout=30).text)
                break
            except Exception as error:
                # Any download or parse failure is retried; the last one is
                # re-raised below as the cause.
                print(pg_url)
                last_error = error
                sleep(0.5 * attempt)
        if tables is None:
            raise RuntimeError("giving up on " + pg_url) from last_error
        sleep(0.5)

        lastPage = False
        has_date = False
        for date in tables[0].날짜.values:
            if not isinstance(date, str):
                continue
            has_date = True
            # A date seen before means the site is repeating a page.
            if date in date_set:
                lastPage = True
                break
            date_set.add(date)
            # Everything older than this day is already stored.
            if date == lastDay:
                lastPage = True
                break
        if not has_date:
            # A page without any date would otherwise never end the paging.
            lastPage = True

        pages.append(tables[0])
        if lastPage:
            print("\t- lastpage:", page)
            break

    # DataFrame.append was removed in pandas 2.0; concatenate the pages once.
    df = pd.concat(pages, ignore_index=True)

    # Drop rows that contain missing values.
    df = df.dropna()

    # Rename the Korean column names to English ones.
    df = df.rename(columns={'날짜': 'date', '종가': 'close', '전일비': 'diff', '시가': 'open', '고가': 'high', '저가': 'low', '거래량': 'volume'})

    # Convert the numeric columns to int.
    df[['close', 'diff', 'open', 'high', 'low', 'volume']] = df[['close', 'diff', 'open', 'high', 'low', 'volume']].astype(int)

    # Convert the 'date' column to datetime.
    df['date'] = pd.to_datetime(df['date'])

    # Merge only days that are not stored yet; the set also removes rows
    # duplicated by a repeated page.
    known_days = {entry["DATE"] for entry in stock["PRICE"]}
    for values in df.values:
        day = str(values[0]).split(' ')[0]
        if day in known_days:
            continue
        known_days.add(day)
        entry = {"DATE": day}
        # Columns 1..6 follow the date column in page order.
        for column, value in zip(df.columns[1:7], values[1:7]):
            entry[column] = value
        stock["PRICE"].append(entry)

    stock["PRICE"] = sorted(stock["PRICE"], key=lambda x: x['DATE'])
    return
|
||||
|
||||
def get_moving_avg(self, stock):
    """Store moving averages of the closing price on every price record.

    The records in ``stock['PRICE']`` are visited in list order. For each
    record its ``'close'`` value is enqueued into one ``Queue`` per window
    size, and whatever each queue's ``avg()`` returns is written back onto
    the record under the key ``'avg<size>'`` (``avg3`` ... ``avg240``).

    NOTE(review): ``Queue`` is defined outside this block; it is presumably a
    fixed-capacity sliding window whose ``avg()`` averages its current
    contents - confirm against its definition.

    :param stock: dict with a ``'PRICE'`` list of dicts that each carry a
        ``'close'`` value; the records are modified in place.
    :return: None
    """
    window_sizes = (3, 5, 7, 10, 20, 30, 60, 90, 100, 120, 150, 180, 200, 240)
    # One queue per window, created in ascending window order.
    windows = [(size, Queue(size)) for size in window_sizes]

    for record in stock['PRICE']:
        closing = record['close']
        # Feed every queue first, then read all averages back.
        for _, window in windows:
            window.enqueue(closing)
        for size, window in windows:
            record['avg' + str(size)] = window.avg()

    return None
|
||||
|
||||
def crawl_specific_stock(self, stock):
    """Run the two-step pipeline for a single stock record.

    First ``self.get_data(stock)`` is called to collect the data, then
    ``self.get_moving_avg(stock)`` to compute the moving averages. Both
    helpers receive the same ``stock`` object.

    :param stock: stock record handed unchanged to both helpers.
    :return: None
    """
    # Step 1: data collection.
    self.get_data(stock)

    # Step 2: moving-average calculation.
    self.get_moving_avg(stock)

    return None
|
||||
|
||||
def update(self, inFileName, outFileName):
    """Import a full JSON-lines stock dump into the SQLite ``stock`` table.

    Every non-blank line of ``inFileName`` is parsed as one JSON object that
    must contain ``CODE``, ``NAME`` and ``PRICE``. ``PRICE`` is serialized
    back to JSON text and stored: when ``CODE`` is not yet in the table a new
    row (CODE, NAME, PRICE) is inserted, otherwise only the ``PRICE`` column
    of the existing row is overwritten.

    Example::

        inFileName = PROJECT_HOME + '/resources/stock.json.full'
        outFileName = PROJECT_HOME + '/resources/stock.db'
        crawler = StockCrawler()
        crawler.update(inFileName, outFileName)

    :param inFileName: path of the input file, one JSON object per line.
    :param outFileName: path of the SQLite database file; the ``stock``
        table is created when it does not exist.
    :return: None
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON.
    :raises KeyError: if a record lacks ``CODE``, ``NAME`` or ``PRICE``.
    """
    tableName = 'stock'
    # isolation_level=None selects autocommit mode, so each statement is
    # persisted as soon as it runs and no explicit commit() is required.
    conn = sqlite3.connect(outFileName, isolation_level=None)
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text, MACD text, STOCHASTIC text, ICHIMOKU text, RSI text, BOLINGERBAND text)")

        idx = 0
        # NOTE(review): the file is opened with the platform default encoding,
        # as before; saveIndex() reads its input as UTF-8 - confirm which
        # encoding the dump is written in.
        with open(inFileName, 'r') as inFp:
            # Stream line by line instead of loading the whole dump at once.
            for line in inFp:
                line = line.strip()
                if not line:
                    # A blank (or trailing) line carries no record.
                    continue

                idx += 1
                stock = json.loads(line)
                print(idx, stock["CODE"], stock["NAME"])

                text = json.dumps(stock["PRICE"], ensure_ascii=False)

                cursor.execute('SELECT 1 FROM ' + tableName + ' WHERE CODE=?', (stock["CODE"],))
                if cursor.fetchone() is None:
                    cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, PRICE) VALUES(?, ?, ?)", (stock["CODE"], stock["NAME"], text))
                else:
                    cursor.execute("UPDATE " + tableName + " SET PRICE=? WHERE CODE=?", (text, stock["CODE"]))
    finally:
        # Release the database file even when a line fails to parse.
        conn.close()

    return None
|
||||
|
||||
|
||||
def saveIndex(self, code, inFileName, outFileName):
    """Merge a tab-separated index price file into the SQLite ``stock`` table.

    The row for ``code`` is loaded from the database (if present) and its
    stored ``PRICE`` JSON list becomes the starting point. Lines of
    ``inFileName`` are then read in order:

    * blank lines and lines starting with ``#`` are skipped;
    * reading stops at the first line whose first column equals the ``DATE``
      of the last stored record, so later lines are not imported
      (presumably the file lists the newest day first - confirm);
    * every other line is split on tabs into a price record: column 0 is
      ``DATE``, 1 ``close``, 2 ``open``, 3 ``high``, 4 ``low`` and column 6
      is ``diff`` with its ``%`` sign removed; ``volume`` and all ``avgN``
      fields start at 0.

    The combined list is sorted by ``DATE``, passed to
    ``self.get_moving_avg`` and written back as JSON. ``MACD``,
    ``STOCHASTIC``, ``ICHIMOKU`` and ``RSI`` are reset to ``"[{}]"``.

    :param code: index identifier, used both as ``CODE`` and ``NAME``.
    :param inFileName: path of the UTF-8, tab-separated input file.
    :param outFileName: path of the SQLite database file; the ``stock``
        table is created when it does not exist.
    :return: None
    :raises IndexError: if a data line has fewer than seven columns.
    :raises ValueError: if a numeric column cannot be parsed as a float.
    """
    tableName = 'stock'
    avg_windows = (3, 5, 7, 10, 20, 30, 60, 90, 100, 120, 150, 180, 200, 240)

    conn = sqlite3.connect(outFileName)
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text, MACD text, STOCHASTIC text, ICHIMOKU text, RSI text, BOLINGERBAND text)")

        stock = {"NAME": code, "CODE": code, "PRICE": []}

        cursor.execute('SELECT PRICE FROM ' + tableName + ' WHERE CODE=?', (stock["CODE"],))
        result = cursor.fetchone()
        # Remembered so the write below needs no second lookup.
        row_exists = result is not None
        if row_exists and result[0]:
            # A NULL/empty PRICE column is treated as "no history yet".
            stock["PRICE"] = json.loads(result[0])

        # Date of the most recent stored record; "" when there is none, which
        # can never match a non-blank input line.
        lastDay = stock["PRICE"][-1]["DATE"] if stock["PRICE"] else ""

        with open(inFileName, "r", encoding="utf-8") as inFp:
            for line in inFp:
                line = line.strip()
                if not line or line[0] == "#":
                    # Skip blank lines and comment/header lines.
                    continue

                arr = line.split("\t")
                if arr[0] == lastDay:
                    # Reached data that is already stored.
                    break

                price = {"DATE": arr[0], "close": float(arr[1]), "diff": float(arr[6].replace("%", "")), "open": float(arr[2]), "high": float(arr[3]), "low": float(arr[4]), "volume": 0}
                # Placeholders; get_moving_avg() fills them in below.
                for window in avg_windows:
                    price['avg' + str(window)] = 0
                stock["PRICE"].append(price)

        stock["PRICE"] = sorted(stock["PRICE"], key=lambda x: x['DATE'])
        self.get_moving_avg(stock)

        text = json.dumps(stock['PRICE'], ensure_ascii=False)

        if row_exists:
            cursor.execute("UPDATE " + tableName + " SET PRICE=?, MACD=?, STOCHASTIC=?, ICHIMOKU=?, RSI=? WHERE CODE=?", (text, "[{}]", "[{}]", "[{}]", "[{}]", stock["CODE"]))
        else:
            cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, PRICE, MACD, STOCHASTIC, ICHIMOKU, RSI) VALUES(?, ?, ?, ?, ?, ?, ?)", (stock["CODE"], stock["NAME"], text, "[{}]", "[{}]", "[{}]", "[{}]"))

        conn.commit()
    finally:
        # Always release the database file, even when parsing fails midway.
        conn.close()

    return None
|
||||
365
stockpredictor_back/crawler/toSQLite/inout.cvs
Normal file
365
stockpredictor_back/crawler/toSQLite/inout.cvs
Normal file
@@ -0,0 +1,365 @@
|
||||
364,1186,1339,46.97
|
||||
363,1086,1439,43.01
|
||||
362,805,1720,31.88
|
||||
361,1121,1404,44.40
|
||||
360,674,1851,26.69
|
||||
359,260,2265,10.30
|
||||
358,1616,909,64.00
|
||||
357,341,2184,13.50
|
||||
356,1179,1346,46.69
|
||||
355,1499,1026,59.37
|
||||
354,1801,724,71.33
|
||||
353,925,1600,36.63
|
||||
352,532,1993,21.07
|
||||
351,1248,1277,49.43
|
||||
350,1092,1433,43.25
|
||||
349,1292,1233,51.17
|
||||
348,1253,1272,49.62
|
||||
347,1070,1455,42.38
|
||||
346,1966,559,77.86
|
||||
345,1436,1089,56.87
|
||||
344,786,1739,31.13
|
||||
343,1365,1160,54.06
|
||||
342,1181,1344,46.77
|
||||
341,1147,1378,45.43
|
||||
340,1319,1206,52.24
|
||||
339,904,1621,35.80
|
||||
338,748,1777,29.62
|
||||
337,533,1992,21.11
|
||||
336,1024,1501,40.55
|
||||
335,458,2067,18.14
|
||||
334,395,2130,15.64
|
||||
333,923,1602,36.55
|
||||
332,548,1977,21.70
|
||||
331,1020,1505,40.40
|
||||
330,1736,789,68.75
|
||||
329,1300,1225,51.49
|
||||
328,1564,961,61.94
|
||||
327,999,1526,39.56
|
||||
326,1642,883,65.03
|
||||
325,880,1645,34.85
|
||||
324,1009,1516,39.96
|
||||
323,810,1715,32.08
|
||||
322,624,1901,24.71
|
||||
321,614,1911,24.32
|
||||
320,634,1891,25.11
|
||||
319,873,1652,34.57
|
||||
318,1320,1205,52.28
|
||||
317,1416,1109,56.08
|
||||
316,710,1815,28.12
|
||||
315,830,1695,32.87
|
||||
314,358,2167,14.18
|
||||
313,1379,1146,54.61
|
||||
312,1871,654,74.10
|
||||
311,1634,891,64.71
|
||||
310,420,2105,16.63
|
||||
309,1605,920,63.56
|
||||
308,1486,1039,58.85
|
||||
307,914,1611,36.20
|
||||
306,1647,878,65.23
|
||||
305,646,1879,25.58
|
||||
304,1526,999,60.44
|
||||
303,995,1530,39.41
|
||||
302,1137,1388,45.03
|
||||
301,1059,1466,41.94
|
||||
300,991,1534,39.25
|
||||
299,1205,1320,47.72
|
||||
298,734,1791,29.07
|
||||
297,1388,1137,54.97
|
||||
296,1026,1499,40.63
|
||||
295,1394,1131,55.21
|
||||
294,1021,1504,40.44
|
||||
293,1152,1373,45.62
|
||||
292,437,2088,17.31
|
||||
291,1154,1371,45.70
|
||||
290,1269,1256,50.26
|
||||
289,855,1670,33.86
|
||||
288,1153,1372,45.66
|
||||
287,1100,1425,43.56
|
||||
286,858,1667,33.98
|
||||
285,777,1748,30.77
|
||||
284,797,1728,31.56
|
||||
283,855,1670,33.86
|
||||
282,1343,1182,53.19
|
||||
281,1351,1174,53.50
|
||||
280,1314,1211,52.04
|
||||
279,735,1790,29.11
|
||||
278,904,1621,35.80
|
||||
277,1253,1272,49.62
|
||||
276,935,1590,37.03
|
||||
275,1048,1477,41.50
|
||||
274,916,1609,36.28
|
||||
273,359,2166,14.22
|
||||
272,892,1633,35.33
|
||||
271,1320,1205,52.28
|
||||
270,727,1798,28.79
|
||||
269,1702,823,67.41
|
||||
268,1612,913,63.84
|
||||
267,1182,1343,46.81
|
||||
266,1470,1055,58.22
|
||||
265,872,1653,34.53
|
||||
264,1270,1255,50.30
|
||||
263,687,1838,27.21
|
||||
262,435,2090,17.23
|
||||
261,1260,1265,49.90
|
||||
260,1375,1150,54.46
|
||||
259,1297,1228,51.37
|
||||
258,653,1872,25.86
|
||||
257,639,1886,25.31
|
||||
256,1721,804,68.16
|
||||
255,1295,1230,51.29
|
||||
254,1213,1312,48.04
|
||||
253,863,1662,34.18
|
||||
252,1360,1165,53.86
|
||||
251,656,1869,25.98
|
||||
250,682,1843,27.01
|
||||
249,880,1645,34.85
|
||||
248,278,2247,11.01
|
||||
247,1975,550,78.22
|
||||
246,1364,1161,54.02
|
||||
245,1142,1383,45.23
|
||||
244,774,1751,30.65
|
||||
243,1001,1524,39.64
|
||||
242,873,1652,34.57
|
||||
241,902,1623,35.72
|
||||
240,1323,1202,52.40
|
||||
239,1390,1135,55.05
|
||||
238,1062,1463,42.06
|
||||
237,1162,1363,46.02
|
||||
236,717,1808,28.40
|
||||
235,895,1630,35.45
|
||||
234,794,1731,31.45
|
||||
233,987,1538,39.09
|
||||
232,297,2228,11.76
|
||||
231,1538,987,60.91
|
||||
230,1138,1387,45.07
|
||||
229,760,1765,30.10
|
||||
228,1706,819,67.56
|
||||
227,1169,1356,46.30
|
||||
226,1274,1251,50.46
|
||||
225,617,1908,24.44
|
||||
224,794,1731,31.45
|
||||
223,544,1981,21.54
|
||||
222,1659,866,65.70
|
||||
221,1587,938,62.85
|
||||
220,1173,1352,46.46
|
||||
219,1278,1247,50.61
|
||||
218,1043,1482,41.31
|
||||
217,1141,1384,45.19
|
||||
216,1728,797,68.44
|
||||
215,906,1619,35.88
|
||||
214,563,1962,22.30
|
||||
213,1427,1098,56.51
|
||||
212,1271,1254,50.34
|
||||
211,1114,1411,44.12
|
||||
210,945,1580,37.43
|
||||
209,1358,1167,53.78
|
||||
208,1041,1484,41.23
|
||||
207,1150,1375,45.54
|
||||
206,1035,1490,40.99
|
||||
205,1195,1330,47.33
|
||||
204,901,1624,35.68
|
||||
203,1307,1218,51.76
|
||||
202,1222,1303,48.40
|
||||
201,1323,1202,52.40
|
||||
200,1222,1303,48.40
|
||||
199,1185,1340,46.93
|
||||
198,1174,1351,46.50
|
||||
197,956,1569,37.86
|
||||
196,1134,1391,44.91
|
||||
195,1109,1416,43.92
|
||||
194,1440,1085,57.03
|
||||
193,771,1754,30.53
|
||||
192,1094,1431,43.33
|
||||
191,1191,1334,47.17
|
||||
190,1189,1336,47.09
|
||||
189,947,1578,37.50
|
||||
188,536,1989,21.23
|
||||
187,903,1622,35.76
|
||||
186,814,1711,32.24
|
||||
185,927,1598,36.71
|
||||
184,1178,1347,46.65
|
||||
183,1632,893,64.63
|
||||
182,1394,1131,55.21
|
||||
181,1381,1144,54.69
|
||||
180,1147,1378,45.43
|
||||
179,591,1934,23.41
|
||||
178,1344,1181,53.23
|
||||
177,1191,1334,47.17
|
||||
176,695,1830,27.52
|
||||
175,1527,998,60.48
|
||||
174,1239,1286,49.07
|
||||
173,621,1904,24.59
|
||||
172,672,1853,26.61
|
||||
171,1581,944,62.61
|
||||
170,1317,1208,52.16
|
||||
169,1230,1295,48.71
|
||||
168,1352,1173,53.54
|
||||
167,1284,1241,50.85
|
||||
166,1311,1214,51.92
|
||||
165,1099,1426,43.52
|
||||
164,1217,1308,48.20
|
||||
163,1115,1410,44.16
|
||||
162,867,1658,34.34
|
||||
161,941,1584,37.27
|
||||
160,848,1677,33.58
|
||||
159,1320,1205,52.28
|
||||
158,1197,1328,47.41
|
||||
157,1023,1502,40.51
|
||||
156,1078,1447,42.69
|
||||
155,1345,1180,53.27
|
||||
154,1226,1299,48.55
|
||||
153,1171,1354,46.38
|
||||
152,835,1690,33.07
|
||||
151,1146,1379,45.39
|
||||
150,793,1732,31.41
|
||||
149,946,1579,37.47
|
||||
148,1240,1285,49.11
|
||||
147,1265,1260,50.10
|
||||
146,920,1605,36.44
|
||||
145,1200,1325,47.52
|
||||
144,997,1528,39.49
|
||||
143,987,1538,39.09
|
||||
142,1116,1409,44.20
|
||||
141,1218,1307,48.24
|
||||
140,868,1657,34.38
|
||||
139,475,2050,18.81
|
||||
138,999,1526,39.56
|
||||
137,1590,935,62.97
|
||||
136,1458,1067,57.74
|
||||
135,1001,1524,39.64
|
||||
134,1221,1304,48.36
|
||||
133,1020,1505,40.40
|
||||
132,809,1716,32.04
|
||||
131,1505,1020,59.60
|
||||
130,627,1898,24.83
|
||||
129,1258,1267,49.82
|
||||
128,1199,1326,47.49
|
||||
127,601,1924,23.80
|
||||
126,1261,1264,49.94
|
||||
125,827,1698,32.75
|
||||
124,1270,1255,50.30
|
||||
123,617,1908,24.44
|
||||
122,1074,1451,42.53
|
||||
121,893,1632,35.37
|
||||
120,1766,759,69.94
|
||||
119,1495,1030,59.21
|
||||
118,914,1611,36.20
|
||||
117,805,1720,31.88
|
||||
116,675,1850,26.73
|
||||
115,890,1635,35.25
|
||||
114,1036,1489,41.03
|
||||
113,514,2011,20.36
|
||||
112,394,2131,15.60
|
||||
111,1912,613,75.72
|
||||
110,389,2136,15.41
|
||||
109,320,2205,12.67
|
||||
108,1889,636,74.81
|
||||
107,1940,585,76.83
|
||||
106,1088,1437,43.09
|
||||
105,865,1660,34.26
|
||||
104,1662,863,65.82
|
||||
103,1132,1393,44.83
|
||||
102,1238,1287,49.03
|
||||
101,1597,928,63.25
|
||||
100,1036,1489,41.03
|
||||
99,1307,1218,51.76
|
||||
98,1046,1479,41.43
|
||||
97,1043,1482,41.31
|
||||
96,777,1748,30.77
|
||||
95,922,1603,36.51
|
||||
94,1359,1166,53.82
|
||||
93,663,1862,26.26
|
||||
92,1542,983,61.07
|
||||
91,976,1549,38.65
|
||||
90,707,1818,28.00
|
||||
89,1207,1318,47.80
|
||||
88,819,1706,32.44
|
||||
87,1026,1499,40.63
|
||||
86,1335,1190,52.87
|
||||
85,530,1995,20.99
|
||||
84,1599,926,63.33
|
||||
83,1408,1117,55.76
|
||||
82,533,1992,21.11
|
||||
81,691,1834,27.37
|
||||
80,343,2182,13.58
|
||||
79,2095,430,82.97
|
||||
78,1015,1510,40.20
|
||||
77,804,1721,31.84
|
||||
76,1967,558,77.90
|
||||
75,1895,630,75.05
|
||||
74,1399,1126,55.41
|
||||
73,1015,1510,40.20
|
||||
72,1193,1332,47.25
|
||||
71,870,1655,34.46
|
||||
70,731,1794,28.95
|
||||
69,1094,1431,43.33
|
||||
68,1401,1124,55.49
|
||||
67,1583,942,62.69
|
||||
66,772,1753,30.57
|
||||
65,802,1723,31.76
|
||||
64,617,1908,24.44
|
||||
63,1050,1475,41.58
|
||||
62,1643,882,65.07
|
||||
61,588,1937,23.29
|
||||
60,599,1926,23.72
|
||||
59,770,1755,30.50
|
||||
58,1411,1114,55.88
|
||||
57,997,1528,39.49
|
||||
56,494,2031,19.56
|
||||
55,1235,1290,48.91
|
||||
54,1922,603,76.12
|
||||
53,1483,1042,58.73
|
||||
52,967,1558,38.30
|
||||
51,691,1834,27.37
|
||||
50,938,1587,37.15
|
||||
49,1460,1065,57.82
|
||||
48,1067,1458,42.26
|
||||
47,1063,1462,42.10
|
||||
46,1212,1313,48.00
|
||||
45,794,1731,31.45
|
||||
44,494,2031,19.56
|
||||
43,1547,978,61.27
|
||||
42,278,2247,11.01
|
||||
41,2035,490,80.59
|
||||
40,2081,444,82.42
|
||||
39,1839,686,72.83
|
||||
38,1541,984,61.03
|
||||
37,1372,1153,54.34
|
||||
36,730,1795,28.91
|
||||
35,1631,894,64.59
|
||||
34,1186,1339,46.97
|
||||
33,980,1545,38.81
|
||||
32,1117,1408,44.24
|
||||
31,1078,1447,42.69
|
||||
30,910,1615,36.04
|
||||
29,1407,1118,55.72
|
||||
28,553,1972,21.90
|
||||
27,1243,1282,49.23
|
||||
26,1035,1490,40.99
|
||||
25,1106,1419,43.80
|
||||
24,1290,1235,51.09
|
||||
23,1135,1390,44.95
|
||||
22,1184,1341,46.89
|
||||
21,1553,972,61.50
|
||||
20,1294,1231,51.25
|
||||
19,1321,1204,52.32
|
||||
18,1314,1211,52.04
|
||||
17,787,1738,31.17
|
||||
16,1121,1404,44.40
|
||||
15,1690,835,66.93
|
||||
14,700,1825,27.72
|
||||
13,627,1898,24.83
|
||||
12,1754,771,69.47
|
||||
11,919,1606,36.40
|
||||
10,1051,1474,41.62
|
||||
9,557,1968,22.06
|
||||
8,594,1931,23.52
|
||||
7,1281,1244,50.73
|
||||
6,2005,520,79.41
|
||||
5,926,1599,36.67
|
||||
4,476,2049,18.85
|
||||
3,325,2200,12.87
|
||||
2,1516,1009,60.04
|
||||
1,309,2216,12.24
|
||||
0,2134,391,84.51
|
||||
Reference in New Issue
Block a user