This commit is contained in:
dosangyoon
2022-07-29 14:28:27 +09:00
parent b06a89eb2f
commit 794e2ea5f7
1082 changed files with 4441 additions and 446000 deletions

View File

@@ -0,0 +1,74 @@
import os
import shutil
import datetime
import time
from stockpredictor.crawler.sQLite.FnGuideCrawler import FnGuideCrawler
from stockpredictor.crawler.sQLite.MetaCrawler import MetaCrawler
from stockpredictor.crawler.sQLite.StockCrawler import StockCrawler
from stockpredictor.analysis.AnalyzerSqlite import AnalyzerSqlite
# Daily pipeline: crawl financial statements, market indicators and stock
# prices into one SQLite file, run the analyzers, then write candidate reports.

# Run date as an ISO string (not read again in this script).
today = datetime.datetime.now().strftime("%Y-%m-%d")
# DB Browser for SQLite: http://hleecaster.com/python-sqlite3/
# Project root: four directory levels above this file.
PROJECT_HOME = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
START_DATE = "1900.01.01"
start = time.time()
stockFileName = PROJECT_HOME + '/resources/stock.db'

# Financial statements are downloaded every three months.
fnGuideCrawler = FnGuideCrawler(START_DATE)
print("[KOSPI 상장기업 재무제표 다운로드]")
fnGuideCrawler.crawl_fnguide(stockFileName)

metaCrawler = MetaCrawler(START_DATE)
print("\n[증시자금동향 (신용잔고, 펀드자금 잔고)]")
metaCrawler.crawl_money_trend(stockFileName)
print("\n[국내 시장금리]")
metaCrawler.crawl_interest_rates(stockFileName)
print("\n[투자자별 매매동향(Trading_Trend)]")
metaCrawler.crawl_trading_trend(stockFileName)
print("\n[환율 (USD, JPY, EUR, CNY)]")
metaCrawler.crawl_exchange(stockFileName)
print("\n[원유 (WTI), 국제금, COPPER, NATURALGAS, CORN, SOYBEAN]")
metaCrawler.crawl_meterials(stockFileName)

print("\n[종목 다운로드]")
stockCrawler = StockCrawler(START_DATE)
stockCrawler.crawl_etf_stocks(stockFileName)
stockCrawler.crawl_stocks(stockFileName)
#stockCrawler.crawl_special_stocks(stockFileName)

print("\n[종목 분석]")
# S: run everything through the analysis stage
inFileName = PROJECT_HOME + '/resources/stock.db'
analyzerSqlite = AnalyzerSqlite(PROJECT_HOME, stockFileName)
analyzerSqlite.analyzeDaily()
analyzerSqlite.analyzeGrouping("weekly")
analyzerSqlite.analyzeGrouping("monthly")
# NOTE(review): a fresh analyzer is built before the candidate search, as in
# the original; presumably to reset its state - confirm whether it is needed.
analyzerSqlite = AnalyzerSqlite(PROJECT_HOME, stockFileName)

print("\n[종목 결정]")
day = datetime.datetime.today().strftime("%Y%m%d")
outPath = PROJECT_HOME + "/resources/analysis/" + day
# Start from an empty per-day output directory.
if os.path.isdir(outPath):
    shutil.rmtree(outPath)
# makedirs also creates resources/analysis itself on a fresh checkout.
os.makedirs(outPath)
print("print to Html...")
analyzerSqlite.findCandidate(outPath)
# E: run everything through the analysis stage

# Apply the format with %; passing the value as a second print() argument
# would emit the raw "%6.2f" text followed by the unformatted number.
print("time : %6.2f" % (time.time() - start))
print ("done...")

View File

@@ -0,0 +1,223 @@
from bs4 import BeautifulSoup
from pandas import DataFrame, Series
import requests as re
import pandas as pd
import os
import json
import sqlite3
import requests
class FnGuideCrawler:
    """Download FnGuide financial-highlight tables and store them in SQLite.

    The company list is read from the KRX KIND download page; one row per
    (company code, period) is written to the ``fnguide`` table.
    """

    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    START_DATE = None

    # Numeric columns of the ``fnguide`` table in schema order. Each entry is
    # (column name, FnGuide row labels tried in order, optional). An optional
    # field that is absent is stored as 0; a missing required field raises
    # KeyError for the last label tried.
    _FIELDS = (
        ('sales', ('매출액',), True),
        ('net_business_profits', ('순영업수익',), True),
        ('business_profits', ('영업이익',), False),
        ('business_profits_release', ('영업이익(발표기준)',), False),
        ('net_profit', ('당기순이익',), False),
        ('significant_shareholder_net_profit', ('지배주주순이익',), False),
        ('none_significant_shareholder_net_profit', ('비지배주주순이익',), False),
        ('total_assets', ('자산총계',), False),
        ('total_debt', ('부채총계',), False),
        ('total_ownership_interest', ('자본총계',), False),
        ('equity_holdings', ('지배주주지분',), False),
        ('none_equity_holdings', ('비지배주주지분',), False),
        ('capital', ('자본금',), False),
        ('debt_ratio', ('부채비율',), False),
        ('reserve_ratio', ('유보율',), False),
        ('business_profits_ratio', ('영업이익률',), False),
        ('significant_shareholder_profits_ratio', ('지배주주순이익률',), False),
        ('ROA', ('ROA',), False),
        ('ROE', ('ROE',), False),
        ('EPS', ('EPS(원)', 'EPS'), False),
        ('BPS', ('BPS(원)', 'BPS'), False),
        ('DPS', ('DPS(원)', 'DPS'), False),
        ('PER', ('PER',), False),
        ('PBR', ('PBR',), False),
        ('share_outstanding', ('발행주식수',), False),
        ('dividend_rate', ('배당수익률',), False),
    )

    def __init__(self, START_DATE):
        """Store START_DATE on the instance (not read by the methods below)."""
        self.START_DATE = START_DATE

    def getStockInfo(self):
        """Return a DataFrame with columns ``name`` and ``code`` of listed companies."""
        code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
        # Stock codes are six digits; restore the leading zeros.
        code_df.종목코드 = code_df.종목코드.map('{:06d}'.format)
        # Only the company name and the stock code are needed.
        code_df = code_df[['회사명', '종목코드']]
        # Rename the Korean column names to English.
        code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})
        return code_df

    # Financial statements crawled from FnGuide.
    # http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221294884955&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def get_fnguide_table(self, code):
        """Fetch the annual highlight table for one stock code.

        Returns a dict ``{period label: {row label: value}}``; an empty dict
        when the page does not contain the expected table. Cells that do not
        parse as numbers become 0. Network errors from ``requests`` propagate.
        """
        url = 'http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s' % (code.strip())
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, headers=self.header, timeout=30)
        html = BeautifulSoup(response.content, 'html.parser')
        body = html.find('body')
        try:
            fn_body = body.find('div', {'class': 'fng_body asp_body'})
            ur_table = fn_body.find('div', {'id': 'div15'})
            table = ur_table.find('div', {'id': 'highlight_D_Y'})
            rows = table.find('tbody').find_all('tr')
        except AttributeError:
            # One of the find() calls returned None: table not on this page.
            return {}

        Table = DataFrame()
        for row in rows:
            # Row label: prefer the accordion span, fall back to the header cell.
            category = row.find('span', {'class': 'txt_acd'})
            if category is None:
                category = row.find('th')
            category = category.text.strip()
            # Row values.
            value_list = []
            for cell in row.find_all('td', {'class': 'r'}):
                text = cell.text.replace(',', '').strip()
                try:
                    value_list.append(float(text))
                except ValueError:
                    value_list.append(0)
            Table['%s' % (category)] = value_list

        # Period labels from the table header, e.g. "2021/12" -> "2021.12.01".
        thead = table.find('thead')
        year_list = []
        for th in thead.find('tr', {'class': 'td_gapcolor2'}).find_all('th'):
            span = th.find('span', {'class': 'txt_acd'})
            temp_year = span.text if span is not None else th.text
            year_list.append(temp_year.replace("/", ".") + ".01")
        Table.index = year_list
        return Table.T.to_dict()

    @staticmethod
    def _lookup(row, labels, optional):
        """Return the value of the first label present in row (see _FIELDS)."""
        for label in labels:
            if label in row:
                return row[label]
        if optional:
            return 0
        raise KeyError(labels[-1])

    def crawl_fnguide(self, inFileName):
        """Crawl every listed company and insert its periods into ``fnguide``.

        inFileName is the SQLite database path. The connection runs in
        autocommit mode, so each inserted row is persisted immediately.
        Rows that already exist for (CODE, ymd) are left untouched.
        """
        tableName = 'fnguide'
        numeric_columns = [field[0] for field in self._FIELDS]
        all_columns = ['CODE', 'NAME', 'ymd', 'type'] + numeric_columns
        create_sql = ("CREATE TABLE IF NOT EXISTS " + tableName
                      + " (CODE text, NAME text, ymd text, type text, "
                      + ", ".join(column + " REAL" for column in numeric_columns) + ")")
        create_key = "CREATE INDEX IF NOT EXISTS " + tableName + "_idx on " + tableName + " (CODE, ymd) "
        select_sql = 'SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?'
        insert_sql = ("INSERT INTO " + tableName + "(" + ", ".join(all_columns) + ") VALUES("
                      + ", ".join("?" * len(all_columns)) + ")")

        conn = sqlite3.connect(inFileName, isolation_level=None)
        try:
            cursor = conn.cursor()
            cursor.execute(create_sql)
            cursor.execute(create_key)
            code_df = self.getStockInfo()
            for idx, (item_name, item_code) in enumerate(code_df.values, start=1):
                print(idx, item_code, item_name, 'http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s' % (item_code.strip()))
                fnGuideData = self.get_fnguide_table(item_code)
                for key_ymd, row in fnGuideData.items():
                    # "(P)" / "(E)" in the period label are stored in the type
                    # column and stripped from the date.
                    ymd = key_ymd.replace('(P)', '').replace('(E)', '')
                    if key_ymd.find('P') > 0:
                        record_type = 'P'
                    elif key_ymd.find('E') > 0:
                        record_type = 'E'
                    else:
                        record_type = ''
                    values = [self._lookup(row, labels, optional)
                              for _, labels, optional in self._FIELDS]
                    cursor.execute(select_sql, (item_code, ymd))
                    if cursor.fetchone() is not None:
                        # Skip only this period. Stopping at the first stored
                        # period would keep later periods from being inserted
                        # on re-runs (assumes the site lists periods
                        # oldest-first - TODO confirm).
                        continue
                    cursor.execute(insert_sql, [item_code, item_name, ymd, record_type] + values)
            cursor.close()
        finally:
            # Release the database file even when a request or lookup fails.
            conn.close()
        return
if __name__ == "__main__":
    # Project root: four directory levels above this file.
    PROJECT_HOME = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
    inFnguideFileName = PROJECT_HOME + '/resources/stock.db'
    # The constructor requires START_DATE; calling it without one raises
    # TypeError. "1900.01.01" is an arbitrary early date.
    crawler = FnGuideCrawler("1900.01.01")
    crawler.crawl_fnguide(inFnguideFileName)

View File

@@ -0,0 +1,460 @@
import os
import datetime
import requests
import sqlite3
from time import sleep
import pandas as pd
class MetaCrawler:
    """Crawl market-wide indicators from Naver Finance into SQLite.

    Each ``crawl_*`` method owns one table (``meta_1`` .. ``meta_5``). It
    walks the paged daily listing and inserts rows until it meets a date that
    is not newer than the latest stored one (the listings are assumed to be
    newest-first). A page that cannot be fetched aborts the crawl with an
    exception before the current series is committed, so a series is never
    stored with a gap below its newest rows.
    """

    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    limit_page_count = 1000000
    START_DATE = None
    # Attempts per page before the crawl is aborted, and the pause (seconds)
    # between failed attempts.
    max_fetch_attempts = 5
    retry_delay = 2.0

    def __init__(self, START_DATE):
        """Store the date string (e.g. "1900.01.01") used when a table is empty."""
        self.START_DATE = START_DATE

    def _fetch_tables(self, url, **read_html_kwargs):
        """Download url and return the tables parsed by ``pandas.read_html``.

        Retries up to ``max_fetch_attempts`` times, printing the URL on each
        failure, then raises RuntimeError. Sleeps 0.5 s after a success to
        throttle the crawl.
        """
        last_error = None
        for _ in range(self.max_fetch_attempts):
            try:
                text = requests.get(url, headers=self.header, timeout=30).text
                tables = pd.read_html(text, **read_html_kwargs)
            except (requests.RequestException, ValueError) as exc:
                # ValueError is what read_html raises when no table is found.
                print(url)
                last_error = exc
                sleep(self.retry_delay)
            else:
                sleep(0.5)
                return tables
        raise RuntimeError('giving up on %s after %d attempts' % (url, self.max_fetch_attempts)) from last_error

    def _last_stored_day(self, cursor, tableName, code=None):
        """Return the greatest stored ymd (optionally for one CODE), else START_DATE."""
        if code is None:
            cursor.execute('SELECT ymd FROM ' + tableName + ' order by ymd desc')
        else:
            cursor.execute('SELECT ymd FROM ' + tableName + ' WHERE CODE=? order by ymd desc', (code,))
        result = cursor.fetchone()
        return self.START_DATE if result is None else result[0]

    # Reference) http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221288761509
    def crawl_exchange(self, inFileName):
        """Store daily exchange rates (USD, JPY, EUR, CNY) in ``meta_1``."""
        tableName = 'meta_1'
        base_url = 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd='
        inputs = [
            {'NAME': 'USD', 'CODE': 'FX_USDKRW', 'URL': base_url + 'FX_USDKRW'},  # United States
            {'NAME': 'JPY', 'CODE': 'FX_JPYKRW', 'URL': base_url + 'FX_JPYKRW'},  # Japan
            {'NAME': 'EUR', 'CODE': 'FX_EURKRW', 'URL': base_url + 'FX_EURKRW'},  # European Union
            {'NAME': 'CNY', 'CODE': 'FX_CNYKRW', 'URL': base_url + 'FX_CNYKRW'},  # China
        ]
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS "+tableName+" (CODE text, NAME text, ymd text, price REAL, diff REAL, cash_buy REAL, cash_sell REAL, transfer_buy REAL, transfer_sell REAL)")
            cursor.execute("CREATE INDEX IF NOT EXISTS " + tableName + "_idx on " + tableName + " (CODE, ymd) ")
            for source in inputs:
                NAME = source['NAME']
                CODE = source['CODE']
                lastDay = self._last_stored_day(cursor, tableName, CODE)
                finish = False
                for page in range(1, self.limit_page_count):
                    html = self._fetch_tables(source['URL'] + '&page=%s' % page)
                    # A page with at most one row marks the end of the listing.
                    if len(html[0].날짜.values) <= 1:
                        break
                    for item in html[0].values:
                        if item[0] <= lastDay:
                            finish = True
                            break
                        # Columns: date, base rate, change, cash buy, cash sell,
                        # transfer buy, transfer sell.
                        ymd = item[0]
                        cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (CODE, ymd,))
                        if cursor.fetchone() is not None:
                            finish = True
                            break
                        cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, price, diff, cash_buy, cash_sell, transfer_buy, transfer_sell) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)", (CODE, NAME) + tuple(item[:7]))
                        print(CODE, NAME, ymd)
                    if finish:
                        break
                # This series is complete; keep it even if a later one fails.
                conn.commit()
            cursor.close()
        finally:
            conn.close()
        return

    # Trading trend by investor type (Trading_Trend)
    # (pri, individuals)
    # (for, foreigners)
    # (ins, institutions total)
    # (ins0, financial investment)
    # (ins1, insurance)
    # (ins2, investment trusts (private))
    # (ins3, banks)
    # (ins4, other financial institutions)
    # (ins5, pension funds etc.)
    # (cor, other corporations)
    # Reference) http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221289696771&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_trading_trend(self, inFileName):
        """Store the daily trading trend by investor type in ``meta_2``."""
        tableName = 'meta_2'
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS "+tableName+" (ymd text PRIMARY KEY, pri integer, fori integer, ins integer, ins0 integer, ins1 integer, ins2 integer, ins3 integer, ins4 integer, ins5 integer, cor integer)")
            cursor.execute("CREATE INDEX IF NOT EXISTS "+tableName+"_idx on "+tableName+" (ymd) ")
            lastDay = self._last_stored_day(cursor, tableName)
            today = datetime.datetime.now().strftime("%Y%m%d")
            url = 'http://finance.naver.com/sise/investorDealTrendDay.nhn?bizdate='+today+'&sosok=&page='
            previousDay = ""
            finish = False
            for page in range(1, self.limit_page_count):
                html = self._fetch_tables(url + str(page))
                # A page with at most two rows marks the end of the listing.
                if len(html[0].날짜.values) <= 2:
                    break
                for item in html[0].values:
                    if str(item[0]) == "nan":
                        continue
                    # Dates arrive as "yy.mm.dd"; a row equal to the previous
                    # page's third row means the same page was served again.
                    if "20" + item[0] <= lastDay or item[0] == previousDay:
                        finish = True
                        break
                    ymd = "20" + item[0]
                    cursor.execute('SELECT * FROM ' + tableName + ' WHERE ymd=?', (ymd,))
                    if cursor.fetchone() is not None:
                        finish = True
                        break
                    # item[1:11]: pri, fori, ins, ins0 .. ins5, cor.
                    cursor.execute("INSERT INTO " + tableName + "(ymd, pri, fori, ins, ins0, ins1, ins2, ins3, ins4, ins5, cor) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (ymd,) + tuple(item[1:11]))
                    print ("20"+item[0])
                previousDay = html[0].values[2][0]
                if finish:
                    break
            conn.commit()
            cursor.close()
        finally:
            conn.close()
        return

    # Market fund flows (customer deposits, credit balance, fund balances)
    # Reference) http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221290138187&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_money_trend(self, inFileName):
        """Store daily market fund-flow figures in ``meta_3``."""
        tableName = 'meta_3'
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS "+tableName+" (ymd text, dep1_1 integer, dep1_2 integer, dep2_1 integer, dep2_2 integer, dep3_1 integer, dep3_2 integer, dep4_1 integer, dep4_2 integer, dep5_1 integer, dep5_2 integer)")
            cursor.execute("CREATE INDEX IF NOT EXISTS "+tableName+"_idx on "+tableName+" (ymd) ")
            lastDay = self._last_stored_day(cursor, tableName)
            previousDay = ""
            url = 'http://finance.naver.com/sise/sise_deposit.nhn?&page='
            finish = False
            for page in range(1, self.limit_page_count):
                html = self._fetch_tables(url + str(page), encoding='euc-kr')
                # A page with at most ten rows marks the end of the listing.
                if len(html[0].날짜.values) <= 10:
                    break
                for item in html[0].values:
                    if str(item[0]) == "nan":
                        continue
                    if "20"+item[0] <= lastDay or item[0] == previousDay:
                        finish = True
                        break
                    ymd = "20"+item[0]
                    cursor.execute('SELECT * FROM ' + tableName + ' WHERE ymd=?', (ymd,))
                    if cursor.fetchone() is not None:
                        finish = True
                        break
                    # item[1:11]: cumulative / daily pairs for customer
                    # deposits, credit balance, equity funds, mixed funds and
                    # bond funds (dep1_1 .. dep5_2).
                    cursor.execute("INSERT INTO " + tableName + "(ymd, dep1_1, dep1_2, dep2_1, dep2_2, dep3_1, dep3_2, dep4_1, dep4_2, dep5_1, dep5_2) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (ymd,) + tuple(item[1:11]))
                    print("20"+item[0])
                if finish:
                    break
                previousDay = html[0].values[2][0]
            conn.commit()
            cursor.close()
        finally:
            conn.close()
        return

    # Domestic market interest rates
    # Reference) http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221292348073&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def crawl_interest_rates(self, inFileName):
        """Store daily domestic interest rates in ``meta_4``."""
        tableName = 'meta_4'
        base_url = 'http://finance.naver.com/marketindex/interestDailyQuote.nhn?marketindexCd='
        inputs = [
            {'NAME': '91일 CD금리', 'CODE': 'IRR_CD91', 'URL': base_url + 'IRR_CD91'},
            {'NAME': '콜금리', 'CODE': 'IRR_CALL', 'URL': base_url + 'IRR_CALL'},
            {'NAME': '국고채(3년)', 'CODE': 'IRR_GOVT03Y', 'URL': base_url + 'IRR_GOVT03Y'},
            {'NAME': '회사채(3년)', 'CODE': 'IRR_CORP03Y', 'URL': base_url + 'IRR_CORP03Y'},
        ]
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text, NAME text, ymd text, close REAL, diff REAL, rate REAL)")
            cursor.execute("CREATE INDEX IF NOT EXISTS "+tableName+"_idx on "+tableName+" (CODE, ymd) ")
            for source in inputs:
                NAME = source['NAME']
                CODE = source['CODE']
                lastDay = self._last_stored_day(cursor, tableName, CODE)
                finish = False
                for page in range(1, self.limit_page_count):
                    html = self._fetch_tables(source['URL'] + '&page=%s' % page)
                    ymd = ""
                    if len(html[0].날짜.values) <= 1:
                        break
                    for item in html[0].values:
                        if str(item[0]) == "nan":
                            continue
                        if item[0] <= lastDay:
                            finish = True
                            break
                        ymd = item[0]
                        close = item[1]  # closing value
                        diff = item[2]   # change from previous day
                        rate = item[3]   # percentage change
                        cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (CODE, ymd,))
                        if cursor.fetchone() is not None:
                            finish = True
                            break
                        cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, close, diff, rate) VALUES(?, ?, ?, ?, ?, ?)", (CODE, NAME, ymd, close, diff, rate))
                    if finish:
                        break
                    # Progress: last date handled on this page.
                    print(NAME + " / " + ymd)
                # This series is complete; keep it even if a later one fails.
                conn.commit()
            cursor.close()
        finally:
            conn.close()
        return

    # Reference) http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221288761509
    def crawl_meterials(self, inFileName):
        """Store daily commodity prices (oil, gold, copper, gas, corn, soybean) in ``meta_5``."""
        tableName = 'meta_5'
        base_url = 'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd='
        inputs = [
            {'NAME': 'WTI', 'CODE': 'OIL_CL', 'URL': base_url + 'OIL_CL&fdtc=2'},             # WTI crude oil
            {'NAME': 'GOLD', 'CODE': 'CMDT_GC', 'URL': base_url + 'CMDT_GC&fdtc=2'},          # gold
            {'NAME': 'COPPER', 'CODE': 'CMDT_CDY', 'URL': base_url + 'CMDT_CDY&fdtc=2'},      # copper
            {'NAME': 'NATURALGAS', 'CODE': 'CMDT_NG', 'URL': base_url + 'CMDT_NG&fdtc=2'},    # natural gas
            {'NAME': 'CORN', 'CODE': 'CMDT_C', 'URL': base_url + 'CMDT_C&fdtc=2'},            # corn
            {'NAME': 'SOYBEAN', 'CODE': 'CMDT_S', 'URL': base_url + 'CMDT_S&fdtc=2'},         # soybean
        ]
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text, NAME text, ymd text, close REAL, diff REAL, rate REAL)")
            cursor.execute("CREATE INDEX IF NOT EXISTS " + tableName + "_idx on " + tableName + " (CODE, NAME, ymd) ")
            for source in inputs:
                NAME = source['NAME']
                CODE = source['CODE']
                lastDay = self._last_stored_day(cursor, tableName, CODE)
                finish = False
                for page in range(1, self.limit_page_count):
                    html = self._fetch_tables(source['URL'] + '&page=%s' % page)
                    if len(html[0].날짜.values) <= 1:
                        break
                    for item in html[0].values:
                        if item[0] <= lastDay:
                            finish = True
                            break
                        ymd = item[0]    # date
                        close = item[1]  # closing price
                        diff = item[2]   # change from previous day
                        rate = item[3]   # percentage change
                        cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (CODE, ymd,))
                        if cursor.fetchone() is not None:
                            finish = True
                            break
                        cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, close, diff, rate) VALUES(?, ?, ?, ?, ?, ?)", (CODE, NAME, ymd, close, diff, rate))
                        print(CODE, NAME, ymd)
                    if finish:
                        break
                # This series is complete; keep it even if a later one fails.
                conn.commit()
            cursor.close()
        finally:
            conn.close()
        return
if __name__ == "__main__":
    # Project root: four directory levels above this file.
    PROJECT_HOME = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
    inFileName = PROJECT_HOME + '/resources/stock.db'
    # The constructor requires START_DATE; calling it without one raises
    # TypeError. "1900.01.01" is an arbitrary early date.
    metaCrawler = MetaCrawler("1900.01.01")
    print("\n[환율 (USD, JPY, EUR, CNY)]")
    metaCrawler.crawl_exchange(inFileName)
    print("\n[투자자별 매매동향(Trading_Trend)]")
    metaCrawler.crawl_trading_trend(inFileName)
    print("\n[증시자금동향 (신용잔고, 펀드자금 잔고)]")
    metaCrawler.crawl_money_trend(inFileName)
    print("\n[국내 시장금리]")
    metaCrawler.crawl_interest_rates(inFileName)
    print("\n[원유 (WTI), 국제금, COPPER, NATURALGAS, CORN, SOYBEAN]")
    metaCrawler.crawl_meterials(inFileName)

View File

@@ -0,0 +1,33 @@
class MovingAverage(object):
    """Window over the most recent ``max`` values with sum and average."""

    def __init__(self, max):
        """Create an empty window that holds at most ``max`` values."""
        self.queue = []
        self.max = max

    def dequeue(self):
        """Remove and return the oldest value, or -1 if the window is not full."""
        if not self.queue or len(self.queue) < self.max:
            return -1
        return self.queue.pop(0)

    def enqueue(self, n):
        """Append n, first evicting the oldest value when the window is full."""
        if len(self.queue) == self.max:
            self.dequeue()
        self.queue.append(n)

    def sum(self):
        """Return the sum of the values currently in the window."""
        # Inside a method body the bare name ``sum`` is the builtin.
        return sum(self.queue)

    def avg(self):
        """Return the window mean rounded to two decimals; 0.0 when empty."""
        if not self.queue:
            # No samples yet: avoid dividing by zero.
            return 0.0
        return round(self.sum() / len(self.queue), 2)

    def print(self):
        """Print the current sum followed by the window contents."""
        print(self.sum(), self.queue)

View File

@@ -0,0 +1,551 @@
# https://bigdata-sk.tistory.com/10
from datetime import datetime, timedelta
import os
import pandas as pd
import re
import json
import sqlite3
import requests
from time import sleep
import time
from pandas_datareader import data as pdr
# The this_date value converted to date form can be inspected here.
# Date strings read from the source must be converted repeatedly, so the
# conversion is defined as a function named date_format().
class StockCrawler:
header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
historical_prices = None
special_pattern = None
fnGuideCrawler = None
limit_page_count = 1000000
START_DATE = None
def __init__(self, START_DATE):
self.historical_prices = dict()
self.special_pattern = (
'[', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', ',', '.', '?', '"', ':', ';', '{', '}', '|', '<', '>',
']', '+', '-', '/', '=', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
self.START_DATE = START_DATE
return
def clean_str(self, string):
    """Blank out quotes, backslashes, hyphens and parenthesised text; strip and lowercase."""
    # One pass replaces backslash, single/double quote, backtick and hyphen.
    blanked = re.sub(r"[\\'\"`-]", " ", string)
    # Drop each "(...)" group (shortest match) in favour of a single space.
    without_groups = re.sub(r"\(.*?\)", " ", blanked)
    # NOTE(review): as written this replaces a space with a space, i.e. it is
    # a no-op; it may have been meant to collapse repeated spaces - confirm.
    normalized = re.sub(r" ", " ", without_groups)
    return normalized.strip().lower()
def getStockInfo(self):
    """Download the KRX listing and return a DataFrame with ``name`` and ``code`` columns."""
    listing_url = 'http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13'
    response = requests.get(listing_url, headers=self.header, timeout=30)
    listing = pd.read_html(response.text)[0]
    # Stock codes are six digits; restore the leading zeros.
    listing.종목코드 = listing.종목코드.map('{:06d}'.format)
    # Keep only company name and stock code, then switch to English column names.
    return listing[['회사명', '종목코드']].rename(columns={'회사명': 'name', '종목코드': 'code'})
# 종목 이름을 입력하면 종목에 해당하는 코드를 불러와
# 네이버 금융(http://finance.naver.com)에 넣어줌
def get_url(self, item_name, code_df):
    """Return ``(code, url)`` for item_name.

    code_df must have ``name`` and ``code`` columns. The URL is the Naver
    Finance daily-price page for that code. The code is the string rendering
    of every matching ``code`` cell, so callers should pass a name that
    occurs exactly once.
    """
    # A boolean mask instead of DataFrame.query(): a name containing a quote
    # would otherwise break the query expression.
    matches = code_df.loc[code_df['name'] == item_name, 'code']
    code = matches.to_string(index=False).strip()
    url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code)
    return code, url
def date_format(self, d):
    """Return ``str(d)`` with every '-' replaced by '.', e.g. "2022-07-29" -> "2022.07.29"."""
    return str(d).replace('-', '.')
def getCodeIndex(self, stocks, item_code):
    """Return the position of the first entry whose 'CODE' equals item_code, else -1."""
    positions = (pos for pos, entry in enumerate(stocks) if entry['CODE'] == item_code)
    return next(positions, -1)
def crawl_etf_stocks(self, inFileName):
    """Crawl a fixed list of ETFs and insert their daily prices into ``stock``.

    inFileName is the SQLite database path. For each ETF the newest stored
    ymd (or START_DATE when none is stored) is handed to
    ``crawl_specific_stock``; returned rows not yet present for (CODE, ymd)
    are inserted. Each ETF is committed on its own, so finished ETFs survive
    a later failure.
    """
    tableName = 'stock'
    stocks = [
        {"NAME": 'KODEX 코스닥150선물인버스', "CODE": "251340"},
        {"NAME": 'KODEX 코스닥150 레버리지', "CODE": "233740"},
        {"NAME": 'KODEX 200선물인버스2X', "CODE": "252670"},
        {"NAME": 'KODEX 레버리지', "CODE": "122630"},
        {"NAME": 'KODEX 인버스', "CODE": "114800"},
        {"NAME": 'KODEX 중국본토CSI300', "CODE": "283580"},
        {"NAME": 'KODEX 심천ChiNext(합성)', "CODE": "256750"},
        {"NAME": 'KINDEX 블룸버그베트남VN30선물레버리지(H)', "CODE": "371130"},
        {"NAME": 'KODEX 미국S&P바이오(합성)', "CODE": "185680"},
        {"NAME": 'KODEX 미국S&P에너지(합성)', "CODE": "218420"},
        {"NAME": 'KODEX 골드선물(H)', "CODE": "132030"},
        {"NAME": 'KODEX 콩선물(H)', "CODE": "138920"},
        {"NAME": 'KODEX 3대농산물선물(H)', "CODE": "271060"},
        {"NAME": 'KODEX 건설', "CODE": "117700"},
        {"NAME": 'KODEX 헬스케어', "CODE": "266420"},
        {"NAME": 'KODEX 글로벌4차산업로보틱스(합성)', "CODE": "276990"},
        {"NAME": 'KODEX 바이오', "CODE": "244580"},
        {"NAME": 'KODEX 반도체', "CODE": "091160"},
        {"NAME": 'KODEX 보험', "CODE": "140700"},
        {"NAME": 'KODEX 필수소비재', "CODE": "266410"},
        {"NAME": 'KODEX 2차전지산업', "CODE": "305720"},
        {"NAME": 'KODEX 경기소비재', "CODE": "266390"},
        {"NAME": 'KODEX 철강', "CODE": "117680"},
        {"NAME": 'KODEX 에너지화학', "CODE": "117460"},
        {"NAME": 'KODEX 은행', "CODE": "091170"},
        {"NAME": 'TIGER 탄소효율그린뉴딜', "CODE": "376410"},
    ]
    conn = sqlite3.connect(inFileName)
    try:
        cursor = conn.cursor()
        # Create the table and its lookup index on first use.
        cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text, NAME text, ymd text, close REAL, diff REAL, open REAL, high REAL, low REAL, volume REAL)")
        cursor.execute("CREATE INDEX IF NOT EXISTS " + tableName + "_idx on " + tableName + " (CODE, ymd) ")
        start_time = time.time()
        for i, stock in enumerate(stocks):
            # The elapsed figure printed here is the time spent on the previous ETF.
            print (i, stock["NAME"], stock["CODE"], (time.time()-start_time), "s")
            start_time = time.time()
            cursor.execute('SELECT ymd FROM ' + tableName + ' WHERE CODE=? order by ymd desc', (stock["CODE"],))
            result = cursor.fetchone()
            # NOTE(review): crawl_stocks passes START_DATE with '.' replaced
            # by '-'; here it is passed unchanged - confirm which form
            # crawl_specific_stock expects.
            ymd = self.START_DATE if result is None else result[0]
            stock_data = self.crawl_specific_stock(stock["CODE"], ymd)
            for item in stock_data:
                cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"], item['ymd'],))
                if cursor.fetchone() is None:
                    cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, close, diff, open, high, low, volume) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)", (stock["CODE"], stock["NAME"], item['ymd'], item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume']))
            # Persist this ETF before moving on to the next one.
            conn.commit()
            sleep(0.05)
        cursor.close()
    finally:
        # Release the database file even when a crawl step fails.
        conn.close()
    return
def crawl_stocks(self, inFileName):
    """Store daily prices of every listed stock in the ``stock`` table.

    The table and its (CODE, ymd) index are created if missing. For each
    (name, code) row of ``self.getStockInfo().values`` the newest stored
    ``ymd`` is looked up and handed to ``self.crawl_specific_stock`` as the
    resume point; rows whose (CODE, ymd) pair is already stored are skipped.
    Each stock is committed on its own connection, so an interrupted run
    keeps everything written so far.

    :param inFileName: path of the SQLite database file.
    :return: None
    """
    tableName = 'stock'
    insert_sql = (
        "INSERT INTO " + tableName
        + "(CODE, NAME, ymd, close, diff, open, high, low, volume)"
        + " VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )

    conn = sqlite3.connect(inFileName)
    try:
        cursor = conn.cursor()
        # Create the table
        cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text, NAME text, ymd text, close REAL, diff REAL, open REAL, high REAL, low REAL, volume REAL)")
        # Create the lookup index
        cursor.execute("CREATE INDEX IF NOT EXISTS " + tableName + "_idx on " + tableName + " (CODE, ymd) ")
        conn.commit()
        cursor.close()
    finally:
        conn.close()

    code_df = self.getStockInfo()
    start_time = time.time()
    for idx, info in enumerate(code_df.values, start=1):
        item_name = info[0]
        item_code = info[1]
        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT ymd FROM ' + tableName + ' WHERE CODE=? order by ymd desc', (item_code,))
            latest = cursor.fetchone()
            # Nothing stored yet -> start from the configured START_DATE.
            ymd = self.START_DATE.replace(".", "-") if latest is None else latest[0]
            for row in self.crawl_specific_stock(item_code, ymd):
                cursor.execute('SELECT 1 FROM ' + tableName + ' WHERE CODE=? and ymd=?', (item_code, row['ymd'],))
                if cursor.fetchone() is None:
                    cursor.execute(insert_sql, (item_code, item_name, row['ymd'], row['close'], row['diff'], row['open'], row['high'], row['low'], row['volume']))
            conn.commit()
            cursor.close()
        finally:
            # Always release the database file, even when the crawl raises.
            conn.close()
        print(idx, item_name, item_code, (time.time() - start_time), "s")
        start_time = time.time()
        sleep(0.05)
    return
def crawl_special_stocks(self, inFileName):
    """Store daily prices of indices, leveraged ETFs and US stocks in ``stock``.

    Each ticker of the built-in list is downloaded with
    ``pdr.get_data_yahoo`` for the window START_DATE..today. The dates of
    the SQQQ close series are used as the reference calendar: a row is
    produced for a ticker only when it has a close on that date and on the
    previous reference date (``diff`` is the difference of the two closes;
    for the very first reference date ``diff`` is the close itself).
    Rows whose (CODE, ymd) pair is already stored are not inserted again.

    A ticker whose download keeps failing is skipped after a few attempts
    instead of being retried forever.

    :param inFileName: path of the SQLite database file.
    :return: None
    """
    tableName = 'stock'
    max_attempts = 5
    insert_sql = (
        "INSERT INTO " + tableName
        + "(CODE, NAME, ymd, close, diff, open, high, low, volume)"
        + " VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )

    conn = sqlite3.connect(inFileName)
    try:
        cursor = conn.cursor()
        # Create the table
        cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text, NAME text, ymd text, close REAL, diff REAL, open REAL, high REAL, low REAL, volume REAL)")
        # Create the lookup index
        cursor.execute("CREATE INDEX IF NOT EXISTS " + tableName + "_idx on " + tableName + " (CODE, ymd) ")
        conn.commit()
        cursor.close()
    finally:
        conn.close()

    start_time = time.time()
    pd.options.display.float_format = '{:.4f}'.format
    pd.set_option('display.max_columns', None)
    special_stocks = {
        '^KS11': 'Kospi',
        '^KQ11': 'Kosdak',
        'SQQQ': 'ProShares UltraPro Short QQQ',
        'TQQQ': 'ProShares UltraPro QQQ',
        'SCO': 'ProShares UltraShort Bloomberg Crude Oil',
        'UCO': 'ProShares Ultra Bloomberg Crude Oil',
        'GLL': 'ProShares UltraShort Gold',
        'UGL': 'ProShares Ultra Gold',
        'SOXS': 'Direxion Daily Semiconductor Bear -3X Shares',
        'SOXL': 'Direxion Daily Semiconductor Bull 3X Shares',
        'FNGD': 'MicroSectors™ FANG+™ Index -3X Inverse Leveraged ETN',
        'FNGU': 'MicroSectors™ FANG+™ Index 3X Leveraged ETN',
        'AAPL': 'Apple',
        'MSFT': 'Microsoft',
        'GOOG': 'Alphabet C',
        'AMZN': 'Amazon.com',
        'AVGO': 'Broadcom',
        'FB': 'Meta Platforms',
        'NVDA': 'NVIDIA',
        'UNH': 'UnitedHealth',
        'TSM': 'Taiwan Semiconductor',
        'JNJ': 'Johnson & Johnson (JNJ)',
        'TCTZF': 'Tencent Holdings',
        'V': 'Visa A',
        'WMT': 'Walmart',
        'XOM': 'Exxon Mobil',
        'JPM': 'JPMorgan',
        'MA': 'Mastercard',
        'CVX': 'Chevron Corp',
        'HD': 'Home Depot',
        'BAC': 'Bank of America',
        'KO': 'Coca-Cola',
        'COST': 'Costco',
        'DIS': 'Walt Disney',
        'VZ': 'Verizon',
        'CSCO': 'Cisco',
        'ORCL': 'Oracle',
        'NKE': 'Nike',
        'ACN': 'Accenture',
        'ADBE': 'Adobe',
        'CRM': 'Salesforce.com',
        'INTC': 'Intel',
        'QCOM': 'Qualcomm',
        'AMD': 'AMD',
        'MS': 'Morgan Stanley',
        'T': 'AT&T',
        'HON': 'Honeywell',
        'IBM': 'IBM',
        'DQ': 'Daqo New Energy Corp ADR',
        'EBAY': 'eBay Inc',
        'NTAP': 'NetApp Inc',
        'ASML': 'ASML Holding NV ADR',
        'BABA': 'Alibaba Group Holdings Ltd ADR'}

    # Every ticker is requested for the same window. The previous code ran a
    # SELECT per ticker but used identical dates in both branches, so the
    # query had no effect and has been dropped.
    start_day = pd.to_datetime(self.START_DATE.replace(".", "-"))
    end_day = pd.to_datetime(datetime.today().strftime('%Y-%m-%d'))

    us_sotck_data = {}
    for ticker in special_stocks:
        data = None
        for _attempt in range(max_attempts):
            try:
                data = pdr.get_data_yahoo(ticker, start_day, end_day)
                break
            except Exception:
                print(ticker)
        if data is None:
            # Download kept failing (e.g. renamed/delisted ticker): skip it.
            continue
        us_sotck_data[ticker] = {
            'close': data['Close'].to_dict(),
            'open': data['Open'].to_dict(),
            'high': data['High'].to_dict(),
            'low': data['Low'].to_dict(),
            'volume': data['Volume'].to_dict()
        }

    if 'SQQQ' not in us_sotck_data:
        # SQQQ supplies the reference calendar; without it no row can be built.
        print("SQQQ price history unavailable; nothing stored")
        return
    dateList = list(us_sotck_data['SQQQ']['close'])

    for idx, item_code in enumerate(us_sotck_data):
        stock = us_sotck_data[item_code]
        closes = stock['close']
        stock_data = []
        for i, ymd in enumerate(dateList):
            if ymd not in closes:
                continue
            if i > 0:
                previous = dateList[i-1]
                if previous not in closes:
                    continue
                diff = closes[ymd] - closes[previous]
            else:
                diff = closes[ymd]
            stock_data.append({
                'CODE': item_code, 'NAME': special_stocks[item_code], 'ymd': ymd.strftime('%Y.%m.%d'),
                'close': round(closes[ymd], 2), 'diff': round(diff, 2), 'open': round(stock['open'][ymd], 2),
                'high': round(stock['high'][ymd], 2), 'low': round(stock['low'][ymd], 2), 'volume': stock['volume'][ymd]
            })

        conn = sqlite3.connect(inFileName)
        try:
            cursor = conn.cursor()
            # Newest rows are written first, as before.
            for item in reversed(stock_data):
                cursor.execute('SELECT 1 FROM ' + tableName + ' WHERE CODE=? and ymd=?', (item["CODE"], item['ymd'],))
                if cursor.fetchone() is None:
                    cursor.execute(insert_sql, (item["CODE"], item["NAME"], item['ymd'], item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume']))
            conn.commit()
            cursor.close()
        finally:
            conn.close()
        print(idx, item_code, special_stocks[item_code], (time.time() - start_time), "s")
        start_time = time.time()
        sleep(0.05)
    return
def get_data(self, code, lastDay, max_retries=5):
    """Scrape the daily price pages of one stock from finance.naver.com.

    Pages ``1 .. self.limit_page_count - 1`` are fetched in order until a
    page repeats an already-seen date, a page contains ``lastDay``, or no
    usable rows remain. Rows are returned up to (not including) the first
    row dated ``lastDay``.

    :param code: stock code; surrounding whitespace is stripped.
    :param lastDay: date string already stored by the caller; it is compared
        verbatim with the page's date column.
    :param max_retries: attempts per page before giving up.
    :return: list of dicts sorted by ascending ``ymd``; besides ``ymd`` each
        dict carries the renamed value columns of the page (positions 1-6).
        An empty list is returned when a page cannot be fetched, so that the
        caller stores nothing and can resume from the same date later.
    """
    from io import StringIO

    url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code.strip())
    # DataFrame that accumulates the daily rows
    df = pd.DataFrame()
    date_set = set()
    lastPage = False
    # Walk the pages, at most up to self.limit_page_count
    for page in range(1, self.limit_page_count):
        pg_url = '{url}&page={page}'.format(url=url, page=page)
        html = None
        for _attempt in range(max_retries):
            try:
                response = requests.get(pg_url, headers=self.header, timeout=30)
                html = pd.read_html(StringIO(response.text))
                sleep(0.5)
                break
            except Exception:
                print(pg_url)
                sleep(0.5)
        if html is None:
            # Returning a partial history would leave a gap that a later
            # resume-from-latest-date run never fills, so return nothing.
            print("\t- giving up:", pg_url)
            return []

        page_df = html[0]
        for date in page_df["날짜"].values:
            if isinstance(date, str):
                # A date seen before means the site is repeating its last
                # page (happens for recently listed companies).
                if date in date_set:
                    lastPage = True
                    break
                date_set.add(date)
                if date == lastDay:
                    lastPage = True
                    break
        # DataFrame.append no longer exists in pandas 2.x; concat is equivalent.
        df = page_df if df.empty else pd.concat([df, page_df], ignore_index=True)
        df = df.dropna()
        if lastPage or (len(df) < 1) or ("날짜" not in df) or (df["날짜"][1] == ''):
            print("\t- lastpage:", page)
            break

    # Drop rows with missing values
    df = df.dropna()
    if df.empty:
        return []
    # Rename the Korean column names to English
    df = df.rename(columns={'날짜': 'date', '종가': 'close', '전일비': 'diff', '시가': 'open', '고가': 'high', '저가': 'low', '거래량': 'volume'})
    # A repeated last page would otherwise yield the same day twice.
    df = df.drop_duplicates(subset='date')
    # Cast the value columns to int
    value_columns = ['close', 'diff', 'open', 'high', 'low', 'volume']
    df[value_columns] = df[value_columns].astype(int)

    stock = []
    for values in df.values:
        day = str(values[0]).split(' ')[0]
        if lastDay == day:
            break
        entry = {"ymd": day}
        for pos in range(1, 7):
            entry[df.columns[pos]] = values[pos]
        stock.append(entry)
    return sorted(stock, key=lambda x: x['ymd'])
def crawl_specific_stock(self, code, ymd):
    """Collect the daily price rows of one stock.

    :param code: stock code forwarded to ``self.get_data``.
    :param ymd: resume date forwarded to ``self.get_data``.
    :return: whatever ``self.get_data(code, ymd)`` returns.
    """
    # Data collection only; the moving-average step (self.get_moving_avg)
    # is currently disabled.
    return self.get_data(code, ymd)
def update(self, inFileName, outFileName):
    """
    Import a full JSON dump into the database.

    Every non-blank line of the input file is one JSON object with the keys
    ``CODE``, ``NAME`` and ``PRICE``. ``PRICE`` is stored as JSON text; an
    existing row with the same ``CODE`` only gets its ``PRICE`` replaced.

    inFileName = PROJECT_HOME + '/resources/stock.json.full'
    outFileName = PROJECT_HOME + '/resources/stock.db'
    crawler = StockCrawler()
    crawler.update(inFileName, outFileName)

    :param inFileName: path of the JSON-lines dump to read.
    :param outFileName: path of the SQLite database file to write.
    :return: None
    """
    tableName = 'stock'
    # isolation_level=None puts sqlite3 in autocommit mode, so every
    # statement is persisted immediately and no explicit commit is needed.
    conn = sqlite3.connect(outFileName, isolation_level=None)
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text, MACD text, STOCHASTIC text, ICHIMOKU text, RSI text, BOLINGERBAND text)")
        idx = 0
        # NOTE(review): the file is read with the platform default encoding,
        # as before - confirm how the dump is written before pinning one.
        with open(inFileName, 'r') as inFp:
            for line in inFp:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no record.
                    continue
                idx += 1
                stock = json.loads(line)
                print(idx, stock["CODE"], stock["NAME"])
                text = json.dumps(stock["PRICE"], ensure_ascii=False)
                cursor.execute('SELECT 1 FROM ' + tableName + ' WHERE CODE=?', (stock["CODE"],))
                if cursor.fetchone() is None:
                    cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, PRICE) VALUES(?, ?, ?)", (stock["CODE"], stock["NAME"], text))
                else:
                    cursor.execute("UPDATE " + tableName + " SET PRICE=? WHERE CODE=?", (text, stock["CODE"]))
        cursor.close()
    finally:
        conn.close()
    return
def saveIndex(self, code, inFileName, outFileName):
    """Merge a tab-separated index history file into the ``stock`` table.

    The row for ``code`` holds the whole history as a JSON list in its
    ``PRICE`` column. Lines of the input file are read from the top until
    the date of the newest stored entry is met; every line before that is
    added. Lines that are blank or start with ``#`` are ignored. The
    indicator columns (MACD, STOCHASTIC, ICHIMOKU, RSI) are reset to
    ``"[{}]"`` on every call.

    Columns read from each line: 0 = date, 1 = close, 2 = open, 3 = high,
    4 = low, 6 = change with an optional ``%`` sign (stored as ``diff``).

    :param code: index identifier, used both as CODE and NAME.
    :param inFileName: UTF-8 tab-separated history file.
    :param outFileName: path of the SQLite database file.
    :return: None
    """
    tableName = 'stock'
    # Moving-average placeholders written with every new price entry.
    avg_windows = (3, 5, 7, 10, 20, 30, 60, 90, 100, 120, 150, 180, 200, 240)
    conn = sqlite3.connect(outFileName)
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text, MACD text, STOCHASTIC text, ICHIMOKU text, RSI text, BOLINGERBAND text)")
        # PRICE starts empty so that a code seen for the first time works.
        stock = {"NAME": code, "CODE": code, "PRICE": []}
        lastDay = ""
        cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=?', (stock["CODE"],))
        existing = cursor.fetchone()
        if existing is not None:
            stock["PRICE"] = json.loads(existing[2]) if existing[2] else []
            if stock["PRICE"]:
                lastDay = stock["PRICE"][-1]["DATE"]

        with open(inFileName, "r", encoding="utf-8") as inFp:
            for line in inFp:
                line = line.strip()
                if not line or line[0] == "#":
                    continue
                arr = line.split("\t")
                if arr[0] == lastDay:
                    # Everything from here on is already stored.
                    break
                price = {"DATE": arr[0], "close": float(arr[1]), "diff": float(arr[6].replace("%", "")), "open": float(arr[2]), "high": float(arr[3]), "low": float(arr[4]), "volume": 0}
                for window in avg_windows:
                    price['avg' + str(window)] = 0
                stock["PRICE"].append(price)

        stock["PRICE"] = sorted(stock["PRICE"], key=lambda x: x['DATE'])
        text = json.dumps(stock['PRICE'], ensure_ascii=False)
        if existing is None:
            cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, PRICE, MACD, STOCHASTIC, ICHIMOKU, RSI) VALUES(?, ?, ?, ?, ?, ?, ?)", (stock["CODE"], stock["NAME"], text, "[{}]", "[{}]", "[{}]", "[{}]"))
        else:
            cursor.execute("UPDATE " + tableName + " SET PRICE=?, MACD=?, STOCHASTIC=?, ICHIMOKU=?, RSI=? WHERE CODE=?", (text, "[{}]", "[{}]", "[{}]", "[{}]", stock["CODE"]))
        conn.commit()
        cursor.close()
    finally:
        conn.close()
    return
if __name__ == "__main__":
    # Manual entry point: download the special (index / US) tickers into the
    # project's SQLite database.
    stockCrawler = StockCrawler()
    # Project root = four directory levels above this file.
    PROJECT_HOME = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
    stockFileName = PROJECT_HOME + '/resources/stock.db'
    # NOTE(review): this used to call crawl_us_sotck_datas, which is not
    # defined in the visible part of the class; crawl_special_stocks appears
    # to be its renamed successor - confirm.
    stockCrawler.crawl_special_stocks(stockFileName)