# https://bigdata-sk.tistory.com/10
"""Crawl daily stock prices and store them in SQLite databases.

Price data is read through ``pandas_datareader`` (Naver source) and
``yfinance`` (Yahoo source); the list of listed companies is downloaded from
the KRX KIND site.
"""
import json
import os
import re
import sqlite3
import time
from contextlib import closing
from datetime import datetime, timedelta
from io import StringIO
from time import sleep

import pandas as pd
import requests
import yfinance as yfin
from pandas_datareader import data as pdr


class StockCrawler:
    """Download daily prices and persist them in a SQLite file.

    Two different table layouts are written under the same table name
    ``stock``:

    * the ``crawl_*`` methods store one row per (CODE, ymd) with numeric
      price columns;
    * ``update`` and ``saveIndex`` store one row per CODE with the whole
      price history serialized as JSON text.

    Because both use ``CREATE TABLE IF NOT EXISTS``, whichever layout is
    created first in a file wins.
    NOTE(review): presumably the two families are meant to be used with
    different database files - confirm with the callers.
    """

    # HTTP header sent with the KRX request.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    historical_prices = None
    special_pattern = None
    fnGuideCrawler = None
    limit_page_count = 1000000
    START_DATE = None

    # Column definition of the per-day price table used by the crawl_* methods.
    _PRICE_COLUMNS = " (CODE text, NAME text, ymd text, close REAL, diff REAL, open REAL, high REAL, low REAL, volume REAL)"
    # Column definition of the JSON-per-code table used by update()/saveIndex().
    _INDEX_COLUMNS = " (CODE text PRIMARY KEY, NAME text, PRICE text, MACD text, STOCHASTIC text, ICHIMOKU text, RSI text, BOLINGERBAND text)"

    # Moving-average windows that saveIndex() initializes to 0 on every price.
    _AVG_WINDOWS = (3, 5, 7, 10, 20, 30, 60, 90, 100, 120, 150, 180, 200, 240)

    # ETFs fetched by crawl_etf_stocks().
    _ETF_STOCKS = (
        {"NAME": 'KODEX 코스닥150선물인버스', "CODE": "251340"},
        {"NAME": 'KODEX 코스닥150 레버리지', "CODE": "233740"},
        {"NAME": 'KODEX 200선물인버스2X', "CODE": "252670"},
        {"NAME": 'KODEX 레버리지', "CODE": "122630"},
        {"NAME": 'KODEX 인버스', "CODE": "114800"},
        {"NAME": 'KODEX 중국본토CSI300', "CODE": "283580"},
        {"NAME": 'KODEX 심천ChiNext(합성)', "CODE": "256750"},
        {"NAME": 'KINDEX 블룸버그베트남VN30선물레버리지(H)', "CODE": "371130"},
        {"NAME": 'KODEX 미국S&P바이오(합성)', "CODE": "185680"},
        {"NAME": 'KODEX 미국S&P에너지(합성)', "CODE": "218420"},
        {"NAME": 'KODEX 골드선물(H)', "CODE": "132030"},
        {"NAME": 'KODEX 콩선물(H)', "CODE": "138920"},
        {"NAME": 'KODEX 3대농산물선물(H)', "CODE": "271060"},
        {"NAME": 'KODEX 건설', "CODE": "117700"},
        {"NAME": 'KODEX 헬스케어', "CODE": "266420"},
        {"NAME": 'KODEX 글로벌4차산업로보틱스(합성)', "CODE": "276990"},
        {"NAME": 'KODEX 바이오', "CODE": "244580"},
        {"NAME": 'KODEX 반도체', "CODE": "091160"},
        {"NAME": 'KODEX 보험', "CODE": "140700"},
        {"NAME": 'KODEX 필수소비재', "CODE": "266410"},
        {"NAME": 'KODEX 2차전지산업', "CODE": "305720"},
        {"NAME": 'KODEX 경기소비재', "CODE": "266390"},
        {"NAME": 'KODEX 철강', "CODE": "117680"},
        {"NAME": 'KODEX 에너지화학', "CODE": "117460"},
        {"NAME": 'KODEX 은행', "CODE": "091170"},
        {"NAME": 'TIGER 탄소효율그린뉴딜', "CODE": "376410"},
    )

    # Yahoo tickers fetched by crawl_special_stocks(), mapped to the NAME
    # stored with each row.
    _SPECIAL_STOCKS = {
        '^KS11': 'Kospi',
        '^KQ11': 'Kosdak',
        '^DJI': 'Dow Johns',
        '^GSPC': 'S&P 500',
        '^IXIC': 'Nasdaq',
        '^NDX': 'NASDAQ 100',
        'SQQQ': 'ProShares UltraPro Short QQQ',
        'TQQQ': 'ProShares UltraPro QQQ',
        'SCO': 'ProShares UltraShort Bloomberg Crude Oil',
        'UCO': 'ProShares Ultra Bloomberg Crude Oil',
        'GLL': 'ProShares UltraShort Gold',
        'UGL': 'ProShares Ultra Gold',
        'SOXS': 'Direxion Daily Semiconductor Bear -3X Shares',
        'SOXL': 'Direxion Daily Semiconductor Bull 3X Shares',
        'FNGD': 'MicroSectors™ FANG+™ Index -3X Inverse Leveraged ETN',
        'FNGU': 'MicroSectors™ FANG+™ Index 3X Leveraged ETN',
        'AAPL': 'Apple',
        'MSFT': 'Microsoft',
        'GOOG': 'Alphabet C',
        'AMZN': 'Amazon.com',
        'AVGO': 'Broadcom',
        'NVDA': 'NVIDIA',
        'UNH': 'UnitedHealth',
        'TSM': 'Taiwan Semiconductor',
        'JNJ': 'Johnson & Johnson (JNJ)',
        'TCTZF': 'Tencent Holdings',
        'V': 'Visa A',
        'WMT': 'Walmart',
        'XOM': 'Exxon Mobil',
        'JPM': 'JPMorgan',
        'MA': 'Mastercard',
        'CVX': 'Chevron Corp',
        'HD': 'Home Depot',
        'BAC': 'Bank of America',
        'KO': 'Coca-Cola',
        'COST': 'Costco',
        'DIS': 'Walt Disney',
        'VZ': 'Verizon',
        'CSCO': 'Cisco',
        'ORCL': 'Oracle',
        'NKE': 'Nike',
        'ACN': 'Accenture',
        'ADBE': 'Adobe',
        'CRM': 'Salesforce.com',
        'INTC': 'Intel',
        'QCOM': 'Qualcomm',
        'AMD': 'AMD',
        'MS': 'Morgan Stanley',
        'T': 'AT&T',
        'HON': 'Honeywell',
        'IBM': 'IBM',
        'DQ': 'Daqo New Energy Corp ADR',
        'EBAY': 'eBay Inc',
        'NTAP': 'NetApp Inc',
        'ASML': 'ASML Holding NV ADR',
        'BABA': 'Alibaba Group Holdings Ltd ADR',
    }

    def __init__(self, START_DATE):
        """Create a crawler.

        :param START_DATE: date string used as the default lower bound when a
            code has no stored rows yet. The ``__main__`` block passes it as
            ``"YYYY.MM.DD"``; several methods convert ``.`` to ``-``.
        """
        self.historical_prices = dict()
        self.special_pattern = (
            '[', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', ',', '.', '?', '"', ':', ';', '{', '}', '|',
            '<', '>', ']', '+', '-', '/', '=', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
        self.START_DATE = START_DATE

    # ------------------------------------------------------------------
    # Private SQLite helpers shared by the crawl_* methods
    # ------------------------------------------------------------------
    def _ensure_price_table(self, cursor, table_name):
        """Create the per-day price table and its (CODE, ymd) index if missing."""
        cursor.execute("CREATE TABLE IF NOT EXISTS " + table_name + self._PRICE_COLUMNS)
        cursor.execute("CREATE INDEX IF NOT EXISTS " + table_name + "_idx on " + table_name + " (CODE, ymd) ")

    def _latest_ymd(self, cursor, table_name, code):
        """Return the greatest stored ``ymd`` for *code*, or ``None`` if there is none."""
        cursor.execute('SELECT ymd FROM ' + table_name + ' WHERE CODE=? order by ymd desc', (code,))
        row = cursor.fetchone()
        return None if row is None else row[0]

    def _insert_missing_rows(self, cursor, table_name, code, name, rows):
        """Insert every price row whose (code, ymd) is not stored yet.

        Rows that already exist are left untouched (they are not updated).
        """
        for row in rows:
            cursor.execute('SELECT * FROM ' + table_name + ' WHERE CODE=? and ymd=?', (code, row['ymd'],))
            if cursor.fetchone() is None:
                cursor.execute(
                    "INSERT INTO " + table_name + "(CODE, NAME, ymd, close, diff, open, high, low, volume) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    (code, name, row['ymd'], row['close'], row['diff'], row['open'], row['high'], row['low'], row['volume']))

    def _build_special_rows(self, stock, date_list):
        """Turn one ticker's fetched columns into price rows ordered like *date_list*.

        :param stock: dict with ``close``/``open``/``high``/``low``/``volume``
            entries, each a ``{ymd: value}`` dict.
        :param date_list: reference dates; a date is skipped when the ticker
            has no close for it or (except for the first date) for the
            reference date just before it.
        """
        close = stock['close']
        rows = []
        for i, ymd in enumerate(date_list):
            if ymd not in close:
                continue
            if i > 0:
                previous = date_list[i - 1]
                if previous not in close:
                    continue
                diff = close[ymd] - close[previous]
            else:
                # NOTE(review): the first reference date has no predecessor,
                # so its diff is the close itself (kept from the original).
                diff = close[ymd]
            rows.append({
                'ymd': ymd,
                'close': round(close[ymd], 2),
                'diff': round(diff, 2),
                'open': round(stock['open'][ymd], 2),
                'high': round(stock['high'][ymd], 2),
                'low': round(stock['low'][ymd], 2),
                'volume': stock['volume'][ymd],
            })
        return rows

    # ------------------------------------------------------------------
    # Small utilities
    # ------------------------------------------------------------------
    def clean_str(self, string):
        """Replace quotes, backslashes, backticks, hyphens and parenthesized
        text with spaces, then return the stripped, lower-cased result."""
        string = re.sub(r"\\", " ", string)
        string = re.sub(r"\'", " ", string)
        string = re.sub(r"\"", " ", string)
        string = re.sub(r"`", " ", string)
        string = re.sub(r"-", " ", string)
        string = re.sub(r"\(.*?\)", " ", string)
        # NOTE(review): as written this replaces a space with a space (no-op);
        # presumably it was meant to collapse repeated spaces - confirm.
        string = re.sub(r" ", " ", string)
        return string.strip().lower()

    def getStockInfo(self):
        """Download the KRX listed-company table.

        :return: DataFrame with columns ``name`` and ``code``.
        """
        response = requests.get(
            'http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13',
            headers=self.header, timeout=30)
        # Wrap the markup in StringIO: passing literal HTML to read_html is
        # deprecated in recent pandas versions.
        code_df = pd.read_html(StringIO(response.text))[0]
        # Stock codes are 6 digits long, so zero-pad them to 6 characters.
        code_df['종목코드'] = code_df['종목코드'].map('{:06d}'.format)
        # Only the company name and the stock code are needed; drop the rest.
        code_df = code_df[['회사명', '종목코드']]
        # Rename the Korean column names to English.
        code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})
        return code_df

    def get_url(self, item_name, code_df):
        """Look up the code for *item_name* and build its Naver Finance daily-price URL.

        :param item_name: company name as found in ``code_df['name']``.
        :param code_df: DataFrame with ``name`` and ``code`` columns.
        :return: ``(code, url)`` tuple.
        """
        # A boolean mask (instead of a string-built query) also works for
        # names that contain quote characters.
        matches = code_df.loc[code_df['name'] == item_name, 'code']
        code = matches.to_string(index=False).strip()
        url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code.strip())
        return code, url

    def date_format(self, d):
        """Return ``str(d)`` with every ``-`` replaced by ``.``.

        Date values read from the sources need this conversion repeatedly, so
        it is kept as a function.
        """
        return str(d).replace('-', '.')

    def getCodeIndex(self, stocks, item_code):
        """Return the index of the first entry whose ``'CODE'`` equals
        *item_code*, or ``-1`` when there is none."""
        for i, stock in enumerate(stocks):
            if item_code == stock['CODE']:
                return i
        return -1

    # ------------------------------------------------------------------
    # Crawlers writing the per-day price table
    # ------------------------------------------------------------------
    def crawl_etf_stocks(self, inFileName):
        """Fetch recent prices for the built-in ETF list and store new rows.

        :param inFileName: path of the SQLite database file.
        """
        table_name = 'stock'
        with closing(sqlite3.connect(inFileName)) as conn:
            cursor = conn.cursor()
            self._ensure_price_table(cursor, table_name)
            conn.commit()

            for i, stock in enumerate(self._ETF_STOCKS):
                last_ymd = self._latest_ymd(cursor, table_name, stock["CODE"])
                ymd = self.START_DATE if last_ymd is None else last_ymd
                stock_data = self.crawl_specific_stock(stock["CODE"], ymd)
                # "or []" guards against a missing result so iteration never fails.
                self._insert_missing_rows(cursor, table_name, stock["CODE"], stock["NAME"], stock_data or [])
                # Commit per stock so earlier results survive a later failure.
                conn.commit()
                print("{}. {} ({})".format(i, stock["CODE"], stock["NAME"]))
                sleep(0.5)
        return

    def crawl_stocks(self, inFileName):
        """Fetch recent prices for every company in the KRX list and store new rows.

        :param inFileName: path of the SQLite database file.
        """
        table_name = 'stock'
        with closing(sqlite3.connect(inFileName)) as conn:
            self._ensure_price_table(conn.cursor(), table_name)
            conn.commit()

        code_df = self.getStockInfo()

        with closing(sqlite3.connect(inFileName)) as conn:
            cursor = conn.cursor()
            for idx, (item_name, item_code) in enumerate(code_df.values, start=1):
                last_ymd = self._latest_ymd(cursor, table_name, item_code)
                ymd = self.START_DATE.replace(".", "-") if last_ymd is None else last_ymd
                stock_data = self.crawl_specific_stock(item_code, ymd)
                self._insert_missing_rows(cursor, table_name, item_code, item_name, stock_data or [])
                conn.commit()
                print("{}. {} ({})".format(idx, item_code, item_name))
                sleep(0.3)
        return

    def crawl_special_stocks(self, inFileName):
        """Fetch indices and US tickers from Yahoo and store new rows.

        For each ticker the download starts at its latest stored date (or
        ``START_DATE``) and ends today. The dates returned for ``'^KS11'``
        are used as the reference date list for every ticker.
        NOTE(review): tickers are therefore only stored for dates on which
        ``'^KS11'`` also has data - confirm this is intended.

        :param inFileName: path of the SQLite database file.
        """
        table_name = 'stock'
        pd.options.display.float_format = '{:.4f}'.format
        pd.set_option('display.max_columns', None)
        special_stocks = self._SPECIAL_STOCKS

        with closing(sqlite3.connect(inFileName)) as conn:
            cursor = conn.cursor()
            self._ensure_price_table(cursor, table_name)
            conn.commit()

            # Route pandas_datareader's Yahoo reader through yfinance (once).
            yfin.pdr_override()

            fetched = {}
            for ticker in special_stocks:
                last_ymd = self._latest_ymd(cursor, table_name, ticker)
                start_day = (self.START_DATE if last_ymd is None else last_ymd).replace(".", "-")
                end_day = datetime.today().strftime('%Y-%m-%d')
                data = pdr.get_data_yahoo(ticker, start_day, end_day, auto_adjust=True, progress=False)
                if len(data) < 1:
                    continue

                data['datetime'] = data.index.strftime("%Y.%m.%d")
                data.set_index('datetime', inplace=True)
                fetched[ticker] = {
                    'close': data['Close'].to_dict(),
                    'open': data['Open'].to_dict(),
                    'high': data['High'].to_dict(),
                    'low': data['Low'].to_dict(),
                    'volume': data['Volume'].to_dict(),
                }

            # Without the reference series there is no date list to work with;
            # stop cleanly instead of failing with a KeyError.
            if '^KS11' not in fetched:
                print("no data for ^KS11; nothing stored")
                return
            date_list = list(fetched['^KS11']['close'])

            for idx, (item_code, stock) in enumerate(fetched.items()):
                rows = self._build_special_rows(stock, date_list)
                self._insert_missing_rows(cursor, table_name, item_code, special_stocks[item_code], reversed(rows))
                conn.commit()
                print("{}. {} ({})".format(idx, item_code, special_stocks[item_code]))
                sleep(0.05)
        return

    def get_data(self, code, start_day, end_day, tick='.KS'):
        """Read daily prices for *code* from the Naver source.

        :param code: stock code; surrounding whitespace is stripped.
        :param start_day: first day to request.
        :param end_day: last day to request.
        :param tick: unused; kept for backward compatibility.
        :return: list of dicts with keys ``ymd`` (``YYYY.MM.DD``), ``diff``
            (always 0), ``open``, ``close``, ``high``, ``low``, ``volume``.
            Empty when nothing was returned; may be partial when an error
            occurs while reading.
        """
        stock = []
        try:
            data = pdr.DataReader(code.strip(), 'naver', start=start_day, end=end_day)
            if len(data) < 1:
                # Return an empty list (not None) so callers can always iterate.
                return stock

            data['datetime'] = data.index.strftime("%Y.%m.%d")
            data.set_index('datetime', inplace=True)

            for ymd, row in data.iterrows():
                stock.append({
                    "ymd": ymd,
                    'diff': 0,
                    'open': row['Open'],
                    'close': row['Close'],
                    'high': row['High'],
                    'low': row['Low'],
                    'volume': row['Volume'],
                })
        except Exception as exc:
            # Best effort: report the failure and return what was collected.
            print("error", code, repr(exc))

        return stock

    def crawl_specific_stock(self, code, ymd):
        """Collect the last two weeks of daily prices for *code*.

        NOTE(review): *ymd* is accepted but not used - the window is always
        the two weeks ending today. Confirm whether it should start at *ymd*.

        :return: list of price dicts as produced by :meth:`get_data`.
        """
        today = datetime.today()
        start_day = (today - timedelta(weeks=2)).strftime('%Y-%m-%d')
        end_day = today.strftime('%Y-%m-%d')

        stock = []
        try:
            stock = self.get_data(code, start_day, end_day)
        except Exception:
            print(code, 'is not exist...')

        return stock

    # ------------------------------------------------------------------
    # JSON-per-code table
    # ------------------------------------------------------------------
    def update(self, inFileName, outFileName):
        """Import the full JSON data into the db.

        Each non-blank line of *inFileName* is one JSON object with ``CODE``,
        ``NAME`` and ``PRICE``; ``PRICE`` is stored as JSON text, inserting
        the code or replacing its stored ``PRICE``.

        Example::

            inFileName = PROJECT_HOME + '/resources/stock.json.full'
            outFileName = PROJECT_HOME + '/resources/stock.db'
            crawler = StockCrawler(START_DATE)
            crawler.update(inFileName, outFileName)

        :param inFileName: path of the line-delimited JSON input (read as UTF-8).
        :param outFileName: path of the SQLite database file.
        :return: None
        """
        table_name = 'stock'
        # isolation_level=None puts the connection in autocommit mode, so no
        # explicit commit is needed.
        with closing(sqlite3.connect(outFileName, isolation_level=None)) as conn:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + table_name + self._INDEX_COLUMNS)

            idx = 0
            with open(inFileName, 'r', encoding="utf-8") as in_fp:
                for line in in_fp:
                    # Skip blank lines; json.loads would fail on them.
                    if not line.strip():
                        continue
                    idx += 1
                    stock = json.loads(line)
                    print(idx, stock["CODE"], stock["NAME"])

                    text = json.dumps(stock["PRICE"], ensure_ascii=False)
                    cursor.execute('SELECT * FROM ' + table_name + ' WHERE CODE=?', (stock["CODE"],))
                    if cursor.fetchone() is None:
                        cursor.execute("INSERT INTO " + table_name + "(CODE, NAME, PRICE) VALUES(?, ?, ?)",
                                       (stock["CODE"], stock["NAME"], text))
                    else:
                        cursor.execute("UPDATE " + table_name + " SET PRICE=? WHERE CODE=?", (text, stock["CODE"]))
        return

    def saveIndex(self, code, inFileName, outFileName):
        """Merge price rows from a tab-separated file into the stored history of *code*.

        Lines starting with ``#`` and blank lines are skipped. Fields are read
        by position: 0 = date, 1 = close, 2 = open, 3 = high, 4 = low,
        6 = diff (a trailing ``%`` is removed). Reading stops at the first
        line whose date equals the last stored date.
        NOTE(review): that stop condition presumably relies on the file being
        ordered newest first - confirm.

        The merged history is sorted by ``DATE`` and written back; the MACD,
        STOCHASTIC, ICHIMOKU and RSI columns are reset to ``"[{}]"``.

        :param code: used both as CODE and as NAME of the row.
        :param inFileName: path of the tab-separated input (read as UTF-8).
        :param outFileName: path of the SQLite database file.
        """
        table_name = 'stock'
        # Start from an empty history so a code that is not stored yet works.
        stock = {"NAME": code, "CODE": code, "PRICE": []}
        last_day = ""

        with closing(sqlite3.connect(outFileName)) as conn:
            cursor = conn.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS " + table_name + self._INDEX_COLUMNS)

            cursor.execute('SELECT * FROM ' + table_name + ' WHERE CODE=?', (stock["CODE"],))
            existing = cursor.fetchone()
            if existing is not None:
                # Column 2 is PRICE; treat a NULL/empty value as no history.
                stock["PRICE"] = json.loads(existing[2]) if existing[2] else []
                if stock["PRICE"]:
                    last_day = stock["PRICE"][-1]["DATE"]

            with open(inFileName, "r", encoding="utf-8") as in_fp:
                for line in in_fp:
                    line = line.strip()
                    if not line or line[0] == "#":
                        continue
                    arr = line.split("\t")
                    if arr[0] == last_day:
                        break
                    price = {"DATE": arr[0], "close": float(arr[1]), "diff": float(arr[6].replace("%", "")),
                             "open": float(arr[2]), "high": float(arr[3]), "low": float(arr[4]), "volume": 0}
                    # Moving averages are only initialized here, not computed.
                    for window in self._AVG_WINDOWS:
                        price['avg{}'.format(window)] = 0
                    stock["PRICE"].append(price)

            stock["PRICE"] = sorted(stock["PRICE"], key=lambda x: x['DATE'])

            text = json.dumps(stock['PRICE'], ensure_ascii=False)
            if existing is None:
                cursor.execute(
                    "INSERT INTO " + table_name + "(CODE, NAME, PRICE, MACD, STOCHASTIC, ICHIMOKU, RSI) VALUES(?, ?, ?, ?, ?, ?, ?)",
                    (stock["CODE"], stock["NAME"], text, "[{}]", "[{}]", "[{}]", "[{}]"))
            else:
                cursor.execute(
                    "UPDATE " + table_name + " SET PRICE=?, MACD=?, STOCHASTIC=?, ICHIMOKU=?, RSI=? WHERE CODE=?",
                    (text, "[{}]", "[{}]", "[{}]", "[{}]", stock["CODE"]))
            conn.commit()
        return


if __name__ == "__main__":
    START_DATE = "2000.01.01"
    stockCrawler = StockCrawler(START_DATE)

    # Three directory levels above this file. abspath() keeps this correct
    # when the script is started through a relative path; otherwise dirname()
    # can yield '' and the database path would resolve to '/resources/...'.
    PROJECT_HOME = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    stockFileName = os.path.join(PROJECT_HOME, 'resources', 'stock.db')
    stockCrawler.crawl_special_stocks(stockFileName)