diff --git a/StockCrawler.py b/StockCrawler.py index 54912c4..91e860a 100644 --- a/StockCrawler.py +++ b/StockCrawler.py @@ -37,7 +37,7 @@ if week in (0, 1, 2, 3, 4): # 0:월, 1:화, 2:수, 3:목, 4:금, 5:토, 6:일 slackBot.sendMsg("1. start to crawl...") - + ERROR_COUNT = 0 while ERROR_COUNT < 3: try: @@ -133,53 +133,23 @@ if week in (0, 1, 2, 3, 4): # 0:월, 1:화, 2:수, 3:목, 4:금, 5:토, 6:일 - print("\n[종목 다운로드]") + stockCrawler = StockCrawler(START_DATE) - ERROR_COUNT = 0 - while ERROR_COUNT < 3: - try: - print("\n[국내 ETF 수집]") - stockCrawler.crawl_etf_stocks(stockFileName) - slackBot.sendMsg("7. done etf stocks...") - break - except: - ERROR_COUNT += 1 - continue - if ERROR_COUNT >= 3: - exit() + + print("\n[국내 ETF 수집]") + stockCrawler.crawl_etf_stocks(stockFileName) + slackBot.sendMsg("7. done etf stocks...") - - ERROR_COUNT = 0 - while ERROR_COUNT < 3: - try: - print("\n[국내 종목 수집]") - stockCrawler.crawl_stocks(stockFileName) - slackBot.sendMsg("8. done stocks...") - break - except: - ERROR_COUNT += 1 - continue - if ERROR_COUNT >= 3: - exit() + print("\n[국내 종목 수집]") + stockCrawler.crawl_stocks(stockFileName) + slackBot.sendMsg("8. done stocks...") - - - ERROR_COUNT = 0 - while ERROR_COUNT < 3: - try: - print("\n[US 종목 수집]") - stockCrawler.crawl_special_stocks(stockFileName) - slackBot.sendMsg("9. done US stocks...") - break - except: - ERROR_COUNT += 1 - continue - if ERROR_COUNT >= 3: - exit() - + print("\n[US 종목 수집]") + stockCrawler.crawl_special_stocks(stockFileName) + slackBot.sendMsg("9. done US stocks...") diff --git a/stock/crawler/StockCrawler.py b/stock/crawler/StockCrawler.py index 8ba24c7..921395b 100644 --- a/stock/crawler/StockCrawler.py +++ b/stock/crawler/StockCrawler.py @@ -32,6 +32,14 @@ class StockCrawler: '[', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', ',', '.', '?', '"', ':', ';', '{', '}', '|', '<', '>', ']', '+', '-', '/', '=', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9') self.START_DATE = START_DATE + + """ + start_day = (datetime.today() - timedelta(weeks=2)).strftime('%Y-%m-%d') + end_day = datetime.today().strftime('%Y-%m-%d') + yfin.pdr_override() + data = pdr.get_data_yahoo("311690.KQ", start_day, end_day, auto_adjust=True, progress=False) + print (data) + """ return def clean_str(self, string): @@ -134,7 +142,8 @@ class StockCrawler: if result is not None: ymd = result[0] - stock_data = self.crawl_specific_stock(stock["CODE"], ymd) + stock_data = self.crawl_specific_stock(stock["CODE"], ymd, ".KS") + for item in stock_data: cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"],item['ymd'],)) @@ -144,7 +153,7 @@ class StockCrawler: #else: # cursor.execute("UPDATE " + tableName + " SET close=?, diff=?, open=?, high=?, low=?, volume=? WHERE CODE=? and ymd=?", (item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'], stock["CODE"], item['ymd'])) - sleep(0.05) + sleep(0.5) conn.commit() cursor.close() conn.close() @@ -189,15 +198,15 @@ class StockCrawler: if result is not None: ymd = result[0] - stock_data = self.crawl_specific_stock(item_code, ymd) - - for item in stock_data: - cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"],item['ymd'],)) - result = cursor.fetchone() - if result == None: - cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, close, diff, open, high, low, volume) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)", (stock["CODE"], stock["NAME"], item['ymd'], item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'])) - #else: - # cursor.execute("UPDATE " + tableName + " SET close=?, diff=?, open=?, high=?, low=?, volume=? WHERE CODE=? and ymd=?", (item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'], stock["CODE"], item['ymd'])) + stock_data = self.crawl_specific_stock(stock["CODE"], ymd) + if stock_data is not None: + for item in stock_data: + cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"],item['ymd'],)) + result = cursor.fetchone() + if result == None: + cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, close, diff, open, high, low, volume) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)", (stock["CODE"], stock["NAME"], item['ymd'], item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'])) + #else: + # cursor.execute("UPDATE " + tableName + " SET close=?, diff=?, open=?, high=?, low=?, volume=? WHERE CODE=? and ymd=?", (item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'], stock["CODE"], item['ymd'])) conn.commit() cursor.close() @@ -205,7 +214,7 @@ class StockCrawler: print(idx, item_name, item_code, (time.time() - start_time), "s") start_time = time.time() - sleep(0.05) + sleep(0.3) return @@ -308,7 +317,7 @@ class StockCrawler: end_day = datetime.today().strftime('%Y-%m-%d') yfin.pdr_override() - data = pdr.get_data_yahoo(ticker, start_day, end_day, auto_adjust=True) + data = pdr.get_data_yahoo(ticker, start_day, end_day, auto_adjust=True, progress=False) if len(data) <1: continue data['datetime'] = data.index.strftime("%Y.%m.%d") @@ -370,85 +379,44 @@ class StockCrawler: return - def get_data(self, code, lastDay): - url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code.strip()) - + def get_data(self, code, start_day, end_day, tick='.KS'): stock = [] - # 일자 데이터를 담을 df라는 DataFrame 정의 - df = pd.DataFrame() - date_set = set() - lastPage = False - # 1페이지에서 1000페이지의 데이터만 가져오기 - for page in range(1, self.limit_page_count): - # 최근 상장 기업의 마지막 반복되는 페이지를 제외시킨다. - pg_url = '{url}&page={page}'.format(url=url, page=page) - #html = pd.read_html(pg_url, header=0) - html = None - while True: - try: - html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=30).text) - sleep(0.5) - break - except: - print(pg_url) - if page > 200: - break - continue + try: + #yfin.pdr_override() + #data = pdr.get_data_yahoo(code.strip() + tick, start_day, end_day, auto_adjust=True, progress=False) - for date in html[0].날짜.values: - if type(date) is str: - if date in date_set: - lastPage = True - break - date_set.add(date) + data = pdr.DataReader(code.strip(), 'naver', start=start_day, end=end_day) + if len(data) < 1: + return + data['datetime'] = data.index.strftime("%Y.%m.%d") + data.set_index('datetime', inplace=True) - if date == lastDay: - lastPage = True - df = pd.concat((df, html[0]), ignore_index=True) - break - df = pd.concat((df, html[0]), ignore_index=True) - df = df.dropna() - if (lastPage) or (len(df) < 1) or ("날짜" not in df) or (df.날짜[1]==''): - print("\t- lastpage:", page) - break + for idx, row in data.iterrows(): + stock.append({ + "ymd": idx, + 'diff': 0, + 'open': row['Open'], + 'close': row['Close'], + 'high': row['High'], + 'low': row['Low'], + 'volume': row['Volume'], + }) + except: + print ("error") - # df.dropna()를 이용해 결측값 있는 행 제거 - df = df.dropna() - - # 상위 5개 데이터 확인하기 - ###print (df.head()) - - # 한글로 된 컬럼명을 영어로 바꿔줌 - df = df.rename(columns={'날짜': 'date', '종가': 'close', '전일비': 'diff', '시가': 'open', '고가': 'high', '저가': 'low', '거래량': 'volume'}) - - # 데이터의 타입을 int형으로 바꿔줌 - df[['close', 'diff', 'open', 'high', 'low', 'volume']] = df[['close', 'diff', 'open', 'high', 'low', 'volume']].astype(int) - - for values in df.values: - day = str(values[0]).split(' ')[0] - if lastDay == day: - break - stock.append({ - "ymd": day, - df.columns[1]: values[1], - df.columns[2]: values[2], - df.columns[3]: values[3], - df.columns[4]: values[4], - df.columns[5]: values[5], - df.columns[6]: values[6], - }) - - # stock = sorted(stock, key=lambda x: x['ymd'], reverse=True) - stock = sorted(stock, key=lambda x: x['ymd']) return stock def crawl_specific_stock(self, code, ymd): # 데이터 수집 - stock = self.get_data(code, ymd) + start_day = (datetime.today() - timedelta(weeks=2)).strftime('%Y-%m-%d') + end_day = datetime.today().strftime('%Y-%m-%d') - # 이동 평균 계산 - #self.get_moving_avg(stock) + stock = [] + try: + stock = self.get_data(code, start_day, end_day) + except: + print (code, 'is not exist...') return stock