This commit is contained in:
dsyoon
2023-09-02 18:17:51 +09:00
parent bed0f6a4e8
commit b36d2c30f1
2 changed files with 62 additions and 124 deletions

View File

@@ -37,7 +37,7 @@ if week in (0, 1, 2, 3, 4): # 0:월, 1:화, 2:수, 3:목, 4:금, 5:토, 6:일
slackBot.sendMsg("1. start to crawl...")
ERROR_COUNT = 0
while ERROR_COUNT < 3:
try:
@@ -133,53 +133,23 @@ if week in (0, 1, 2, 3, 4): # 0:월, 1:화, 2:수, 3:목, 4:금, 5:토, 6:일
print("\n[종목 다운로드]")
stockCrawler = StockCrawler(START_DATE)
ERROR_COUNT = 0
while ERROR_COUNT < 3:
try:
print("\n[국내 ETF 수집]")
stockCrawler.crawl_etf_stocks(stockFileName)
slackBot.sendMsg("7. done etf stocks...")
break
except:
ERROR_COUNT += 1
continue
if ERROR_COUNT >= 3:
exit()
print("\n[국내 ETF 수집]")
stockCrawler.crawl_etf_stocks(stockFileName)
slackBot.sendMsg("7. done etf stocks...")
ERROR_COUNT = 0
while ERROR_COUNT < 3:
try:
print("\n[국내 종목 수집]")
stockCrawler.crawl_stocks(stockFileName)
slackBot.sendMsg("8. done stocks...")
break
except:
ERROR_COUNT += 1
continue
if ERROR_COUNT >= 3:
exit()
print("\n[국내 종목 수집]")
stockCrawler.crawl_stocks(stockFileName)
slackBot.sendMsg("8. done stocks...")
ERROR_COUNT = 0
while ERROR_COUNT < 3:
try:
print("\n[US 종목 수집]")
stockCrawler.crawl_special_stocks(stockFileName)
slackBot.sendMsg("9. done US stocks...")
break
except:
ERROR_COUNT += 1
continue
if ERROR_COUNT >= 3:
exit()
print("\n[US 종목 수집]")
stockCrawler.crawl_special_stocks(stockFileName)
slackBot.sendMsg("9. done US stocks...")

View File

@@ -32,6 +32,14 @@ class StockCrawler:
'[', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', ',', '.', '?', '"', ':', ';', '{', '}', '|', '<', '>',
']', '+', '-', '/', '=', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
self.START_DATE = START_DATE
"""
start_day = (datetime.today() - timedelta(weeks=2)).strftime('%Y-%m-%d')
end_day = datetime.today().strftime('%Y-%m-%d')
yfin.pdr_override()
data = pdr.get_data_yahoo("311690.KQ", start_day, end_day, auto_adjust=True, progress=False)
print (data)
"""
return
def clean_str(self, string):
@@ -134,7 +142,8 @@ class StockCrawler:
if result is not None:
ymd = result[0]
stock_data = self.crawl_specific_stock(stock["CODE"], ymd)
stock_data = self.crawl_specific_stock(stock["CODE"], ymd, ".KS")
for item in stock_data:
cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"],item['ymd'],))
@@ -144,7 +153,7 @@ class StockCrawler:
#else:
# cursor.execute("UPDATE " + tableName + " SET close=?, diff=?, open=?, high=?, low=?, volume=? WHERE CODE=? and ymd=?", (item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'], stock["CODE"], item['ymd']))
sleep(0.05)
sleep(0.5)
conn.commit()
cursor.close()
conn.close()
@@ -189,15 +198,15 @@ class StockCrawler:
if result is not None:
ymd = result[0]
stock_data = self.crawl_specific_stock(item_code, ymd)
for item in stock_data:
cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"],item['ymd'],))
result = cursor.fetchone()
if result == None:
cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, close, diff, open, high, low, volume) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)", (stock["CODE"], stock["NAME"], item['ymd'], item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume']))
#else:
# cursor.execute("UPDATE " + tableName + " SET close=?, diff=?, open=?, high=?, low=?, volume=? WHERE CODE=? and ymd=?", (item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'], stock["CODE"], item['ymd']))
stock_data = self.crawl_specific_stock(stock["CODE"], ymd)
if stock_data is not None:
for item in stock_data:
cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"],item['ymd'],))
result = cursor.fetchone()
if result == None:
cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, close, diff, open, high, low, volume) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)", (stock["CODE"], stock["NAME"], item['ymd'], item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume']))
#else:
# cursor.execute("UPDATE " + tableName + " SET close=?, diff=?, open=?, high=?, low=?, volume=? WHERE CODE=? and ymd=?", (item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'], stock["CODE"], item['ymd']))
conn.commit()
cursor.close()
@@ -205,7 +214,7 @@ class StockCrawler:
print(idx, item_name, item_code, (time.time() - start_time), "s")
start_time = time.time()
sleep(0.05)
sleep(0.3)
return
@@ -308,7 +317,7 @@ class StockCrawler:
end_day = datetime.today().strftime('%Y-%m-%d')
yfin.pdr_override()
data = pdr.get_data_yahoo(ticker, start_day, end_day, auto_adjust=True)
data = pdr.get_data_yahoo(ticker, start_day, end_day, auto_adjust=True, progress=False)
if len(data) <1:
continue
data['datetime'] = data.index.strftime("%Y.%m.%d")
@@ -370,85 +379,44 @@ class StockCrawler:
return
def get_data(self, code, start_day, end_day, tick='.KS'):
    """Fetch daily prices for one stock code and return them as a list of dicts.

    The data is read through ``pdr.DataReader`` with the ``'naver'`` source
    for the window ``start_day`` .. ``end_day``.

    Args:
        code: Stock code; surrounding whitespace is stripped before the request.
        start_day: First day of the window, passed to ``DataReader`` as ``start``.
        end_day: Last day of the window, passed to ``DataReader`` as ``end``.
        tick: Exchange suffix (for example ``'.KS'``). It is not used by the
            Naver source; it is kept for callers and for a Yahoo-style lookup
            (``code + tick``) that is currently disabled.

    Returns:
        A list of dicts with the keys ``ymd`` (``YYYY.MM.DD``), ``diff``,
        ``open``, ``close``, ``high``, ``low`` and ``volume``, sorted by
        ``ymd`` ascending. An empty list is returned when nothing was
        downloaded or the download failed, so callers can always iterate.
    """
    stock = []
    try:
        data = pdr.DataReader(code.strip(), 'naver', start=start_day, end=end_day)
        # Always hand back a list: callers loop over the result directly,
        # so an empty download must not turn into None.
        if data is None or len(data) < 1:
            return stock

        # NOTE(review): the column values are stored as delivered by the
        # reader; confirm their dtype is what the database layer expects.
        for day, row in data.iterrows():
            stock.append({
                "ymd": day.strftime("%Y.%m.%d"),
                # The day-over-day difference is not provided here; 0 is a placeholder.
                'diff': 0,
                'open': row['Open'],
                'close': row['Close'],
                'high': row['High'],
                'low': row['Low'],
                'volume': row['Volume'],
            })
    except Exception as exc:
        # Best effort: report which code failed and why, then return what was collected.
        print("error", code, exc)

    # Oldest day first.
    return sorted(stock, key=lambda x: x['ymd'])
def crawl_specific_stock(self, code, ymd, tick='.KS'):
    """Collect the last two weeks of daily prices for a single stock code.

    Args:
        code: Stock code forwarded to ``get_data``.
        ymd: Last stored day for this code. It is accepted for backward
            compatibility but is not used; the window is always the two
            weeks ending today.
        tick: Exchange suffix forwarded to ``get_data``. Optional, so both
            two-argument and three-argument call sites work.

    Returns:
        The list of price dicts produced by ``get_data``, or an empty list
        when the lookup fails or returns nothing.
    """
    # Collect data for a fixed two-week window ending today.
    start_day = (datetime.today() - timedelta(weeks=2)).strftime('%Y-%m-%d')
    end_day = datetime.today().strftime('%Y-%m-%d')

    # The moving-average calculation (get_moving_avg) is currently disabled.
    stock = []
    try:
        stock = self.get_data(code, start_day, end_day, tick=tick)
    except Exception:
        print (code, 'is not exist...')

    # Callers iterate over the result, so never pass None through.
    if stock is None:
        return []
    return stock