init
This commit is contained in:
@@ -32,6 +32,14 @@ class StockCrawler:
|
||||
'[', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', ',', '.', '?', '"', ':', ';', '{', '}', '|', '<', '>',
|
||||
']', '+', '-', '/', '=', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
|
||||
self.START_DATE = START_DATE
|
||||
|
||||
"""
|
||||
start_day = (datetime.today() - timedelta(weeks=2)).strftime('%Y-%m-%d')
|
||||
end_day = datetime.today().strftime('%Y-%m-%d')
|
||||
yfin.pdr_override()
|
||||
data = pdr.get_data_yahoo("311690.KQ", start_day, end_day, auto_adjust=True, progress=False)
|
||||
print (data)
|
||||
"""
|
||||
return
|
||||
|
||||
def clean_str(self, string):
|
||||
@@ -134,7 +142,8 @@ class StockCrawler:
|
||||
if result is not None:
|
||||
ymd = result[0]
|
||||
|
||||
stock_data = self.crawl_specific_stock(stock["CODE"], ymd)
|
||||
stock_data = self.crawl_specific_stock(stock["CODE"], ymd, ".KS")
|
||||
|
||||
|
||||
for item in stock_data:
|
||||
cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"],item['ymd'],))
|
||||
@@ -144,7 +153,7 @@ class StockCrawler:
|
||||
#else:
|
||||
# cursor.execute("UPDATE " + tableName + " SET close=?, diff=?, open=?, high=?, low=?, volume=? WHERE CODE=? and ymd=?", (item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'], stock["CODE"], item['ymd']))
|
||||
|
||||
sleep(0.05)
|
||||
sleep(0.5)
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
conn.close()
|
||||
@@ -189,15 +198,15 @@ class StockCrawler:
|
||||
if result is not None:
|
||||
ymd = result[0]
|
||||
|
||||
stock_data = self.crawl_specific_stock(item_code, ymd)
|
||||
|
||||
for item in stock_data:
|
||||
cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"],item['ymd'],))
|
||||
result = cursor.fetchone()
|
||||
if result == None:
|
||||
cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, close, diff, open, high, low, volume) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)", (stock["CODE"], stock["NAME"], item['ymd'], item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume']))
|
||||
#else:
|
||||
# cursor.execute("UPDATE " + tableName + " SET close=?, diff=?, open=?, high=?, low=?, volume=? WHERE CODE=? and ymd=?", (item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'], stock["CODE"], item['ymd']))
|
||||
stock_data = self.crawl_specific_stock(stock["CODE"], ymd)
|
||||
if stock_data is not None:
|
||||
for item in stock_data:
|
||||
cursor.execute('SELECT * FROM ' + tableName + ' WHERE CODE=? and ymd=?', (stock["CODE"],item['ymd'],))
|
||||
result = cursor.fetchone()
|
||||
if result == None:
|
||||
cursor.execute("INSERT INTO " + tableName + "(CODE, NAME, ymd, close, diff, open, high, low, volume) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)", (stock["CODE"], stock["NAME"], item['ymd'], item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume']))
|
||||
#else:
|
||||
# cursor.execute("UPDATE " + tableName + " SET close=?, diff=?, open=?, high=?, low=?, volume=? WHERE CODE=? and ymd=?", (item['close'], item['diff'], item['open'], item['high'], item['low'], item['volume'], stock["CODE"], item['ymd']))
|
||||
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
@@ -205,7 +214,7 @@ class StockCrawler:
|
||||
print(idx, item_name, item_code, (time.time() - start_time), "s")
|
||||
|
||||
start_time = time.time()
|
||||
sleep(0.05)
|
||||
sleep(0.3)
|
||||
|
||||
return
|
||||
|
||||
@@ -308,7 +317,7 @@ class StockCrawler:
|
||||
end_day = datetime.today().strftime('%Y-%m-%d')
|
||||
|
||||
yfin.pdr_override()
|
||||
data = pdr.get_data_yahoo(ticker, start_day, end_day, auto_adjust=True)
|
||||
data = pdr.get_data_yahoo(ticker, start_day, end_day, auto_adjust=True, progress=False)
|
||||
if len(data) <1:
|
||||
continue
|
||||
data['datetime'] = data.index.strftime("%Y.%m.%d")
|
||||
@@ -370,85 +379,44 @@ class StockCrawler:
|
||||
return
|
||||
|
||||
|
||||
def get_data(self, code, lastDay):
|
||||
url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code.strip())
|
||||
|
||||
def get_data(self, code, start_day, end_day, tick='.KS'):
    """Return daily price rows for one stock code using the 'naver' reader.

    The rows are read with ``pdr.DataReader(code, 'naver', ...)`` and
    converted into plain dictionaries.

    Args:
        code: Stock code; surrounding whitespace is stripped before lookup.
        start_day: Passed to ``pdr.DataReader`` as ``start``.
        end_day: Passed to ``pdr.DataReader`` as ``end``.
        tick: Ticker suffix (default ``'.KS'``). It is not used by the
            'naver' lookup; it only mattered for a Yahoo lookup
            (``code + tick``) that is currently disabled.

    Returns:
        A list of dicts with the keys ``ymd`` (``"%Y.%m.%d"`` string),
        ``diff``, ``open``, ``close``, ``high``, ``low`` and ``volume``,
        sorted ascending by ``ymd``. An empty list is returned when the
        reader yields no rows; if an error occurs, whatever rows were
        collected before the error are returned.
    """
    stock = []
    try:
        data = pdr.DataReader(code.strip(), 'naver', start=start_day, end=end_day)
        if len(data) < 1:
            # Always hand back a list so callers can iterate the result
            # without a separate None check.
            return stock

        # Format the index once; these strings become the 'ymd' values.
        days = data.index.strftime("%Y.%m.%d")
        for day, (_, row) in zip(days, data.iterrows()):
            stock.append({
                "ymd": day,
                # No day-over-day difference is computed here; it is
                # stored as a constant 0.
                'diff': 0,
                'open': row['Open'],
                'close': row['Close'],
                'high': row['High'],
                'low': row['Low'],
                'volume': row['Volume'],
            })
    except Exception as exc:
        # Best-effort lookup: report which code failed and why instead of
        # an anonymous "error", and let KeyboardInterrupt/SystemExit
        # propagate.
        print("error", code, repr(exc))

    # "%Y.%m.%d" strings sort lexicographically in chronological order.
    return sorted(stock, key=lambda x: x['ymd'])
|
||||
|
||||
def crawl_specific_stock(self, code, ymd, tick='.KS'):
    """Collect the last two weeks of daily price rows for one stock code.

    Args:
        code: Stock code forwarded to ``self.get_data``.
        ymd: Accepted for backward compatibility with existing callers;
            the requested window is always the two weeks ending today,
            so this value is not used.
        tick: Ticker suffix forwarded to ``self.get_data`` (default
            ``'.KS'``). Accepting it keeps both two-argument and
            three-argument call sites working.

    Returns:
        The list returned by ``self.get_data``, or an empty list when the
        lookup raises or returns ``None``.
    """
    today = datetime.today()
    start_day = (today - timedelta(weeks=2)).strftime('%Y-%m-%d')
    end_day = today.strftime('%Y-%m-%d')

    # NOTE: the moving-average step (self.get_moving_avg) is currently
    # disabled; only the raw rows are returned.
    try:
        stock = self.get_data(code, start_day, end_day, tick)
    except Exception:
        # Best-effort: a failing code must not abort the whole crawl.
        print(code, 'is not exist...')
        return []

    # Normalise a None result so callers can always iterate.
    return stock if stock is not None else []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user