This commit is contained in:
dosangyoon
2022-04-01 23:36:29 +09:00
parent b37438f83c
commit c17140eaa1
4 changed files with 35 additions and 23 deletions

View File

@@ -13,16 +13,11 @@ today = datetime.datetime.now().strftime("%Y-%m-%d")
# NOTE(review): this text looks like a rendered commit diff whose +/- markers
# and indentation were stripped, so adjacent near-duplicate lines are
# presumably the removed and added versions of one statement — confirm
# against the real file.
# Apply os.path.dirname four times to this file's path; each single-argument
# os.path.join call returns its argument unchanged.
PROJECT_HOME = os.path.join(os.path.dirname(os.path.join(os.path.dirname(os.path.join(os.path.dirname(os.path.join(os.path.dirname(__file__))))))))
# Start date string handed to the crawler constructor below.
START_DATE = "1900.01.01"
# Timestamp taken before the crawl steps run.
start = time.time()
# Path of resources/stock.db under PROJECT_HOME; passed to the crawl_* calls.
stockFileName = PROJECT_HOME + '/resources/stock.db'
# Financial statements are downloaded every three months.
fnGuideCrawler = FnGuideCrawler()
print("[KOSPI 상장기업 재무제표 다운로드]")
fnGuideCrawler.crawl_fnguide(stockFileName)
# NOTE(review): the next two lines look like the old (no-argument) and new
# (START_DATE) forms of the same construction — verify which one is live.
metaCrawler = MetaCrawler()
metaCrawler = MetaCrawler(START_DATE)
print("\n[증시자금동향 (신용잔고, 펀드자금 잔고)]")
metaCrawler.crawl_money_trend(stockFileName)
@@ -38,8 +33,14 @@ metaCrawler.crawl_exchange(stockFileName)
# NOTE(review): this text looks like a rendered commit diff whose +/- markers
# were stripped; near-duplicate adjacent lines are presumably old/new pairs —
# confirm against the real file.
print("\n[원유 (WTI), 국제금, COPPER, NATURALGAS, CORN, SOYBEAN]")
metaCrawler.crawl_meterials(stockFileName)
# Financial statements are downloaded every three months.
fnGuideCrawler = FnGuideCrawler(START_DATE)
print("[KOSPI 상장기업 재무제표 다운로드]")
fnGuideCrawler.crawl_fnguide(stockFileName)
print("\n[종목 다운로드]")
# NOTE(review): the next two lines look like the old (no-argument) and new
# (START_DATE) forms of the same construction — verify which one is live.
stockCrawler = StockCrawler()
stockCrawler = StockCrawler(START_DATE)
# Run the three stock crawls in turn against the same stockFileName.
stockCrawler.crawl_etf_stocks(stockFileName)
stockCrawler.crawl_stocks(stockFileName)
stockCrawler.crawl_usa_stocks(stockFileName)

View File

@@ -9,6 +9,11 @@ import requests
class FnGuideCrawler:
header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
START_DATE = None
# Constructor: stores the START_DATE argument on the instance as
# self.START_DATE.
# NOTE(review): indentation appears to have been stripped from this text, so
# the method body shows at column 0 — confirm against the real file.
def __init__(self, START_DATE):
self.START_DATE = START_DATE
# Bare return with no value; nothing follows it in the constructor.
return
def getStockInfo(self):
code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]

View File

@@ -7,9 +7,11 @@ import pandas as pd
class MetaCrawler:
header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
limit_page_count = 10000
limit_page_count = 100000
START_DATE = None
# Constructor: stores the START_DATE argument on the instance as
# self.START_DATE.
# NOTE(review): the two def lines below look like the removed (no-argument)
# and added (START_DATE) signatures from a diff whose +/- markers and
# indentation were stripped — confirm against the real file.
def __init__(self):
def __init__(self, START_DATE):
self.START_DATE = START_DATE
# Bare return with no value; nothing follows it in the constructor.
return
# Reference: http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221288761509
@@ -39,7 +41,7 @@ class MetaCrawler:
cursor.execute('SELECT ymd FROM ' + tableName + ' WHERE CODE=? order by ymd desc', (CODE,))
result = cursor.fetchone()
if result == None:
lastDay = "1900.01.01"
lastDay = self.START_DATE
else:
lastDay = result[0]
@@ -111,7 +113,7 @@ class MetaCrawler:
cursor.execute('SELECT ymd FROM ' + tableName + ' order by ymd desc')
result = cursor.fetchone()
if result == None:
lastDay = "1900.01.01"
lastDay = self.START_DATE
else:
lastDay = result[0]
@@ -153,7 +155,7 @@ class MetaCrawler:
cursor.execute("INSERT INTO " + tableName + "(ymd, pri, fori, ins, ins0, ins1, ins2, ins3, ins4, ins5, cor) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", (ymd, pri, fori, ins, ins0, ins1, ins2, ins3, ins4, ins5, cor))
else:
# cursor.execute("UPDATE " + tableName + " SET pri=?, fori=?, ins=?, ins0=?, ins1=?, ins2=?, ins3=?, ins4=?, ins5=?, cor=? WHERE ymd=?", (pri, fori, ins, ins0, ins1, ins2, ins3, ins4, ins5, cor, ymd))
finish
finish = True
break
print ("20"+item[0])
@@ -182,7 +184,7 @@ class MetaCrawler:
cursor.execute('SELECT ymd FROM ' + tableName + ' order by ymd desc')
result = cursor.fetchone()
if result == None:
lastDay = "1900.01.01"
lastDay = self.START_DATE
else:
lastDay = result[0]
previousDay = ""
@@ -264,7 +266,7 @@ class MetaCrawler:
cursor.execute('SELECT ymd FROM ' + tableName + ' WHERE CODE=? order by ymd desc', (CODE,))
result = cursor.fetchone()
if result == None:
lastDay = "1900.01.01"
lastDay = self.START_DATE
else:
lastDay = result[0]
@@ -338,7 +340,7 @@ class MetaCrawler:
cursor.execute('SELECT ymd FROM ' + tableName + ' WHERE CODE=? order by ymd desc', (CODE,))
result = cursor.fetchone()
if result == None:
lastDay = "1900.01.01"
lastDay = self.START_DATE
else:
lastDay = result[0]

View File

@@ -21,14 +21,15 @@ class StockCrawler:
special_pattern = None
fnGuideCrawler = None
limit_page_count = 10000
limit_page_count = 100000
START_DATE = None
# Constructor: initialises historical_prices to an empty dict, sets
# special_pattern, and stores the START_DATE argument as self.START_DATE.
# NOTE(review): the two def lines below look like the removed (no-argument)
# and added (START_DATE) signatures from a diff whose +/- markers and
# indentation were stripped — confirm against the real file.
def __init__(self):
def __init__(self, START_DATE):
self.historical_prices = dict()
# Tuple of punctuation characters followed by the digits 0-9.
self.special_pattern = (
'[', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', ',', '.', '?', '"', ':', ';', '{', '}', '|', '<', '>',
']', '+', '-', '/', '=', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
self.START_DATE = START_DATE
# Bare return with no value; nothing follows it in the constructor.
return
def clean_str(self, string):
@@ -127,7 +128,7 @@ class StockCrawler:
start_time = time.time()
cursor.execute('SELECT ymd FROM ' + tableName + ' WHERE CODE=? order by ymd desc', (stock["CODE"],))
result = cursor.fetchone()
ymd = "2019.01.01"
ymd = self.START_DATE
if result is not None:
ymd = result[0]
@@ -181,7 +182,8 @@ class StockCrawler:
result = cursor.fetchone()
stock = {"CODE": item_code, "NAME": item_name}
ymd = (datetime.today() - timedelta(days=300)).strftime('%Y-%m-%d')
#ymd = (datetime.today() - timedelta(days=300)).strftime('%Y-%m-%d')
ymd = self.START_DATE.replace(".", "-")
if result is not None:
ymd = result[0]
@@ -248,10 +250,12 @@ class StockCrawler:
result = cursor.fetchone()
if result == None:
start = pd.to_datetime('2017-01-01')
#start = pd.to_datetime('2017-01-01')
start = pd.to_datetime(self.START_DATE.replace(".", "-"))
end = pd.to_datetime(datetime.today().strftime('%Y-%m-%d'))
else:
start = (datetime.today() - timedelta(days=300)).strftime('%Y-%m-%d')
#start = (datetime.today() - timedelta(days=300)).strftime('%Y-%m-%d')
start = pd.to_datetime(self.START_DATE.replace(".", "-"))
end = pd.to_datetime(datetime.today().strftime('%Y-%m-%d'))
data = pdr.get_data_yahoo(ticker, start, end)