init
This commit is contained in:
@@ -52,7 +52,7 @@ class MetaCrawler:
|
||||
html = None
|
||||
while True:
|
||||
try:
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
break
|
||||
except:
|
||||
@@ -136,7 +136,7 @@ class MetaCrawler:
|
||||
html = None
|
||||
while True:
|
||||
try:
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
break
|
||||
except:
|
||||
@@ -214,7 +214,7 @@ class MetaCrawler:
|
||||
html = None
|
||||
while True:
|
||||
try:
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text, encoding='euc-kr')
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=30).text, encoding='euc-kr')
|
||||
sleep(0.5)
|
||||
break
|
||||
except:
|
||||
@@ -301,7 +301,7 @@ class MetaCrawler:
|
||||
html = None
|
||||
while True:
|
||||
try:
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
break
|
||||
except:
|
||||
@@ -383,7 +383,7 @@ class MetaCrawler:
|
||||
html = None
|
||||
while True:
|
||||
try:
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
break
|
||||
except:
|
||||
|
||||
@@ -45,7 +45,7 @@ class StockCrawler:
|
||||
|
||||
def getStockInfo(self):
|
||||
#code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
|
||||
code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header, timeout=5).text)[0]
|
||||
code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header, timeout=30).text)[0]
|
||||
# code_df = pd.read_excel('../resources/stock/상장법인목록.xls')
|
||||
|
||||
# 종목코드가 6자리이기 때문에 6자리를 맞춰주기 위해 설정해줌
|
||||
@@ -382,7 +382,7 @@ class StockCrawler:
|
||||
html = None
|
||||
while True:
|
||||
try:
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
break
|
||||
except:
|
||||
|
||||
@@ -10,7 +10,7 @@ class FnGuideCrawler:
|
||||
|
||||
def getStockInfo(self):
|
||||
#code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
|
||||
code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header, timeout=5).text)[0]
|
||||
code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header, timeout=30).text)[0]
|
||||
# code_df = pd.read_excel('../resources/stock/상장법인목록.xls')
|
||||
|
||||
# 종목코드가 6자리이기 때문에 6자리를 맞춰주기 위해 설정해줌
|
||||
|
||||
@@ -46,7 +46,7 @@ class MetaCrawler:
|
||||
finish = False
|
||||
for i in range(1, self.limit_page_count):
|
||||
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
# 마지막 페이지 까지 받기
|
||||
@@ -120,7 +120,7 @@ class MetaCrawler:
|
||||
finish = False
|
||||
for i in range(1, self.limit_page_count):
|
||||
#html = pd.read_html(url + str(i), header=0)
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
# 마지막 페이지 까지 받기
|
||||
@@ -181,7 +181,7 @@ class MetaCrawler:
|
||||
finish = False
|
||||
for i in range(1, self.limit_page_count):
|
||||
#html = pd.read_html(url + str(i), header=0, encoding='euc-kr')
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text, encoding='euc-kr')
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=30).text, encoding='euc-kr')
|
||||
sleep(0.5)
|
||||
|
||||
# 마지막 페이지 까지 받기
|
||||
@@ -253,7 +253,7 @@ class MetaCrawler:
|
||||
finish = False
|
||||
for i in range(1, self.limit_page_count):
|
||||
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
# 마지막 페이지 까지 받기
|
||||
|
||||
@@ -74,7 +74,7 @@ class StockCrawler:
|
||||
|
||||
def getStockInfo(self):
|
||||
#code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
|
||||
code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header, timeout=5).text)[0]
|
||||
code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header, timeout=30).text)[0]
|
||||
# code_df = pd.read_excel('../resources/stock/상장법인목록.xls')
|
||||
|
||||
# 종목코드가 6자리이기 때문에 6자리를 맞춰주기 위해 설정해줌
|
||||
@@ -255,7 +255,7 @@ class StockCrawler:
|
||||
# 최근 상장 기업의 마지막 반복되는 페이지를 제외시킨다.
|
||||
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
||||
#html = pd.read_html(pg_url, header=0)
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
count = 0
|
||||
@@ -379,7 +379,7 @@ class StockCrawler:
|
||||
# 최근 상장 기업의 마지막 반복되는 페이지를 제외시킨다.
|
||||
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
||||
#html = pd.read_html(pg_url, header=0)
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
count = 0
|
||||
@@ -603,7 +603,7 @@ class StockCrawler:
|
||||
# 최근 상장 기업의 마지막 반복되는 페이지를 제외시킨다.
|
||||
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
||||
#html = pd.read_html(pg_url, header=0)
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
count = 0
|
||||
|
||||
@@ -46,7 +46,7 @@ class MetaCrawler:
|
||||
finish = False
|
||||
for i in range(1, self.limit_page_count):
|
||||
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
# 마지막 페이지 까지 받기
|
||||
@@ -126,7 +126,7 @@ class MetaCrawler:
|
||||
finish = False
|
||||
for i in range(1, self.limit_page_count):
|
||||
#html = pd.read_html(url + str(i), header=0)
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
# 마지막 페이지 까지 받기
|
||||
@@ -192,7 +192,7 @@ class MetaCrawler:
|
||||
finish = False
|
||||
for i in range(1, self.limit_page_count):
|
||||
#html = pd.read_html(url + str(i), header=0, encoding='euc-kr')
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text, encoding='euc-kr')
|
||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=30).text, encoding='euc-kr')
|
||||
sleep(0.5)
|
||||
|
||||
# 마지막 페이지 까지 받기
|
||||
@@ -267,7 +267,7 @@ class MetaCrawler:
|
||||
finish = False
|
||||
for i in range(1, self.limit_page_count):
|
||||
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
# 마지막 페이지 까지 받기
|
||||
|
||||
@@ -75,7 +75,7 @@ class StockCrawler:
|
||||
|
||||
def getStockInfo(self):
|
||||
#code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
|
||||
code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header, timeout=5).text)[0]
|
||||
code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header, timeout=30).text)[0]
|
||||
# code_df = pd.read_excel('../resources/stock/상장법인목록.xls')
|
||||
|
||||
# 종목코드가 6자리이기 때문에 6자리를 맞춰주기 위해 설정해줌
|
||||
@@ -228,7 +228,7 @@ class StockCrawler:
|
||||
# 최근 상장 기업의 마지막 반복되는 페이지를 제외시킨다.
|
||||
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
||||
#html = pd.read_html(pg_url, header=0)
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=30).text)
|
||||
sleep(0.5)
|
||||
|
||||
for date in html[0].날짜.values:
|
||||
|
||||
Reference in New Issue
Block a user