sleep 추가
This commit is contained in:
@@ -2,6 +2,7 @@ import os
|
|||||||
import datetime
|
import datetime
|
||||||
import requests
|
import requests
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
@@ -52,6 +53,7 @@ class MetaCrawler:
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
break
|
break
|
||||||
except:
|
except:
|
||||||
print(input['URL'] + '&page=%s' % i)
|
print(input['URL'] + '&page=%s' % i)
|
||||||
@@ -135,6 +137,7 @@ class MetaCrawler:
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
break
|
break
|
||||||
except:
|
except:
|
||||||
print(url + str(i))
|
print(url + str(i))
|
||||||
@@ -212,6 +215,7 @@ class MetaCrawler:
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text, encoding='euc-kr')
|
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text, encoding='euc-kr')
|
||||||
|
sleep(0.5)
|
||||||
break
|
break
|
||||||
except:
|
except:
|
||||||
print(url + str(i))
|
print(url + str(i))
|
||||||
@@ -298,6 +302,7 @@ class MetaCrawler:
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
break
|
break
|
||||||
except:
|
except:
|
||||||
print (input['URL'] + '&page=%s' % i)
|
print (input['URL'] + '&page=%s' % i)
|
||||||
@@ -379,6 +384,7 @@ class MetaCrawler:
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
break
|
break
|
||||||
except:
|
except:
|
||||||
print(input['URL'] + '&page=%s' % i)
|
print(input['URL'] + '&page=%s' % i)
|
||||||
|
|||||||
@@ -383,6 +383,7 @@ class StockCrawler:
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
break
|
break
|
||||||
except:
|
except:
|
||||||
print(pg_url)
|
print(pg_url)
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ class MetaCrawler:
|
|||||||
for i in range(1, self.limit_page_count):
|
for i in range(1, self.limit_page_count):
|
||||||
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
||||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
# 마지막 페이지 까지 받기
|
# 마지막 페이지 까지 받기
|
||||||
if len(html[0].날짜.values) <= 1:
|
if len(html[0].날짜.values) <= 1:
|
||||||
@@ -120,6 +121,7 @@ class MetaCrawler:
|
|||||||
for i in range(1, self.limit_page_count):
|
for i in range(1, self.limit_page_count):
|
||||||
#html = pd.read_html(url + str(i), header=0)
|
#html = pd.read_html(url + str(i), header=0)
|
||||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
# 마지막 페이지 까지 받기
|
# 마지막 페이지 까지 받기
|
||||||
if len(html[0].날짜.values) <= 2:
|
if len(html[0].날짜.values) <= 2:
|
||||||
@@ -180,6 +182,7 @@ class MetaCrawler:
|
|||||||
for i in range(1, self.limit_page_count):
|
for i in range(1, self.limit_page_count):
|
||||||
#html = pd.read_html(url + str(i), header=0, encoding='euc-kr')
|
#html = pd.read_html(url + str(i), header=0, encoding='euc-kr')
|
||||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text, encoding='euc-kr')
|
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text, encoding='euc-kr')
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
# 마지막 페이지 까지 받기
|
# 마지막 페이지 까지 받기
|
||||||
if len(html[0].날짜.values) <= 10:
|
if len(html[0].날짜.values) <= 10:
|
||||||
@@ -251,6 +254,7 @@ class MetaCrawler:
|
|||||||
for i in range(1, self.limit_page_count):
|
for i in range(1, self.limit_page_count):
|
||||||
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
||||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
# 마지막 페이지 까지 받기
|
# 마지막 페이지 까지 받기
|
||||||
if len(html[0].날짜.values) <= 1:
|
if len(html[0].날짜.values) <= 1:
|
||||||
|
|||||||
@@ -256,6 +256,7 @@ class StockCrawler:
|
|||||||
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
||||||
#html = pd.read_html(pg_url, header=0)
|
#html = pd.read_html(pg_url, header=0)
|
||||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
for date in html[0].날짜.values:
|
for date in html[0].날짜.values:
|
||||||
@@ -379,6 +380,8 @@ class StockCrawler:
|
|||||||
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
||||||
#html = pd.read_html(pg_url, header=0)
|
#html = pd.read_html(pg_url, header=0)
|
||||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
for date in html[0].날짜.values:
|
for date in html[0].날짜.values:
|
||||||
if type(date) is str:
|
if type(date) is str:
|
||||||
@@ -601,6 +604,8 @@ class StockCrawler:
|
|||||||
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
||||||
#html = pd.read_html(pg_url, header=0)
|
#html = pd.read_html(pg_url, header=0)
|
||||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
for date in html[0].날짜.values:
|
for date in html[0].날짜.values:
|
||||||
if type(date) is str:
|
if type(date) is str:
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ class MetaCrawler:
|
|||||||
for i in range(1, self.limit_page_count):
|
for i in range(1, self.limit_page_count):
|
||||||
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
||||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
# 마지막 페이지 까지 받기
|
# 마지막 페이지 까지 받기
|
||||||
if len(html[0].날짜.values) <= 1:
|
if len(html[0].날짜.values) <= 1:
|
||||||
@@ -126,6 +127,7 @@ class MetaCrawler:
|
|||||||
for i in range(1, self.limit_page_count):
|
for i in range(1, self.limit_page_count):
|
||||||
#html = pd.read_html(url + str(i), header=0)
|
#html = pd.read_html(url + str(i), header=0)
|
||||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
# 마지막 페이지 까지 받기
|
# 마지막 페이지 까지 받기
|
||||||
if len(html[0].날짜.values) <= 2:
|
if len(html[0].날짜.values) <= 2:
|
||||||
@@ -191,6 +193,7 @@ class MetaCrawler:
|
|||||||
for i in range(1, self.limit_page_count):
|
for i in range(1, self.limit_page_count):
|
||||||
#html = pd.read_html(url + str(i), header=0, encoding='euc-kr')
|
#html = pd.read_html(url + str(i), header=0, encoding='euc-kr')
|
||||||
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text, encoding='euc-kr')
|
html = pd.read_html(requests.get(url + str(i), headers=self.header, timeout=5).text, encoding='euc-kr')
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
# 마지막 페이지 까지 받기
|
# 마지막 페이지 까지 받기
|
||||||
if len(html[0].날짜.values) <= 10:
|
if len(html[0].날짜.values) <= 10:
|
||||||
@@ -265,6 +268,7 @@ class MetaCrawler:
|
|||||||
for i in range(1, self.limit_page_count):
|
for i in range(1, self.limit_page_count):
|
||||||
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
#html = pd.read_html(input['URL'] + '&page=%s' % i, header=0)
|
||||||
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(input['URL'] + '&page=%s' % i, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
# 마지막 페이지 까지 받기
|
# 마지막 페이지 까지 받기
|
||||||
if len(html[0].날짜.values) <= 1:
|
if len(html[0].날짜.values) <= 1:
|
||||||
|
|||||||
@@ -229,6 +229,8 @@ class StockCrawler:
|
|||||||
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
pg_url = '{url}&page={page}'.format(url=url, page=page)
|
||||||
#html = pd.read_html(pg_url, header=0)
|
#html = pd.read_html(pg_url, header=0)
|
||||||
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
html = pd.read_html(requests.get(pg_url, headers=self.header, timeout=5).text)
|
||||||
|
sleep(0.5)
|
||||||
|
|
||||||
for date in html[0].날짜.values:
|
for date in html[0].날짜.values:
|
||||||
if type(date) is str:
|
if type(date) is str:
|
||||||
if date in date_set:
|
if date in date_set:
|
||||||
|
|||||||
Reference in New Issue
Block a user