Files
DeepStock/stockpredictor_back/crawler/toSQLite/FnGuideCrawler.py
dosangyoon 794e2ea5f7 init
2022-07-29 14:28:27 +09:00

130 lines
4.6 KiB
Python

from bs4 import BeautifulSoup
from pandas import DataFrame, Series
import requests as re
import pandas as pd
import os
import json
import sqlite3
import requests
class FnGuideCrawler:
header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
def getStockInfo(self):
code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
#code_df = pd.read_html(requests.get('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', headers=self.header).text)
# 종목코드가 6자리이기 때문에 6자리를 맞춰주기 위해 설정해줌
code_df.종목코드 = code_df.종목코드.map('{:06d}'.format)
# 우리가 필요한 것은 회사명과 종목코드이기 때문에 필요없는 column들은 제외해준다.
code_df = code_df[['회사명', '종목코드']]
# 한글로된 컬럼명을 영어로 바꿔준다.
code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})
###print (code_df.head())
return code_df
# FnGuide에서 크롤링한 KOSPI 상장기업의 재무제표
# http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221294884955&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
def get_fnguide_table(self, code):
url = re.get('http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s'%(code.strip()))
url = url.content
html = BeautifulSoup(url,'html.parser')
body = html.find('body')
try:
fn_body = body.find('div', {'class': 'fng_body asp_body'})
ur_table = fn_body.find('div', {'id': 'div15'})
table = ur_table.find('div', {'id': 'highlight_D_Y'})
tbody = table.find('tbody')
tr = tbody.find_all('tr')
Table = DataFrame()
except:
return {}
for i in tr:
''' 자료 항목 가져오기'''
category = i.find('span', {'class': 'txt_acd'})
if category == None:
category = i.find('th')
category = category.text.strip()
'''값 가져오기'''
value_list = []
j = i.find_all('td', {'class': 'r'})
for value in j:
temp = value.text.replace(',', '').strip()
try:
temp = float(temp)
value_list.append(temp)
except:
value_list.append(0)
Table['%s' % (category)] = value_list
''' 기간 가져오기 '''
thead = table.find('thead')
tr_2 = thead.find('tr', {'class': 'td_gapcolor2'}).find_all('th')
year_list = []
for i in tr_2:
try:
temp_year = i.find('span', {'class': 'txt_acd'}).text
except:
temp_year = i.text
temp_year = temp_year.replace("/",".")+".01"
year_list.append(temp_year)
Table.index = year_list
return Table.T.to_dict()
def crawl_fnguide(self, inFileName):
tableName = 'fnguide'
conn = sqlite3.connect(inFileName, isolation_level=None)
cursor = conn.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS "+tableName+" (CODE text PRIMARY KEY, NAME text, PRICE text)")
code_df = self.getStockInfo()
idx = 0
for item in code_df.values:
item_name = item[0]
item_code = item[1]
idx += 1
print(idx, item_name)
fnGuideData = self.get_fnguide_table(item_code)
text = json.dumps(fnGuideData, ensure_ascii=False)
cursor.execute('SELECT * FROM '+tableName+' WHERE CODE=?', (item_code, ))
result = cursor.fetchone()
if result == None:
cursor.execute("INSERT INTO "+tableName+"(CODE, NAME, PRICE) VALUES(?, ?, ?)", (item_code, item_name, text))
else:
cursor.execute("UPDATE "+tableName+" SET PRICE=? WHERE CODE=?", (text, item_code))
cursor.close()
conn.close()
return
if __name__ == "__main__":
crawler = FnGuideCrawler()
#crawler.get_fnguide_table('155660')
PROJECT_HOME = os.path.join(os.path.dirname(os.path.join(os.path.dirname(os.path.join(os.path.dirname(os.path.join(os.path.dirname(__file__))))))))
inFnguideFileName = PROJECT_HOME + '/resources/fnguide.db'
crawler = FnGuideCrawler()
crawler.crawl_fnguide(inFnguideFileName)