Files
DeepStock/stockpredictor_back/crawler/sQLite/FnGuideCrawler.py
dosangyoon 794e2ea5f7 init
2022-07-29 14:28:27 +09:00

224 lines
11 KiB
Python

from bs4 import BeautifulSoup
from pandas import DataFrame, Series
import requests as re
import pandas as pd
import os
import json
import sqlite3
import requests
class FnGuideCrawler:
    """Crawl FnGuide financial-highlight tables into a SQLite database.

    The crawler downloads the KRX listed-company list, fetches the annual
    highlight table (``highlight_D_Y``) of each company from FnGuide and
    stores one row per (company, period) in the ``fnguide`` table.
    """

    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    START_DATE = None

    # Seconds to wait for FnGuide before giving up on a request, so a stalled
    # connection cannot hang the whole crawl.
    REQUEST_TIMEOUT = 30

    _TABLE_NAME = 'fnguide'
    _MAIN_URL = 'http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s'

    # (database column, FnGuide row labels tried in order). This single list
    # drives the CREATE TABLE statement, the INSERT statement and the row
    # values, so the three can never drift apart.
    _METRIC_COLUMNS = (
        ('sales', ('매출액',)),
        ('net_business_profits', ('순영업수익',)),
        ('business_profits', ('영업이익',)),
        ('business_profits_release', ('영업이익(발표기준)',)),
        ('net_profit', ('당기순이익',)),
        ('significant_shareholder_net_profit', ('지배주주순이익',)),
        ('none_significant_shareholder_net_profit', ('비지배주주순이익',)),
        ('total_assets', ('자산총계',)),
        ('total_debt', ('부채총계',)),
        ('total_ownership_interest', ('자본총계',)),
        ('equity_holdings', ('지배주주지분',)),
        ('none_equity_holdings', ('비지배주주지분',)),
        ('capital', ('자본금',)),
        ('debt_ratio', ('부채비율',)),
        ('reserve_ratio', ('유보율',)),
        ('business_profits_ratio', ('영업이익률',)),
        ('significant_shareholder_profits_ratio', ('지배주주순이익률',)),
        ('ROA', ('ROA',)),
        ('ROE', ('ROE',)),
        ('EPS', ('EPS(원)', 'EPS')),
        ('BPS', ('BPS(원)', 'BPS')),
        ('DPS', ('DPS(원)', 'DPS')),
        ('PER', ('PER',)),
        ('PBR', ('PBR',)),
        ('share_outstanding', ('발행주식수',)),
        ('dividend_rate', ('배당수익률',)),
    )

    def __init__(self, START_DATE=None):
        """Create a crawler.

        Args:
            START_DATE: Stored on the instance; not read by any method of
                this class. Optional so ``FnGuideCrawler()`` keeps working.
        """
        self.START_DATE = START_DATE

    def getStockInfo(self):
        """Download the KRX listed-company list.

        Returns:
            DataFrame with two columns: ``name`` (company name) and ``code``
            (stock code, zero-padded to six digits).
        """
        code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
        # Stock codes are six digits long; pad with leading zeros.
        code_df['종목코드'] = code_df['종목코드'].map('{:06d}'.format)
        # Only the company name and the stock code are needed.
        code_df = code_df[['회사명', '종목코드']]
        # Rename the Korean column names to English ones.
        return code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})

    @staticmethod
    def _to_number(text):
        """Parse a table cell as a float; non-numeric cells become 0."""
        try:
            return float(text.replace(',', '').strip())
        except ValueError:
            return 0

    @staticmethod
    def _first_present(metrics, labels, default=0):
        """Return the value of the first label found in ``metrics``, else ``default``."""
        for label in labels:
            if label in metrics:
                return metrics[label]
        return default

    @classmethod
    def _build_row(cls, item_code, item_name, key_ymd, metrics):
        """Turn one period of highlight data into an INSERT parameter tuple.

        The tuple order is CODE, NAME, ymd, type, followed by the metrics in
        ``_METRIC_COLUMNS`` order. A metric missing from ``metrics`` is stored
        as 0, the same value used for non-numeric cells.
        """
        ymd = key_ymd.replace('(P)', '').replace('(E)', '')
        # NOTE(review): '(P)' / '(E)' presumably mark provisional / estimated
        # periods on FnGuide -- confirm against the site.
        if '(P)' in key_ymd:
            period_type = 'P'
        elif '(E)' in key_ymd:
            period_type = 'E'
        else:
            period_type = ''
        values = [cls._first_present(metrics, labels) for _, labels in cls._METRIC_COLUMNS]
        return tuple([item_code, item_name, ymd, period_type] + values)

    def get_fnguide_table(self, code):
        """Fetch the annual financial-highlight table of one company.

        Args:
            code: Stock code; surrounding whitespace is stripped.

        Returns:
            ``{period: {row label: value}}`` where ``period`` is the column
            heading with ``/`` replaced by ``.`` and ``.01`` appended (any
            ``(P)`` / ``(E)`` marker is kept). An empty dict is returned when
            the expected table cannot be located or is malformed.

        Raises:
            requests.RequestException: If the HTTP request fails or times out.
        """
        response = requests.get(self._MAIN_URL % (code.strip()), timeout=self.REQUEST_TIMEOUT)
        html = BeautifulSoup(response.content, 'html.parser')
        try:
            # Any missing element makes find() return None, so the next
            # attribute access raises AttributeError.
            fn_body = html.find('body').find('div', {'class': 'fng_body asp_body'})
            ur_table = fn_body.find('div', {'id': 'div15'})
            table = ur_table.find('div', {'id': 'highlight_D_Y'})
            rows = table.find('tbody').find_all('tr')
            header_cells = table.find('thead').find('tr', {'class': 'td_gapcolor2'}).find_all('th')
        except AttributeError:
            return {}

        # One column per table row: row label -> list of values per period.
        columns = {}
        for row in rows:
            label = row.find('span', {'class': 'txt_acd'})
            if label is None:
                label = row.find('th')
            if label is None:
                continue
            cells = row.find_all('td', {'class': 'r'})
            columns[label.text.strip()] = [self._to_number(cell.text) for cell in cells]

        # Period headings, e.g. "2020/12" -> "2020.12.01".
        periods = []
        for cell in header_cells:
            span = cell.find('span', {'class': 'txt_acd'})
            heading = cell.text if span is None else span.text
            periods.append(heading.replace("/", ".") + ".01")

        if not columns:
            return {}
        try:
            table_df = DataFrame(columns, index=periods)
        except ValueError:
            # Rows and headings disagree on the number of periods.
            return {}
        return table_df.T.to_dict()

    def crawl_fnguide(self, inFileName):
        """Crawl every listed company and store new periods in SQLite.

        Creates the ``fnguide`` table and its (CODE, ymd) index when missing,
        then inserts one row per (company, period) that is not stored yet.
        Rows that already exist are left untouched; the remaining periods of
        the same company are still processed.

        Args:
            inFileName: Path of the SQLite database file.
        """
        table = self._TABLE_NAME
        metric_names = [name for name, _ in self._METRIC_COLUMNS]
        column_names = ['CODE', 'NAME', 'ymd', 'type'] + metric_names

        create_sql = 'CREATE TABLE IF NOT EXISTS %s (CODE text, NAME text, ymd text, type text, %s)' % (
            table, ', '.join('%s REAL' % name for name in metric_names))
        index_sql = 'CREATE INDEX IF NOT EXISTS %s_idx on %s (CODE, ymd)' % (table, table)
        exists_sql = 'SELECT 1 FROM %s WHERE CODE=? and ymd=? LIMIT 1' % table
        insert_sql = 'INSERT INTO %s(%s) VALUES(%s)' % (
            table, ', '.join(column_names), ', '.join('?' * len(column_names)))

        # isolation_level=None puts the connection in autocommit mode, so
        # every INSERT is persisted immediately.
        conn = sqlite3.connect(inFileName, isolation_level=None)
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(create_sql)
                cursor.execute(index_sql)

                code_df = self.getStockInfo()
                for idx, (item_name, item_code) in enumerate(code_df.values, start=1):
                    print(idx, item_code, item_name, self._MAIN_URL % (item_code.strip()))
                    fnGuideData = self.get_fnguide_table(item_code)
                    for key_ymd, metrics in fnGuideData.items():
                        row = self._build_row(item_code, item_name, key_ymd, metrics)
                        cursor.execute(exists_sql, (item_code, row[2]))
                        if cursor.fetchone() is not None:
                            # Already stored: skip this period only, so newer
                            # periods of the same company are still inserted.
                            continue
                        cursor.execute(insert_sql, row)
            finally:
                cursor.close()
        finally:
            conn.close()
        return
if __name__ == "__main__":
    # The project root is four directory levels above this file. Resolve
    # __file__ to an absolute path first: with a relative __file__ the
    # repeated dirname() calls collapse to '' and the database path would
    # point at the filesystem root.
    PROJECT_HOME = os.path.abspath(__file__)
    for _ in range(4):
        PROJECT_HOME = os.path.dirname(PROJECT_HOME)
    inFnguideFileName = os.path.join(PROJECT_HOME, 'resources', 'stock.db')
    # START_DATE is a constructor argument; pass None explicitly because this
    # script has no start date to supply.
    crawler = FnGuideCrawler(None)
    crawler.crawl_fnguide(inFnguideFileName)