init
This commit is contained in:
221
stock/crawler/FnGuideCrawler.py
Normal file
221
stock/crawler/FnGuideCrawler.py
Normal file
@@ -0,0 +1,221 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from pandas import DataFrame
|
||||
import requests as re
|
||||
import pandas as pd
|
||||
import os
|
||||
import sqlite3
|
||||
|
||||
class FnGuideCrawler:
    """Crawl FnGuide financial-highlight tables for KRX-listed companies into SQLite.

    The listed-company roster is downloaded from the KRX KIND site, each
    company's FnGuide main page is scraped for its ``highlight_D_Y`` table,
    and one row per (company, period) is stored in the ``fnguide`` table.
    """

    # Browser-like User-Agent sent with every FnGuide page request.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    START_DATE = None

    _TABLE_NAME = 'fnguide'

    # Seconds to wait for FnGuide before giving up on a single company.
    _REQUEST_TIMEOUT = 30

    # (accepted FnGuide row labels, SQLite column) in table-column order.
    # Several labels mean "use the first one present" (e.g. 'EPS(원)' or 'EPS').
    _FIELD_MAP = (
        (('매출액',), 'sales'),
        (('순영업수익',), 'net_business_profits'),
        (('영업이익',), 'business_profits'),
        (('영업이익(발표기준)',), 'business_profits_release'),
        (('당기순이익',), 'net_profit'),
        (('지배주주순이익',), 'significant_shareholder_net_profit'),
        (('비지배주주순이익',), 'none_significant_shareholder_net_profit'),
        (('자산총계',), 'total_assets'),
        (('부채총계',), 'total_debt'),
        (('자본총계',), 'total_ownership_interest'),
        (('지배주주지분',), 'equity_holdings'),
        (('비지배주주지분',), 'none_equity_holdings'),
        (('자본금',), 'capital'),
        (('부채비율',), 'debt_ratio'),
        (('유보율',), 'reserve_ratio'),
        (('영업이익률',), 'business_profits_ratio'),
        (('지배주주순이익률',), 'significant_shareholder_profits_ratio'),
        (('ROA',), 'ROA'),
        (('ROE',), 'ROE'),
        (('EPS(원)', 'EPS'), 'EPS'),
        (('BPS(원)', 'BPS'), 'BPS'),
        (('DPS(원)', 'DPS'), 'DPS'),
        (('PER',), 'PER'),
        (('PBR',), 'PBR'),
        (('발행주식수',), 'share_outstanding'),
        (('배당수익률',), 'dividend_rate'),
    )

    def __init__(self, START_DATE=None):
        """Store ``START_DATE`` on the instance.

        The value is only stored; no method of this class reads it.
        It defaults to ``None`` so the crawler can be built without it.
        """
        self.START_DATE = START_DATE

    def getStockInfo(self):
        """Download the KRX listed-company roster.

        Returns:
            A DataFrame with two columns: ``name`` (company name) and
            ``code`` (ticker formatted to six digits with leading zeros).
        """
        listing = pd.read_html(
            'http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13',
            header=0,
        )[0]

        # Tickers are six digits wide; restore the leading zeros.
        listing['종목코드'] = listing['종목코드'].map('{:06d}'.format)

        # Only the company name and ticker are needed; rename them to English.
        listing = listing[['회사명', '종목코드']]
        return listing.rename(columns={'회사명': 'name', '종목코드': 'code'})

    # Financial statements of listed companies, crawled from FnGuide.
    # http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221294884955&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def get_fnguide_table(self, code):
        """Scrape one company's ``highlight_D_Y`` table from FnGuide.

        Args:
            code: Ticker string; surrounding whitespace is stripped.

        Returns:
            ``{period: {row_label: value}}``. Each period key is the header
            text with ``/`` replaced by ``.`` and ``.01`` appended, so any
            ``(E)``/``(P)`` marker in the header stays inside the key.
            Cells that do not parse as numbers become ``0.0``. An empty dict
            is returned when the request fails or the expected page
            structure is not found.
        """
        url = 'http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s' % (code.strip())
        try:
            response = re.get(url, headers=self.header, timeout=self._REQUEST_TIMEOUT)
        except re.RequestException as err:
            # One unreachable page should not abort a crawl over every listing.
            print('request failed, skipping', code, err)
            return {}

        html = BeautifulSoup(response.content, 'html.parser')

        try:
            body = html.find('body')
            fn_body = body.find('div', {'class': 'fng_body asp_body'})
            ur_table = fn_body.find('div', {'id': 'div15'})
            table = ur_table.find('div', {'id': 'highlight_D_Y'})
            rows = table.find('tbody').find_all('tr')
            period_cells = table.find('thead').find('tr', {'class': 'td_gapcolor2'}).find_all('th')
        except AttributeError:
            # A find() returned None: the page lacks the expected table.
            return {}

        # Period labels from the table header.
        periods = []
        for cell in period_cells:
            label = cell.find('span', {'class': 'txt_acd'})
            text = label.text if label is not None else cell.text
            periods.append(text.replace("/", ".") + ".01")

        # One list of values per row label.
        columns = {}
        for row in rows:
            heading = row.find('span', {'class': 'txt_acd'})
            if heading is None:
                heading = row.find('th')
            if heading is None:
                continue

            values = []
            for cell in row.find_all('td', {'class': 'r'}):
                try:
                    values.append(float(cell.text.replace(',', '').strip()))
                except ValueError:
                    values.append(0.0)

            # A row that does not line up with the header cannot be aligned.
            if len(values) != len(periods):
                continue
            columns[heading.text.strip()] = values

        if not columns:
            return {}

        return {
            period: {label: values[i] for label, values in columns.items()}
            for i, period in enumerate(periods)
        }

    @staticmethod
    def _split_period_key(key_ymd):
        """Return ``(ymd, period_type)`` for a period key.

        The ``(P)`` / ``(E)`` marker is removed from ``ymd`` and reported as
        ``'P'`` / ``'E'``; ``period_type`` is ``''`` when neither is present.
        """
        ymd = key_ymd.replace('(P)', '').replace('(E)', '')
        if '(P)' in key_ymd:
            return ymd, 'P'
        if '(E)' in key_ymd:
            return ymd, 'E'
        return ymd, ''

    @classmethod
    def _record_to_values(cls, record):
        """Return the metric values of ``record`` in ``_FIELD_MAP`` order.

        A metric missing from ``record`` becomes 0 instead of raising
        ``KeyError``, because not every company's table has every row.
        """
        return tuple(
            next((record[label] for label in labels if label in record), 0)
            for labels, _column in cls._FIELD_MAP
        )

    def crawl_fnguide(self, inFileName):
        """Crawl every listed company and store new periods in SQLite.

        Creates the ``fnguide`` table and its (CODE, ymd) index when absent,
        then inserts one row per (company, period) that is not stored yet.
        Rows already present are left untouched.

        Args:
            inFileName: Path of the SQLite database file.
        """
        tableName = self._TABLE_NAME
        metric_columns = [column for _labels, column in self._FIELD_MAP]
        all_columns = ['CODE', 'NAME', 'ymd', 'type'] + metric_columns

        create_sql = (
            "CREATE TABLE IF NOT EXISTS " + tableName
            + " (CODE text, NAME text, ymd text, type text, "
            + ", ".join(column + " REAL" for column in metric_columns)
            + ")"
        )
        create_key = "CREATE INDEX IF NOT EXISTS " + tableName + "_idx on " + tableName + " (CODE, ymd) "
        exists_sql = 'SELECT 1 FROM ' + tableName + ' WHERE CODE=? and ymd=?'
        insert_sql = (
            "INSERT INTO " + tableName + "(" + ", ".join(all_columns) + ") VALUES("
            + ", ".join("?" for _ in all_columns) + ")"
        )

        # isolation_level=None puts the connection in autocommit mode.
        conn = sqlite3.connect(inFileName, isolation_level=None)
        try:
            cursor = conn.cursor()
            cursor.execute(create_sql)
            cursor.execute(create_key)

            code_df = self.getStockInfo()
            for idx, (item_name, item_code) in enumerate(code_df.values, start=1):
                print(idx, item_code, item_name, 'http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s'%(item_code.strip()))

                fnGuideData = self.get_fnguide_table(item_code)
                for key_ymd, record in fnGuideData.items():
                    ymd, period_type = self._split_period_key(key_ymd)

                    cursor.execute(exists_sql, (item_code, ymd))
                    if cursor.fetchone() is not None:
                        # Skip only this period; later periods may still be new.
                        continue

                    cursor.execute(
                        insert_sql,
                        (item_code, item_name, ymd, period_type) + self._record_to_values(record),
                    )

            cursor.close()
        finally:
            # Release the database even if a request or insert fails mid-crawl.
            conn.close()
|
||||
|
||||
if __name__ == "__main__":
    # Project root is four directory levels above this file. Start from the
    # absolute path: a relative __file__ would collapse to '' and point the
    # database at '/resources/stock.db'.
    PROJECT_HOME = os.path.abspath(__file__)
    for _ in range(4):
        PROJECT_HOME = os.path.dirname(PROJECT_HOME)
    inFnguideFileName = os.path.join(PROJECT_HOME, 'resources', 'stock.db')

    # START_DATE is a required constructor argument; None matches the
    # class-level default.
    crawler = FnGuideCrawler(None)
    crawler.crawl_fnguide(inFnguideFileName)
|
||||
Reference in New Issue
Block a user