import os
import sqlite3

import pandas as pd
# NOTE: the ``re`` alias is kept from the original file; be aware that it
# shadows the name of the standard-library ``re`` module inside this module.
import requests as re
from bs4 import BeautifulSoup
from pandas import DataFrame


class FnGuideCrawler:
    """Crawl the FnGuide "financial highlight" table of KRX-listed companies.

    The list of companies is downloaded from KRX (KIND), the annual
    highlight table of every company is scraped from comp.fnguide.com and
    the figures are stored in the ``fnguide`` table of a SQLite database.
    """

    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}
    START_DATE = None

    # Seconds to wait for FnGuide before giving up instead of hanging forever.
    _REQUEST_TIMEOUT = 30

    _KRX_LIST_URL = 'http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13'
    _FNGUIDE_URL = 'http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A%s'

    # Text columns that identify a row of the ``fnguide`` table.
    _KEY_COLUMNS = ('CODE', 'NAME', 'ymd', 'type')

    # (database column, row labels on the FnGuide page in lookup order).
    # The first label found in the scraped data supplies the value.
    _METRIC_COLUMNS = (
        ('sales', ('매출액',)),
        ('net_business_profits', ('순영업수익',)),
        ('business_profits', ('영업이익',)),
        ('business_profits_release', ('영업이익(발표기준)',)),
        ('net_profit', ('당기순이익',)),
        ('significant_shareholder_net_profit', ('지배주주순이익',)),
        ('none_significant_shareholder_net_profit', ('비지배주주순이익',)),
        ('total_assets', ('자산총계',)),
        ('total_debt', ('부채총계',)),
        ('total_ownership_interest', ('자본총계',)),
        ('equity_holdings', ('지배주주지분',)),
        ('none_equity_holdings', ('비지배주주지분',)),
        ('capital', ('자본금',)),
        ('debt_ratio', ('부채비율',)),
        ('reserve_ratio', ('유보율',)),
        ('business_profits_ratio', ('영업이익률',)),
        ('significant_shareholder_profits_ratio', ('지배주주순이익률',)),
        ('ROA', ('ROA',)),
        ('ROE', ('ROE',)),
        ('EPS', ('EPS(원)', 'EPS')),
        ('BPS', ('BPS(원)', 'BPS')),
        ('DPS', ('DPS(원)', 'DPS')),
        ('PER', ('PER',)),
        ('PBR', ('PBR',)),
        ('share_outstanding', ('발행주식수',)),
        ('dividend_rate', ('배당수익률',)),
    )

    def __init__(self, START_DATE=None):
        """Store ``START_DATE`` on the instance.

        The value is not read by any method in this class; it defaults to
        ``None`` so the crawler can be created without arguments, which is
        how the script entry point at the bottom of the file uses it.
        """
        self.START_DATE = START_DATE

    @staticmethod
    def _format_code(raw_code):
        """Return a stock code as a zero-padded string of at least 6 characters.

        Numeric values are formatted with ``'{:06.0f}'``. Values that cannot
        be formatted as a number (for example codes already read as text)
        are converted with ``str`` and left-padded with zeros instead of
        raising.
        """
        try:
            return '{:06.0f}'.format(raw_code)
        except (TypeError, ValueError):
            return str(raw_code).strip().zfill(6)

    @staticmethod
    def _metric_value(period_values, labels):
        """Return the value of the first of ``labels`` present, else ``0``."""
        for label in labels:
            if label in period_values:
                return period_values[label]
        return 0

    def getStockInfo(self):
        """Download the KRX company list.

        Returns:
            A DataFrame with two columns: ``name`` (company name) and
            ``code`` (stock code as a zero-padded string).
        """
        code_df = pd.read_html(self._KRX_LIST_URL, header=0)[0]
        # Stock codes are 6 characters long, so restore the leading zeros
        # that are lost when the column is parsed as a number.
        code_df['종목코드'] = code_df['종목코드'].map(self._format_code)
        # Only the company name and the stock code are needed.
        code_df = code_df[['회사명', '종목코드']]
        # Rename the Korean column names to English ones.
        return code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})

    # Financial statements of listed companies crawled from FnGuide.
    # http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221294884955&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def get_fnguide_table(self, code):
        """Fetch and parse the annual highlight table of one company.

        Args:
            code: stock code; surrounding whitespace is stripped before it
                is inserted into the FnGuide URL.

        Returns:
            ``{period: {row label: value}}``. Period keys are the header
            texts with ``/`` replaced by ``.`` and ``.01`` appended. An
            empty dict is returned when the page does not contain the
            expected table.

        Raises:
            requests.RequestException: if the HTTP request fails or times
                out.
        """
        response = re.get(
            self._FNGUIDE_URL % (code.strip()),
            headers=self.header,
            timeout=self._REQUEST_TIMEOUT,
        )
        return self._parse_highlight_table(response.content)

    @staticmethod
    def _parse_highlight_table(page):
        """Extract ``{period: {row label: value}}`` from FnGuide page HTML."""
        soup = BeautifulSoup(page, 'html.parser')
        try:
            table = (
                soup.find('body')
                .find('div', {'class': 'fng_body asp_body'})
                .find('div', {'id': 'div15'})
                .find('div', {'id': 'highlight_D_Y'})
            )
            body_rows = table.find('tbody').find_all('tr')
            header_cells = (
                table.find('thead')
                .find('tr', {'class': 'td_gapcolor2'})
                .find_all('th')
            )
        except AttributeError:
            # One of the find() calls returned None: unexpected page layout.
            return {}

        # Row label -> list of values, one value per period column.
        columns = {}
        for row in body_rows:
            label_tag = row.find('span', {'class': 'txt_acd'})
            if label_tag is None:
                label_tag = row.find('th')
            if label_tag is None:
                continue
            category = label_tag.text.strip()

            values = []
            for cell in row.find_all('td', {'class': 'r'}):
                text = cell.text.replace(',', '').strip()
                try:
                    values.append(float(text))
                except ValueError:
                    # Blank or non-numeric cells are stored as zero.
                    values.append(0)
            columns['%s' % (category)] = values

        # Period labels taken from the table header.
        periods = []
        for cell in header_cells:
            label_tag = cell.find('span', {'class': 'txt_acd'})
            label = cell.text if label_tag is None else label_tag.text
            periods.append(label.replace("/", ".") + ".01")

        if not columns:
            return {}
        try:
            highlight = DataFrame(columns, index=periods)
        except ValueError:
            # A row does not have exactly one value per period column.
            return {}
        return highlight.T.to_dict()

    def crawl_fnguide(self, inFileName):
        """Crawl every listed company and store the figures in SQLite.

        The ``fnguide`` table and its ``(CODE, ymd)`` index are created when
        missing. One row is inserted per company and period. Row labels
        that are absent from the scraped data are stored as ``0``.

        When a row for ``(CODE, ymd)`` already exists, the remaining periods
        of that company are skipped.

        Args:
            inFileName: path of the SQLite database file.
        """
        tableName = 'fnguide'
        metric_columns = [column for column, _ in self._METRIC_COLUMNS]
        all_columns = list(self._KEY_COLUMNS) + metric_columns

        column_defs = ['%s text' % column for column in self._KEY_COLUMNS]
        column_defs += ['%s REAL' % column for column in metric_columns]
        create_sql = (
            "CREATE TABLE IF NOT EXISTS " + tableName
            + " (" + ", ".join(column_defs) + ")"
        )
        create_key = (
            "CREATE INDEX IF NOT EXISTS " + tableName + "_idx on "
            + tableName + " (CODE, ymd) "
        )
        select_sql = 'SELECT 1 FROM ' + tableName + ' WHERE CODE=? and ymd=?'
        insert_sql = (
            "INSERT INTO " + tableName + "(" + ", ".join(all_columns) + ")"
            + " VALUES(" + ", ".join("?" * len(all_columns)) + ")"
        )

        # isolation_level=None puts the connection in autocommit mode.
        conn = sqlite3.connect(inFileName, isolation_level=None)
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(create_sql)
                cursor.execute(create_key)

                code_df = self.getStockInfo()
                for idx, (item_name, item_code) in enumerate(code_df.values, start=1):
                    print(idx, item_code, item_name,
                          self._FNGUIDE_URL % (item_code.strip()))
                    fnGuideData = self.get_fnguide_table(item_code)

                    for key_ymd, period_values in fnGuideData.items():
                        ymd = key_ymd.replace('(P)', '').replace('(E)', '')
                        # The period label carries a 'P' or 'E' marker,
                        # e.g. "(P)" / "(E)"; plain labels get an empty type.
                        if key_ymd.find('P') > 0:
                            period_type = 'P'
                        elif key_ymd.find('E') > 0:
                            period_type = 'E'
                        else:
                            period_type = ''

                        cursor.execute(select_sql, (item_code, ymd))
                        if cursor.fetchone() is not None:
                            # NOTE(review): the original code had an UPDATE
                            # here that was commented out (its WHERE clause
                            # filtered on CODE only, not on ymd) followed by
                            # ``break``. The ``break`` is preserved, so later
                            # periods of this company are not inserted once
                            # an existing row is met - confirm this is the
                            # intended behavior.
                            break

                        row = [item_code, item_name, ymd, period_type]
                        row.extend(
                            self._metric_value(period_values, labels)
                            for _, labels in self._METRIC_COLUMNS
                        )
                        cursor.execute(insert_sql, row)
            finally:
                cursor.close()
        finally:
            # Always release the database file, even when the crawl fails.
            conn.close()


if __name__ == "__main__":
    # The project root is four directory levels above this file.
    PROJECT_HOME = os.path.abspath(__file__)
    for _ in range(4):
        PROJECT_HOME = os.path.dirname(PROJECT_HOME)
    inFnguideFileName = PROJECT_HOME + '/resources/stock.db'

    crawler = FnGuideCrawler()
    crawler.crawl_fnguide(inFnguideFileName)