from bs4 import BeautifulSoup
from pandas import DataFrame, Series
import requests as re
import pandas as pd
import json
import sqlite3
import requests

class FnGuideCrawler:
    """Crawl the yearly financial-highlight table from FnGuide into SQLite.

    The list of companies comes from the KRX KIND download endpoint; for each
    company the FnGuide snapshot page is fetched, its ``highlight_D_Y`` table
    is parsed, and the result is stored as a JSON string keyed by stock code.
    """

    # Browser-like User-Agent sent with every HTTP request made by this class.
    header = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}

    # Seconds to wait for a server before giving up, so that one stalled
    # request cannot hang a whole crawl forever.
    timeout = 30

    def getStockInfo(self):
        """Download the KRX company listing and return names and codes.

        Returns:
            A DataFrame with two columns: ``name`` (company name) and
            ``code`` (stock code as a string, left-padded with zeros to at
            least six characters).

        Raises:
            requests.RequestException: if the download fails or times out.
            KeyError: if the expected Korean column headers are not present.
        """
        from io import StringIO

        response = requests.get(
            'http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13',
            headers=self.header,
            timeout=self.timeout,
        )
        # NOTE(review): response.text relies on requests' charset detection.
        # If the Korean column names come back garbled, response.encoding
        # probably has to be set explicitly - confirm against the endpoint.

        # read_html returns a *list* of tables; the listing is the first one.
        # The HTML is wrapped in StringIO because passing literal markup is
        # deprecated, and the code column is kept as text so that leading
        # zeros and non-numeric codes survive parsing.
        code_df = pd.read_html(
            StringIO(response.text),
            header=0,
            converters={'종목코드': str},
        )[0]

        # Stock codes are six characters long; pad shorter ones with zeros.
        code_df['종목코드'] = code_df['종목코드'].astype(str).str.strip().str.zfill(6)

        # Only the company name and the stock code are needed.
        code_df = code_df[['회사명', '종목코드']]

        # Rename the Korean column headers to English.
        return code_df.rename(columns={'회사명': 'name', '종목코드': 'code'})

    @staticmethod
    def _to_number(text):
        """Convert a table cell such as ``'1,234.5'`` to float; 0 if not numeric."""
        try:
            return float(text.replace(',', '').strip())
        except ValueError:
            return 0

    @staticmethod
    def _format_period(text):
        """Turn a header label such as ``'2019/12'`` into ``'2019.12.01'``."""
        return text.replace("/", ".") + ".01"

    @staticmethod
    def _parse_periods(table):
        """Return the period labels found in the header row of ``table``.

        Raises AttributeError when the ``thead`` or its ``td_gapcolor2`` row
        is missing.
        """
        header_row = table.find('thead').find('tr', {'class': 'td_gapcolor2'})
        periods = []
        for cell in header_row.find_all('th'):
            # Some header cells wrap the label in a span; others hold it directly.
            label = cell.find('span', {'class': 'txt_acd'})
            raw = cell.text if label is None else label.text
            periods.append(FnGuideCrawler._format_period(raw))
        return periods

    @staticmethod
    def _parse_row(row):
        """Return ``(category, values)`` for one body row, or None if unusable.

        A row is unusable when it has no label element or no value cells.
        """
        label = row.find('span', {'class': 'txt_acd'})
        if label is None:
            label = row.find('th')
        if label is None:
            return None

        values = [
            FnGuideCrawler._to_number(cell.text)
            for cell in row.find_all('td', {'class': 'r'})
        ]
        if not values:
            return None
        return label.text.strip(), values

    # Financial statements of listed companies crawled from FnGuide.
    # http://blog.naver.com/PostView.nhn?blogId=koko8624&logNo=221294884955&parentCategoryNo=&categoryNo=&viewDate=&isShowPopularPosts=false&from=postView
    def get_fnguide_table(self, code):
        """Fetch and parse the yearly highlight table for one stock code.

        Args:
            code: stock code as a string; surrounding whitespace is ignored.

        Returns:
            A dict mapping each period label (for example ``'2019.12.01'``)
            to a dict of ``{category: value}``. Cells that are not numeric
            are stored as 0. An empty dict is returned when the expected
            page structure is not found or the table has no usable rows.

        Raises:
            requests.RequestException: if the page cannot be downloaded.
            ValueError: if rows and header disagree on the number of periods.
        """
        url = 'http://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A{}'.format(code.strip())
        response = requests.get(url, headers=self.header, timeout=self.timeout)
        html = BeautifulSoup(response.content, 'html.parser')

        try:
            # Each find() returns None when the element is absent, so the
            # next attribute access raises AttributeError.
            fn_body = html.find('body').find('div', {'class': 'fng_body asp_body'})
            ur_table = fn_body.find('div', {'id': 'div15'})
            table = ur_table.find('div', {'id': 'highlight_D_Y'})
            rows = table.find('tbody').find_all('tr')
            # The header is the same for every row, so parse it only once.
            periods = self._parse_periods(table)
        except AttributeError:
            return {}

        columns = {}
        for row in rows:
            parsed = self._parse_row(row)
            if parsed is None:
                continue
            category, values = parsed
            columns[category] = values

        if not columns:
            return {}

        # Rows are periods and columns are categories; transposing gives
        # {period: {category: value}} from to_dict().
        frame = DataFrame(columns, index=periods)
        return frame.T.to_dict()

    def crawl_fnguide(self, inFileName):
        """Crawl every listed company and store the result in a SQLite file.

        The ``fnguide`` table is created when missing. For each company a new
        row is inserted, or the ``PRICE`` column of the existing row is
        overwritten; ``PRICE`` holds the JSON text of the parsed table.
        Progress is printed as ``<index> <company name>``.

        Args:
            inFileName: path of the SQLite database file.

        Raises:
            requests.RequestException: if any download fails; rows written
                before the failure are kept because the connection runs in
                autocommit mode.
        """
        tableName = 'fnguide'
        select_sql = 'SELECT * FROM ' + tableName + ' WHERE CODE=?'
        insert_sql = "INSERT INTO " + tableName + "(CODE, NAME, PRICE) VALUES(?, ?, ?)"
        update_sql = "UPDATE " + tableName + " SET PRICE=? WHERE CODE=?"

        # isolation_level=None puts the connection in autocommit mode.
        conn = sqlite3.connect(inFileName, isolation_level=None)
        try:
            cursor = conn.cursor()
            try:
                cursor.execute("CREATE TABLE IF NOT EXISTS " + tableName + " (CODE text PRIMARY KEY, NAME text, PRICE text)")

                code_df = self.getStockInfo()
                listing = code_df.itertuples(index=False, name=None)
                for idx, item in enumerate(listing, start=1):
                    item_name = item[0]
                    item_code = item[1]
                    print(idx, item_name)

                    fnGuideData = self.get_fnguide_table(item_code)
                    text = json.dumps(fnGuideData, ensure_ascii=False)

                    cursor.execute(select_sql, (item_code, ))
                    if cursor.fetchone() is None:
                        cursor.execute(insert_sql, (item_code, item_name, text))
                    else:
                        cursor.execute(update_sql, (text, item_code))
            finally:
                cursor.close()
        finally:
            # Always release the database handle, even when a fetch fails.
            conn.close()
        return

if __name__ == "__main__":
    # Manual smoke run: fetch and parse the table for a single stock code.
    # The parsed result is not used here.
    crawler = FnGuideCrawler()
    crawler.get_fnguide_table(code='155660')