import os
import json
import sqlite3

import pandas as pd


class SPCorrelationAnalyzer:
    """Rank price/indicator series by their lagged correlation with a stock.

    All data is read from a SQLite file. The queries in this class touch the
    tables ``stock``, ``meta_1``, ``meta_2``, ``meta_3`` and ``meta_5``.
    ``stock_info`` maps every series code seen so far to a display label and
    is filled in as a side effect of ``get_all_stock_code`` and
    ``getClosePrice``.
    """

    # (column, display label) pairs for the value columns selected from
    # ``meta_2``, in SELECT order after ``ymd``.
    _INVESTOR_LABELS = (
        ('pri', '개인'),
        ('fori', '외국인'),
        ('ins', '기관합'),
        ('ins0', '금융투자'),
        ('ins1', '보험'),
        ('ins2', '투신 (사모)'),
        ('ins3', '은행'),
        ('ins4', '기타금융기관'),
        ('ins5', '연기금 등'),
        ('cor', '기타법인'),
    )

    stock_info = None

    def __init__(self, stockFileName):
        """Remember the SQLite file path; no connection is opened yet."""
        self.stock_info = {}
        self.stockFileName = stockFileName

    def open(self):
        """Open a new connection and cursor on the configured database."""
        self.conn = sqlite3.connect(self.stockFileName)
        self.cursor = self.conn.cursor()

    def close(self):
        """Close the cursor and connection created by ``open``."""
        self.cursor.close()
        self.conn.close()

    def _fetch(self, sql, params=()):
        """Run *sql* on the currently open cursor and return all rows."""
        self.cursor.execute(sql, params)
        return self.cursor.fetchall()

    def get_all_stock_code(self):
        """Return every distinct stock code and record its name.

        Returns:
            A list of codes, one per distinct ``(code, name)`` row of the
            ``stock`` table. ``self.stock_info[code]`` is set to the name.
        """
        self.open()
        try:
            rows = self._fetch("SELECT distinct code, name FROM stock")
        finally:
            # Release the connection even when the query fails.
            self.close()
        for code, name in rows:
            self.stock_info[code] = name
        return [row[0] for row in rows]

    def load(self, master_code, limit_count=2000):
        """Return the most recent ``(ymd, close)`` rows of one stock.

        Args:
            master_code: Code to look up in the ``stock`` table.
            limit_count: Maximum number of trailing rows to keep.

        Returns:
            Rows ordered by ``ymd`` ascending; empty if the code is unknown.
        """
        self.open()
        try:
            master_data = self._fetch(
                "SELECT ymd, close FROM stock where CODE=? order by ymd",
                (master_code,),
            )
        finally:
            self.close()
        return master_data[-limit_count:]

    def getClosePrice(self, stock_codes, first_day, limit_count=2000):
        """Collect every series that has enough rows on or after *first_day*.

        A series is kept only when it has at least *limit_count* rows with
        ``ymd >= first_day``. Labels for the non-stock series are written to
        ``self.stock_info``.

        Args:
            stock_codes: Stock codes whose close prices should be loaded.
            first_day: Lower bound (inclusive) applied to ``ymd``.
            limit_count: Minimum number of rows a series needs to be kept.

        Returns:
            Dict mapping series code to a list of ``(ymd, value)`` rows.
        """
        data = {}
        self.open()
        try:
            # Accumulated credit balance, plus its day-over-day change.
            rows = self._fetch(
                "SELECT ymd, dep2_1 FROM meta_3 where ymd>=? order by ymd",
                (first_day,),
            )
            if len(rows) >= limit_count:
                data['dep2_1'] = [(row[0], row[1]) for row in rows]
                data['dep2_1_diff'] = [
                    (cur[0], cur[1] - prev[1])
                    for prev, cur in zip(rows, rows[1:])
                ]
                self.stock_info['dep2_1'] = '신용잔고 누적'
                self.stock_info['dep2_1_diff'] = '신용잔고 누적 차이'

            # Trading trends by investor type.
            rows = self._fetch(
                "SELECT ymd, pri, fori, ins, ins0, ins1, ins2, ins3, ins4, "
                "ins5, cor FROM meta_2 where ymd>=? order by ymd",
                (first_day,),
            )
            if len(rows) >= limit_count:
                # Column 0 is ymd, so value columns start at index 1.
                for index, (column, _) in enumerate(self._INVESTOR_LABELS, 1):
                    data[column] = [(row[0], row[index]) for row in rows]
                for column, label in self._INVESTOR_LABELS:
                    self.stock_info[column] = label

            # Exchange rates.
            code_rows = self._fetch("SELECT distinct code FROM meta_1")
            for exchange_code in [row[0] for row in code_rows]:
                rows = self._fetch(
                    "SELECT ymd, price FROM meta_1 "
                    "where code=? and ymd>=? order by ymd",
                    (exchange_code, first_day),
                )
                if len(rows) >= limit_count:
                    data[exchange_code] = rows
                    self.stock_info[exchange_code] = exchange_code

            # Raw materials.
            code_rows = self._fetch("SELECT distinct code FROM meta_5")
            for material_code in [row[0] for row in code_rows]:
                rows = self._fetch(
                    "SELECT ymd, close FROM meta_5 "
                    "where code=? and ymd>=? order by ymd",
                    (material_code, first_day),
                )
                if len(rows) >= limit_count:
                    data[material_code] = rows
                    self.stock_info[material_code] = material_code

            # Stock closing prices.
            for stock_code in stock_codes:
                rows = self._fetch(
                    "SELECT ymd, close FROM stock "
                    "where CODE=? and ymd>=? order by ymd",
                    (stock_code, first_day),
                )
                if len(rows) >= limit_count:
                    data[stock_code] = rows
        finally:
            # Release the connection even when one of the queries fails.
            self.close()
        return data

    def debug(self, master_data, data):
        """Keep only series that line up one-to-one with *master_data*.

        For each series, days present in the longer of the two day lists but
        missing from the other are dropped from the series; the series is
        returned only if it then has exactly ``len(master_data)`` rows.
        """
        master_days = [item[0] for item in master_data]
        master_day_set = set(master_days)
        trimedData = {}
        for stock_code, stock_data in data.items():
            stock_days = [item[0] for item in stock_data]
            if len(master_days) < len(stock_days):
                diff = set(stock_days) - master_day_set
            else:
                diff = master_day_set - set(stock_days)
            kept = [
                (item[0], item[1])
                for item in stock_data
                if item[0] not in diff
            ]
            if len(kept) == len(master_data):
                trimedData[stock_code] = kept
        return trimedData

    def trim(self, master_data, data):
        """Drop from every series the rows whose day is not in *master_data*.

        Args:
            master_data: ``(ymd, value)`` rows of the reference series.
            data: Dict mapping series code to ``(ymd, value)`` rows.

        Returns:
            Dict with the same keys; each value keeps only rows whose day
            also appears in *master_data*, in their original order.
        """
        # Built once: it does not depend on the series being trimmed.
        master_day_set = {item[0] for item in master_data}
        return {
            stock_code: [
                (item[0], item[1])
                for item in stock_data
                if item[0] in master_day_set
            ]
            for stock_code, stock_data in data.items()
        }

    def analyzeCorRelation(self, master_data, trimedData, lag=3):
        """Compute the lagged Pearson correlation of each series vs. master.

        Both series are restricted to their common days (sorted ascending).
        The master values from position ``lag`` onward are then paired with
        the series values up to ``lag`` positions before the end, i.e. each
        series value is compared with the master value ``lag`` rows later.

        Args:
            master_data: ``(ymd, value)`` rows of the reference series.
            trimedData: Dict mapping series code to ``(ymd, value)`` rows.
            lag: Number of rows the master series is shifted by (>= 0).

        Returns:
            Dict mapping ``"<code>_<label>"`` to the correlation. The label
            comes from ``self.stock_info`` and falls back to the code itself.
            The value is NaN when pandas cannot compute a correlation.

        Raises:
            ValueError: If *lag* is negative.
        """
        if lag < 0:
            raise ValueError("lag must be non-negative")

        corr_scores = {}
        master_prices = {item[0]: item[1] for item in master_data}
        master_day_set = set(master_prices)

        for stock_code, stock_data in trimedData.items():
            stock_prices = {item[0]: item[1] for item in stock_data}
            common_days = sorted(set(stock_prices) & master_day_set)

            master_list = [master_prices[day] for day in common_days]
            stock_list = [stock_prices[day] for day in common_days]

            # Explicit end index: a plain ``[:-lag]`` would be empty for
            # lag == 0, and the clamp keeps short series empty.
            stock_end = max(len(stock_list) - lag, 0)
            df = pd.DataFrame([master_list[lag:], stock_list[:stock_end]]).T
            corr = df.corr(method='pearson')

            label = self.stock_info.get(stock_code, stock_code)
            corr_scores[f"{stock_code}_{label}"] = float(corr.at[0, 1])
        return corr_scores

    def analyze(self, master_code, limit_count=1501, lag=3):
        """Correlate every available series with *master_code*.

        Args:
            master_code: Code of the reference stock.
            limit_count: Number of trailing master rows to use; also the
                minimum number of rows another series needs to be included.
            lag: Row shift forwarded to ``analyzeCorRelation``.

        Returns:
            Dict mapping ``"<code>_<label>"`` to the lagged correlation, or
            an empty dict when *master_code* has no rows in the database.
        """
        stock_codes = self.get_all_stock_code()
        master_data = self.load(master_code, limit_count)
        if not master_data:
            # Unknown master code: there is no first day to anchor on.
            return {}
        first_day = master_data[0][0]
        data = self.getClosePrice(stock_codes, first_day, limit_count)
        trimedData = self.trim(master_data, data)
        return self.analyzeCorRelation(master_data, trimedData, lag=lag)


def main():
    """Run the analysis for a fixed list of codes and dump it as JSON."""
    project_home = "."
    resource_path = os.path.join(project_home, 'resources')
    stock_file_name = os.path.join(resource_path, 'stock.db')

    analyzer = SPCorrelationAnalyzer(stock_file_name)

    inputs = [
        {"stock_code": "122630", "stock_name": "KODEX 레버리지", "corr": []},
        {"stock_code": "252670", "stock_name": "KODEX 200선물인버스2X", "corr": []},
        {"stock_code": "^NDX", "stock_name": "NASDAQ 100", "corr": []},
        {"stock_code": "TQQQ", "stock_name": "ProShares UltraPro QQQ", "corr": []},
        {"stock_code": "SQQQ", "stock_name": "ProShares UltraPro Short QQQ", "corr": []},
        {"stock_code": "SOXL", "stock_name": "Direxion Daily Semiconductor Bull 3X Shares", "corr": []},
        {"stock_code": "SOXS", "stock_name": "Direxion Daily Semiconductor Bear -3X Shares", "corr": []},
    ]

    for entry in inputs:
        corr_scores = analyzer.analyze(master_code=entry["stock_code"])
        ranked = sorted(
            corr_scores.items(), key=lambda item: item[1], reverse=True
        )
        for label, score in ranked:
            entry["corr"].append({label: score})
            print(f"{entry['stock_code']},{label},{score:4.3f}")

    out_file_name = os.path.join(project_home, 'analyzer/corr.json')
    # Create the target directory up front so the results are not lost.
    os.makedirs(os.path.dirname(out_file_name), exist_ok=True)
    with open(out_file_name, 'w', encoding='utf-8') as file:
        json.dump(inputs, file, indent="\t", ensure_ascii=False)

    print('done...')


if __name__ == "__main__":
    main()