From b24c8d251d73686022238af59825de842bb388bf Mon Sep 17 00:00:00 2001 From: dsyoon Date: Sun, 16 Jul 2023 22:45:35 +0900 Subject: [PATCH] init --- analyzer/StockPriceDirectionAnalyzer.py | 236 +++++++++++++++++++++++- 1 file changed, 234 insertions(+), 2 deletions(-) diff --git a/analyzer/StockPriceDirectionAnalyzer.py b/analyzer/StockPriceDirectionAnalyzer.py index d441b0d..6e1c233 100644 --- a/analyzer/StockPriceDirectionAnalyzer.py +++ b/analyzer/StockPriceDirectionAnalyzer.py @@ -1,7 +1,239 @@ +import os +import sqlite3 +import pandas as pd + class StockPriceDirectionAnalyzer: - def __init__(self): + stock_info = None + + def __init__(self, stockFileName): + self.stock_info = {} + self.stockFileName = stockFileName return + def open(self): + self.conn = sqlite3.connect(self.stockFileName) + self.cursor = self.conn.cursor() + + def close(self): + self.cursor.close() + self.conn.close() + return + + def get_all_stock_code(self): + self.open() + + + sql = "SELECT distinct code FROM stock" + self.cursor.execute(sql) + rows = self.cursor.fetchall() + + sql = "SELECT distinct code, name FROM stock" + self.cursor.execute(sql) + rows = self.cursor.fetchall() + for row in rows: + self.stock_info[row[0]] = row[1] + + self.close() + return [row[0] for row in rows] + + def load(self, master_code, limit_count=2000): + self.open() + sql = "SELECT ymd, close FROM stock where CODE=? order by ymd" + self.cursor.execute(sql, (master_code, )) + master_data = self.cursor.fetchall() + self.close() + master_data = master_data[-limit_count:] + + return master_data + + def getClosePrice(self, stock_codes, first_day, limit_count=2000): + data = {} + self.open() + + + # 신용잔고 누적 + sql = "SELECT ymd, dep2_1 FROM meta_3 where ymd>=? order by ymd" + self.cursor.execute(sql, (first_day, )) + rows = self.cursor.fetchall() + if len(rows) >=limit_count: + #data['dep2_1'] = [(rows[i][0], rows[i][1]-rows[i-1][1]) for i in range(1, len(rows))] + data['dep2_1'] = [(rows[i][0], rows[i][1]) for i in range(len(rows))] + data['dep2_1_diff'] = [(rows[i][0], rows[i][1] - rows[i - 1][1]) for i in range(1, len(rows))] + self.stock_info['dep2_1'] = '신용잔고 누적' + self.stock_info['dep2_1_diff'] = '신용잔고 누적 차이' + + + # 투자자별 매매 동향 + sql = "SELECT ymd, pri, fori, ins, ins0, ins1, ins2, ins3, ins4, ins5, cor FROM meta_2 where ymd>=? order by ymd" + self.cursor.execute(sql, (first_day,)) + rows = self.cursor.fetchall() + if len(rows) >= limit_count: + data['pri'] = [(rows[i][0], rows[i][1]) for i in range(len(rows))] + data['fori'] = [(rows[i][0], rows[i][2]) for i in range(len(rows))] + data['ins'] = [(rows[i][0], rows[i][3]) for i in range(len(rows))] + data['ins0'] = [(rows[i][0], rows[i][4]) for i in range(len(rows))] + data['ins1'] = [(rows[i][0], rows[i][5]) for i in range(len(rows))] + data['ins2'] = [(rows[i][0], rows[i][6]) for i in range(len(rows))] + data['ins3'] = [(rows[i][0], rows[i][7]) for i in range(len(rows))] + data['ins4'] = [(rows[i][0], rows[i][8]) for i in range(len(rows))] + data['ins5'] = [(rows[i][0], rows[i][9]) for i in range(len(rows))] + data['cor'] = [(rows[i][0], rows[i][10]) for i in range(len(rows))] + self.stock_info['pri'] = '개인' + self.stock_info['fori'] = '외국인' + self.stock_info['ins'] = '기관합' + self.stock_info['ins0'] = '금융투자' + self.stock_info['ins1'] = '보험' + self.stock_info['ins2'] = '투신 (사모)' + self.stock_info['ins3'] = '은행' + self.stock_info['ins4'] = '기타금융기관' + self.stock_info['ins5'] = '연기금 등' + self.stock_info['cor'] = '기타법인' + + + # 환율 + sql = "SELECT distinct code FROM meta_1" + self.cursor.execute(sql) + rows = self.cursor.fetchall() + exchange_codes = [row[0] for row in rows] + + for exchange_code in exchange_codes: + sql = "SELECT ymd, price FROM meta_1 where code=? and ymd>=? order by ymd" + self.cursor.execute(sql, (exchange_code, first_day, )) + rows = self.cursor.fetchall() + if len(rows) >= limit_count: + data[exchange_code] = rows + self.stock_info[exchange_code] = exchange_code + + + # 원자재 + sql = "SELECT distinct code FROM meta_5" + self.cursor.execute(sql) + rows = self.cursor.fetchall() + meterial_codes = [row[0] for row in rows] + + for meterial_code in meterial_codes: + sql = "SELECT ymd, close FROM meta_5 where code=? and ymd>=? order by ymd" + self.cursor.execute(sql, (meterial_code, first_day, )) + rows = self.cursor.fetchall() + if len(rows) >= limit_count: + data[meterial_code] = rows + self.stock_info[meterial_code] = meterial_code + + # 종목 종가 + for stock_code in stock_codes: + sql = "SELECT ymd, close FROM stock where CODE=? and ymd>=? order by ymd" + self.cursor.execute(sql, (stock_code, first_day, )) + rows = self.cursor.fetchall() + if len(rows) >=limit_count: + data[stock_code] = rows + + + + + self.close() + return data + + def debug(self, master_data, data): + master_days = [item[0] for item in master_data] + + trimedData = {} + for i, stock_code in enumerate(data): + stock_data = data[stock_code] + stock_days = [item[0] for item in stock_data] + + if len(master_days) < len(stock_days): + diff = set(stock_days) - set(master_days) + else: + diff = set(master_days) - set(stock_days) + + tmp = [] + for item in stock_data: + if item[0] not in diff: + tmp.append((item[0], item[1])) + if len(tmp) == len(master_data): + trimedData[stock_code] = tmp + + return trimedData + + def trim(self, master_data, data): + master_days = [item[0] for item in master_data] + + trimedData = {} + for i, stock_code in enumerate(data): + stock_data = data[stock_code] + stock_days = [item[0] for item in stock_data] + + intersection = set(stock_days) & set(master_days) + + tmp = [] + for item in stock_data: + if item[0] in intersection: + tmp.append((item[0], item[1])) + trimedData[stock_code] = tmp + + return trimedData + + def analyzeCorRelation(self, master_data, trimedData): + corr_scores = {} + + master_days = [item[0] for item in master_data] + master_set = {} + for item in master_data: + master_set[item[0]] = item[1] + + + for stock_code in trimedData: + stock_data = trimedData[stock_code] + + stock_days = [item[0] for item in stock_data] + stock_set = {} + for item in stock_data: + stock_set[item[0]] = item[1] + + intersection = sorted(list(set(stock_days) & set(master_days))) + master_list = [] + stock_list = [] + for day in intersection: + master_list.append(master_set[day]) + stock_list.append(stock_set[day]) + + lst = [master_list[3:], stock_list[:-3]] + df = pd.DataFrame(lst).T + corr = df.corr(method='pearson') + + corr_scores[stock_code + "_" + self.stock_info[stock_code]] = corr.at[0, 1] + + return corr_scores + + def analyze(self, master_code, limit_count=1501): + stock_codes = self.get_all_stock_code() + + master_data = self.load(master_code, limit_count) + first_day = master_data[0][0] + + data = self.getClosePrice(stock_codes, first_day, limit_count) + + #debugData = self.debug(master_data, data) + trimedData = self.trim(master_data, data) + + corr_scores = self.analyzeCorRelation(master_data, trimedData) + return corr_scores + + if __name__ == "__main__": - stockPriceDirectionAnalyzer = StockPriceDirectionAnalyzer() \ No newline at end of file + PROJECT_HOME = "." + + RESOURCE_PATH = os.path.join(PROJECT_HOME, 'resources') + stockFileName = os.path.join(RESOURCE_PATH, 'stock.db') + + stockPriceDirectionAnalyzer = StockPriceDirectionAnalyzer(stockFileName) + corr_scores = stockPriceDirectionAnalyzer.analyze(master_code='252670') + corr_scores_list = sorted(corr_scores.items(), key=lambda item: item[1], reverse=True) + for item in corr_scores_list: + print("%s %4.3f" % (item[0], item[1])) + + + print('done...') + +