diff --git a/Simulation.py b/Simulation.py index d5384b0..bba5fb4 100644 --- a/Simulation.py +++ b/Simulation.py @@ -1,4 +1,4 @@ -import time +import numpy as np from math import nan import pandas as pd import plotly.graph_objects as go @@ -152,9 +152,9 @@ class Simulation (HTS): self.labelMaker.showLabels(stock_code, today) else: if method == "ml": - LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=1) + LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=3) data = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA) - X, Y = self.stock2Vector.getDataset2D(data) + X, Y = self.stock2Vector.getVectorData(data) predY = self.stockPredictor.predict(X, Y) bsLine = None @@ -180,11 +180,11 @@ if __name__ == "__main__": # to check bying stock_codes = { - "252670": ['20220801', '20220802', '20220803', '20220804', '20220805', '20220808', '20220809', '20220810', '20220811'], - "122630": ['20220801', '20220802', '20220803', '20220804', '20220805', '20220808', '20220809', '20220810', '20220811'], + "252670": ['20220805', '20220808', '20220809', '20220810', '20220811'], + "122630": ['20220805', '20220808', '20220809', '20220810', '20220811'], } - method = "rule" # "rule", "ml", "answer" + method = "ml" # "rule", "ml", "answer" for stock_code in stock_codes: simulation = Simulation(RESOURCE_PATH) diff --git a/VitTrainer.py b/VitTrainer.py index a9809c5..57ca910 100755 --- a/VitTrainer.py +++ b/VitTrainer.py @@ -1,11 +1,13 @@ # tensor - numpy - PILImage 변환 (https://qlsenddl-lab.tistory.com/37) + import os os.environ['KMP_DUPLICATE_LIB_OK']='True' import random import numpy as np import torch -from datasets import Dataset, load_dataset +from datasets import Dataset, load_metric, ClassLabel from datasets import load_metric +from transformers import AutoConfig from transformers import TrainingArguments, Trainer from transformers import ViTForImageClassification from torch.utils.data import DataLoader @@ -44,14 +46,14 @@ class VitTrainer: save_strategy="epoch", evaluation_strategy="epoch", learning_rate=2e-5, - per_device_train_batch_size=16, - per_device_eval_batch_size=16, + per_device_train_batch_size=32, + per_device_eval_batch_size=32, weight_decay=0.01, load_best_model_at_end=True, metric_for_best_model="accuracy", logging_dir=os.path.join(self.RESOURCE_PATH, 'model', 'logs'), remove_unused_columns=False, - num_train_epochs=14, + num_train_epochs=4, ) return @@ -117,7 +119,7 @@ class VitTrainer: train_ds.set_transform(self.train_transforms) val_ds.set_transform(self.val_transforms) - train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4) + train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=32) batch = next(iter(train_dataloader)) for k,v in batch.items(): @@ -157,7 +159,7 @@ class VitTrainer: train_ds.set_transform(self.train_transforms) val_ds.set_transform(self.val_transforms) - train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4) + train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=32) batch = next(iter(train_dataloader)) for k,v in batch.items(): @@ -211,6 +213,14 @@ class VitTrainer: train_ds = Dataset.from_dict(train_data) val_ds = Dataset.from_dict(val_dsta) + features = train_ds.features.copy() + features["label"] = ClassLabel(num_classes=self.num_labels, names=["none", "sell", "buy"]) + def adjust_labels(batch): + batch["label"] = [lbl for lbl in batch["label"]] + return batch + train_ds = train_ds.map(adjust_labels, batched=True, features=features) + val_ds = train_ds.map(adjust_labels, batched=True, features=features) + return train_ds, val_ds if __name__ == "__main__": @@ -222,107 +232,5 @@ if __name__ == "__main__": stock_code = "252670" vitTrainer = VitTrainer(RESOURCE_PATH) - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220701", eDate="20220731") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220809", eDate="20220812") vitTrainer.train(train_ds, val_ds, model_path) - - """ - print("ym: 2020-07") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200701", eDate="20200731") - vitTrainer.train(train_ds, val_ds, model_path) - - print ("ym: 2020-08") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200725", eDate="20200831") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2020-09") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200825", eDate="20200931") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2020-10") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200925", eDate="20201031") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2020-11") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201025", eDate="20201131") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2020-12") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201125", eDate="20201231") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-01") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201225", eDate="20210131") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-02") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210125", eDate="20210231") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-03") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210225", eDate="20210331") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-04") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210325", eDate="20210431") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-05") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210425", eDate="20210531") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-06") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210525", eDate="20210631") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-07") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210625", eDate="20210731") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-08") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210725", eDate="20210831") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-09") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210825", eDate="20210931") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-10") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210925", eDate="20212031") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-11") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211025", eDate="20211131") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2021-12") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211125", eDate="20211231") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2022-01") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211225", eDate="20220131") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2022-02") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220125", eDate="20220231") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2022-03") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220225", eDate="20220331") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2022-04") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220325", eDate="20220431") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2022-05") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220425", eDate="20220531") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2022-06") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220525", eDate="20220631") - vitTrainer.finetunning(train_ds, val_ds, model_path) - - print("ym: 2022-07") - train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220625", eDate="20220731") - vitTrainer.finetunning(train_ds, val_ds, model_path) - """ \ No newline at end of file diff --git a/hts/BuySellChecker.py b/hts/BuySellChecker.py index f4c256f..775ef4b 100644 --- a/hts/BuySellChecker.py +++ b/hts/BuySellChecker.py @@ -1007,22 +1007,34 @@ class BuySellChecker: avg3 = [item[0] for item in avg3_list] avg5_list = close_df.rolling(window=5).mean().fillna(close[0]).values.tolist() avg5 = [item[0] for item in avg5_list] + avg6_list = close_df.rolling(window=6).mean().fillna(close[0]).values.tolist() + avg6 = [item[0] for item in avg6_list] + avg9_list = close_df.rolling(window=9).mean().fillna(close[0]).values.tolist() + avg9 = [item[0] for item in avg9_list] avg10_list = close_df.rolling(window=10).mean().fillna(close[0]).values.tolist() avg10 = [item[0] for item in avg10_list] + avg12_list = close_df.rolling(window=12).mean().fillna(close[0]).values.tolist() + avg12 = [item[0] for item in avg12_list] avg20_list = close_df.rolling(window=20).mean().fillna(close[0]).values.tolist() avg20 = [item[0] for item in avg20_list] + avg24_list = close_df.rolling(window=24).mean().fillna(close[0]).values.tolist() + avg24 = [item[0] for item in avg24_list] avg30_list = close_df.rolling(window=30).mean().fillna(close[0]).values.tolist() avg30 = [item[0] for item in avg30_list] avg60_list = close_df.rolling(window=60).mean().fillna(close[0]).values.tolist() avg60 = [item[0] for item in avg60_list] - abs_avg_1 = [max(avg3[i], avg5[i], avg10[i], avg20[i], avg30[i], avg60[i]) - min(avg3[i], avg5[i], avg10[i], avg20[i], avg30[i], avg60[i]) for i in range(0, len(close))] - abs_avg_2 = [max(avg3[i], avg5[i], avg10[i], avg20[i], avg30[i]) - min(avg3[i], avg5[i], avg10[i], avg20[i], avg30[i]) for i in range(0, len(close))] - abs_avg_3 = [max(avg3[i], avg5[i], avg10[i], avg20[i]) - min(avg3[i], avg5[i], avg10[i], avg20[i]) for i in range(0, len(close))] - abs_avg_4 = [max(avg3[i], avg5[i], avg10[i]) - min(avg3[i], avg5[i], avg10[i]) for i in range(0, len(close))] - abs_avg_5 = [max(avg3[i], avg5[i]) - min(avg3[i], avg5[i]) for i in range(0, len(close))] + abs_avg_1 = [max(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i], avg24[i], avg30[i], avg60[i]) - min(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i], avg30[i], avg60[i]) for i in range(0, len(close))] + abs_avg_2 = [max(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i], avg24[i], avg30[i]) - min(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i], avg24[i], avg30[i]) for i in range(0, len(close))] + abs_avg_3 = [max(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i]) - min(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i]) for i in range(0, len(close))] + abs_avg_4 = [max(avg3[i], avg5[i], avg6[i], avg9[i], avg12[i]) - min(avg3[i], avg5[i], avg6[i], avg9[i], avg12[i]) for i in range(0, len(close))] + abs_avg_5 = [max(avg3[i], avg5[i], avg6[i], avg9[i]) - min(avg3[i], avg5[i], avg6[i], avg9[i]) for i in range(0, len(close))] + abs_avg_6 = [max(avg3[i], avg5[i], avg6[i]) - min(avg3[i], avg5[i], avg6[i]) for i in range(0, len(close))] diff_avg3_avg5 = [avg3[i]-avg5[i] for i in range(0, len(close))] + diff_avg3_avg6 = [avg3[i] - avg6[i] for i in range(0, len(close))] + diff_avg3_avg9 = [avg3[i] - avg9[i] for i in range(0, len(close))] diff_avg3_avg10 = [avg3[i] - avg10[i] for i in range(0, len(close))] + diff_avg3_avg12 = [avg3[i] - avg12[i] for i in range(0, len(close))] diff_avg3_avg20 = [avg3[i] - avg20[i] for i in range(0, len(close))] diff_avg3_avg30 = [avg3[i] - avg30[i] for i in range(0, len(close))] diff_avg3_avg60 = [avg3[i] - avg60[i] for i in range(0, len(close))] @@ -1037,7 +1049,10 @@ class BuySellChecker: diff_avg20_avg60 = [avg20[i] - avg60[i] for i in range(0, len(close))] diff_avg30_avg60 = [avg30[i] - avg60[i] for i in range(0, len(close))] diff_avg3_avg5_sign = self.getSign(avg3, avg5) + diff_avg3_avg6_sign = self.getSign(avg3, avg6) + diff_avg3_avg9_sign = self.getSign(avg3, avg9) diff_avg3_avg10_sign = self.getSign(avg3, avg10) + diff_avg3_avg12_sign = self.getSign(avg3, avg12) diff_avg3_avg20_sign = self.getSign(avg3, avg20) diff_avg3_avg30_sign = self.getSign(avg3, avg30) diff_avg3_avg60_sign = self.getSign(avg3, avg60) @@ -1072,7 +1087,7 @@ class BuySellChecker: STOCK = [] for i in range(len(open)): STOCK.append({'volume': vol[i], 'close': close[i], 'open': open[i], 'high': high[i], 'low': low[i], - 'avg3': avg3[i], 'avg5': avg5[i],'avg10': avg10[i],'avg20': avg20[i],'avg30': avg30[i],'avg60': avg60[i]}) + 'avg3': avg3[i], 'avg5': avg5[i],'avg6': avg6[i],'avg9': avg9[i],'avg10': avg10[i],'avg12': avg12[i],'avg20': avg20[i],'avg30': avg30[i],'avg60': avg60[i]}) # stochastic stochastic_df = self.stochastic.apply(STOCK, n=30, m=5, t=5) @@ -1179,16 +1194,19 @@ class BuySellChecker: temp = { "date": point_temp, "open": open, "high": high, "low": low, "close": close, "volume": vol, - "avg3": avg3, "avg5": avg5, "avg10": avg10, "avg20": avg20, "avg30": avg30, "avg60": avg60, + "avg3": avg3, "avg5": avg5, "avg6": avg6, "avg9": avg9, "avg10": avg10, "avg12": avg12, "avg20": avg20, "avg30": avg30, "avg60": avg60, "upper": upper, "lower": lower, "macd": macd, "macds": macds, "macdo": macdo, "fast_k": fast_k, "slow_k": slow_k, "slow_d": slow_d, "rsi": rsi, "rsis": rsis, "changeLine": changeLine, "baseLine": baseLine, "leadingSpan1": leadingSpan1, "leadingSpan2": leadingSpan2, "diff_price": diff_price, "height": height, "top_tail_height": top_tail_height, "bottom_tail_height": bottom_tail_height, - "abs_avg_1": abs_avg_1, "abs_avg_2": abs_avg_2, "abs_avg_3": abs_avg_3, "abs_avg_4": abs_avg_4, "abs_avg_5": abs_avg_5, + "abs_avg_1": abs_avg_1, "abs_avg_2": abs_avg_2, "abs_avg_3": abs_avg_3, "abs_avg_4": abs_avg_4, "abs_avg_5": abs_avg_5, "abs_avg_6": abs_avg_6, "diff_avg3_avg5": diff_avg3_avg5, + "diff_avg3_avg6": diff_avg3_avg6, + "diff_avg3_avg9": diff_avg3_avg9, "diff_avg3_avg10": diff_avg3_avg10, + "diff_avg3_avg12": diff_avg3_avg12, "diff_avg3_avg20": diff_avg3_avg20, "diff_avg3_avg30": diff_avg3_avg30, "diff_avg3_avg60": diff_avg3_avg60, diff --git a/stock/util/Stock2Vector.py b/stock/util/Stock2Vector.py index 976d9a3..b01be56 100644 --- a/stock/util/Stock2Vector.py +++ b/stock/util/Stock2Vector.py @@ -285,9 +285,13 @@ class Stock2Vector(HTS): Y = np.asarray(Y, dtype='int64') return X, Y - def getVectorData(self, data, type="avg10", VECTOR_SIZE = 32): + def getVectorData(self, data, VECTOR_SIZE = 32): + return self.getVectorData_2(data, VECTOR_SIZE) + + def getVectorData_1(self, data, VECTOR_SIZE): df = self.buySellChecker.getVectorFeature(data) + # avg10, 볼린져밴드 상단과 하단의 차이, rsi, avg3만 이용한다. # channel1: avg10, channel2: diff_upper_lower, channel3: abs_avg_2, channel4: abs_avg_3 avg10 = df['avg10'].tolist() diff_upper_lower = df['diff_upper_lower'].tolist() @@ -313,11 +317,55 @@ class Stock2Vector(HTS): h = 0 batch_X.append(X) batch_Y.append(label[i]) - + """ + if label[i] == 2: + batch_Y.append([0, 0, 1]) + elif label[i] == 1: + batch_Y.append([0, 1, 0]) + else: + batch_Y.append([1, 0, 0]) + """ batch_X = np.asarray(batch_X) batch_Y = np.asarray(batch_Y, dtype='int64') return batch_X, batch_Y + def getVectorData_2(self, data, VECTOR_SIZE = 32): + df = self.buySellChecker.getVectorFeature(data) + + # avg10, 볼린져밴드 상단과 하단의 차이, rsi, avg3만 이용한다. + # channel1: avg10, channel2: diff_upper_lower, channel3: abs_avg_2, channel4: abs_avg_3 + avg3 = df['avg3'].tolist() + avg6 = df['avg6'].tolist() + avg9 = df['avg9'].tolist() + + + diff_upper_lower = df['diff_upper_lower'].tolist() + rsi = df['rsi'].tolist() + abs_avg_3 = df['abs_avg_3'].tolist() + + size = len(avg10) + batch_X, batch_Y = [], [] + X = np.zeros((4, VECTOR_SIZE, VECTOR_SIZE)) + label = df['label'].tolist() + for i in range(VECTOR_SIZE * VECTOR_SIZE - 1, size): + w, h = 0, 0 + for j in range(i - VECTOR_SIZE * VECTOR_SIZE + 1, i + 1): + X[0, h, w] = avg10[j] + X[1, h, w] = diff_upper_lower[j] + X[2, h, w] = abs_avg_3[j] + X[3, h, w] = rsi[j] + w += 1 + if w >= VECTOR_SIZE: + w = 0 + h += 1 + if h >= VECTOR_SIZE: + h = 0 + batch_X.append(X) + batch_Y.append(label[i]) + + batch_X = np.asarray(batch_X) + batch_Y = np.asarray(batch_Y, dtype='int64') + return batch_X, batch_Y def getDataset3D(self, data, VECTOR_SIZE = 299): df, minmax_df = self.preprocessData(data) diff --git a/stock/util/StockPredictor.py b/stock/util/StockPredictor.py index b1c2a5f..6af6007 100644 --- a/stock/util/StockPredictor.py +++ b/stock/util/StockPredictor.py @@ -102,7 +102,7 @@ class StockPredictor: def predict(self, X, Y=None): print("Data count: ", len(X)) - X = [self.trans(torch.tensor([x])) for x in X] + X = [self.trans(torch.tensor(x)) for x in X] test_X = X test_Y = Y