This commit is contained in:
dsyoon
2022-08-13 17:16:47 +09:00
parent 8ec05b8447
commit f9ffa363fa
5 changed files with 100 additions and 126 deletions

View File

@@ -1,4 +1,4 @@
import time
import numpy as np
from math import nan
import pandas as pd
import plotly.graph_objects as go
@@ -152,9 +152,9 @@ class Simulation (HTS):
self.labelMaker.showLabels(stock_code, today)
else:
if method == "ml":
LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=1)
LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=3)
data = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
X, Y = self.stock2Vector.getDataset2D(data)
X, Y = self.stock2Vector.getVectorData(data)
predY = self.stockPredictor.predict(X, Y)
bsLine = None
@@ -180,11 +180,11 @@ if __name__ == "__main__":
# to check bying
stock_codes = {
"252670": ['20220801', '20220802', '20220803', '20220804', '20220805', '20220808', '20220809', '20220810', '20220811'],
"122630": ['20220801', '20220802', '20220803', '20220804', '20220805', '20220808', '20220809', '20220810', '20220811'],
"252670": ['20220805', '20220808', '20220809', '20220810', '20220811'],
"122630": ['20220805', '20220808', '20220809', '20220810', '20220811'],
}
method = "rule" # "rule", "ml", "answer"
method = "ml" # "rule", "ml", "answer"
for stock_code in stock_codes:
simulation = Simulation(RESOURCE_PATH)

View File

@@ -1,11 +1,13 @@
# tensor - numpy - PILImage 변환 (https://qlsenddl-lab.tistory.com/37)
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import random
import numpy as np
import torch
from datasets import Dataset, load_dataset
from datasets import Dataset, load_metric, ClassLabel
from datasets import load_metric
from transformers import AutoConfig
from transformers import TrainingArguments, Trainer
from transformers import ViTForImageClassification
from torch.utils.data import DataLoader
@@ -44,14 +46,14 @@ class VitTrainer:
save_strategy="epoch",
evaluation_strategy="epoch",
learning_rate=2e-5,
per_device_train_batch_size=16,
per_device_eval_batch_size=16,
per_device_train_batch_size=32,
per_device_eval_batch_size=32,
weight_decay=0.01,
load_best_model_at_end=True,
metric_for_best_model="accuracy",
logging_dir=os.path.join(self.RESOURCE_PATH, 'model', 'logs'),
remove_unused_columns=False,
num_train_epochs=14,
num_train_epochs=4,
)
return
@@ -117,7 +119,7 @@ class VitTrainer:
train_ds.set_transform(self.train_transforms)
val_ds.set_transform(self.val_transforms)
train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4)
train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=32)
batch = next(iter(train_dataloader))
for k,v in batch.items():
@@ -157,7 +159,7 @@ class VitTrainer:
train_ds.set_transform(self.train_transforms)
val_ds.set_transform(self.val_transforms)
train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4)
train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=32)
batch = next(iter(train_dataloader))
for k,v in batch.items():
@@ -211,6 +213,14 @@ class VitTrainer:
train_ds = Dataset.from_dict(train_data)
val_ds = Dataset.from_dict(val_dsta)
# Re-declare the "label" column as a ClassLabel feature on both splits.
features = train_ds.features.copy()
features["label"] = ClassLabel(num_classes=self.num_labels, names=["none", "sell", "buy"])

def adjust_labels(batch):
    """Return the batch with its "label" column rebuilt as a plain list."""
    batch["label"] = list(batch["label"])
    return batch

train_ds = train_ds.map(adjust_labels, batched=True, features=features)
# Map the validation split itself. Mapping train_ds here would replace the
# validation data with the training data and make evaluation meaningless.
val_ds = val_ds.map(adjust_labels, batched=True, features=features)
return train_ds, val_ds
if __name__ == "__main__":
@@ -222,107 +232,5 @@ if __name__ == "__main__":
stock_code = "252670"
vitTrainer = VitTrainer(RESOURCE_PATH)
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220701", eDate="20220731")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220809", eDate="20220812")
vitTrainer.train(train_ds, val_ds, model_path)
"""
print("ym: 2020-07")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200701", eDate="20200731")
vitTrainer.train(train_ds, val_ds, model_path)
print ("ym: 2020-08")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200725", eDate="20200831")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2020-09")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200825", eDate="20200931")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2020-10")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200925", eDate="20201031")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2020-11")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201025", eDate="20201131")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2020-12")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201125", eDate="20201231")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-01")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201225", eDate="20210131")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-02")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210125", eDate="20210231")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-03")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210225", eDate="20210331")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-04")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210325", eDate="20210431")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-05")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210425", eDate="20210531")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-06")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210525", eDate="20210631")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-07")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210625", eDate="20210731")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-08")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210725", eDate="20210831")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-09")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210825", eDate="20210931")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-10")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210925", eDate="20212031")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-11")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211025", eDate="20211131")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-12")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211125", eDate="20211231")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-01")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211225", eDate="20220131")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-02")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220125", eDate="20220231")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-03")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220225", eDate="20220331")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-04")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220325", eDate="20220431")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-05")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220425", eDate="20220531")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-06")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220525", eDate="20220631")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-07")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220625", eDate="20220731")
vitTrainer.finetunning(train_ds, val_ds, model_path)
"""

View File

@@ -1007,22 +1007,34 @@ class BuySellChecker:
avg3 = [item[0] for item in avg3_list]
avg5_list = close_df.rolling(window=5).mean().fillna(close[0]).values.tolist()
avg5 = [item[0] for item in avg5_list]
avg6_list = close_df.rolling(window=6).mean().fillna(close[0]).values.tolist()
avg6 = [item[0] for item in avg6_list]
avg9_list = close_df.rolling(window=9).mean().fillna(close[0]).values.tolist()
avg9 = [item[0] for item in avg9_list]
avg10_list = close_df.rolling(window=10).mean().fillna(close[0]).values.tolist()
avg10 = [item[0] for item in avg10_list]
avg12_list = close_df.rolling(window=12).mean().fillna(close[0]).values.tolist()
avg12 = [item[0] for item in avg12_list]
avg20_list = close_df.rolling(window=20).mean().fillna(close[0]).values.tolist()
avg20 = [item[0] for item in avg20_list]
avg24_list = close_df.rolling(window=24).mean().fillna(close[0]).values.tolist()
avg24 = [item[0] for item in avg24_list]
avg30_list = close_df.rolling(window=30).mean().fillna(close[0]).values.tolist()
avg30 = [item[0] for item in avg30_list]
avg60_list = close_df.rolling(window=60).mean().fillna(close[0]).values.tolist()
avg60 = [item[0] for item in avg60_list]
abs_avg_1 = [max(avg3[i], avg5[i], avg10[i], avg20[i], avg30[i], avg60[i]) - min(avg3[i], avg5[i], avg10[i], avg20[i], avg30[i], avg60[i]) for i in range(0, len(close))]
abs_avg_2 = [max(avg3[i], avg5[i], avg10[i], avg20[i], avg30[i]) - min(avg3[i], avg5[i], avg10[i], avg20[i], avg30[i]) for i in range(0, len(close))]
abs_avg_3 = [max(avg3[i], avg5[i], avg10[i], avg20[i]) - min(avg3[i], avg5[i], avg10[i], avg20[i]) for i in range(0, len(close))]
abs_avg_4 = [max(avg3[i], avg5[i], avg10[i]) - min(avg3[i], avg5[i], avg10[i]) for i in range(0, len(close))]
abs_avg_5 = [max(avg3[i], avg5[i]) - min(avg3[i], avg5[i]) for i in range(0, len(close))]
# Spread between the largest and smallest moving average at each index.
# Both max() and min() read the same tuple so the two sides cannot drift
# apart (the min() side previously left out avg24).
abs_avg_1 = []
for i in range(0, len(close)):
    averages = (avg3[i], avg5[i], avg6[i], avg9[i], avg10[i],
                avg12[i], avg20[i], avg24[i], avg30[i], avg60[i])
    abs_avg_1.append(max(averages) - min(averages))
abs_avg_2 = [max(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i], avg24[i], avg30[i]) - min(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i], avg24[i], avg30[i]) for i in range(0, len(close))]
abs_avg_3 = [max(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i]) - min(avg3[i], avg5[i], avg6[i], avg9[i], avg10[i], avg12[i], avg20[i]) for i in range(0, len(close))]
abs_avg_4 = [max(avg3[i], avg5[i], avg6[i], avg9[i], avg12[i]) - min(avg3[i], avg5[i], avg6[i], avg9[i], avg12[i]) for i in range(0, len(close))]
abs_avg_5 = [max(avg3[i], avg5[i], avg6[i], avg9[i]) - min(avg3[i], avg5[i], avg6[i], avg9[i]) for i in range(0, len(close))]
abs_avg_6 = [max(avg3[i], avg5[i], avg6[i]) - min(avg3[i], avg5[i], avg6[i]) for i in range(0, len(close))]
diff_avg3_avg5 = [avg3[i]-avg5[i] for i in range(0, len(close))]
diff_avg3_avg6 = [avg3[i] - avg6[i] for i in range(0, len(close))]
diff_avg3_avg9 = [avg3[i] - avg9[i] for i in range(0, len(close))]
diff_avg3_avg10 = [avg3[i] - avg10[i] for i in range(0, len(close))]
diff_avg3_avg12 = [avg3[i] - avg12[i] for i in range(0, len(close))]
diff_avg3_avg20 = [avg3[i] - avg20[i] for i in range(0, len(close))]
diff_avg3_avg30 = [avg3[i] - avg30[i] for i in range(0, len(close))]
diff_avg3_avg60 = [avg3[i] - avg60[i] for i in range(0, len(close))]
@@ -1037,7 +1049,10 @@ class BuySellChecker:
diff_avg20_avg60 = [avg20[i] - avg60[i] for i in range(0, len(close))]
diff_avg30_avg60 = [avg30[i] - avg60[i] for i in range(0, len(close))]
diff_avg3_avg5_sign = self.getSign(avg3, avg5)
diff_avg3_avg6_sign = self.getSign(avg3, avg6)
diff_avg3_avg9_sign = self.getSign(avg3, avg9)
diff_avg3_avg10_sign = self.getSign(avg3, avg10)
diff_avg3_avg12_sign = self.getSign(avg3, avg12)
diff_avg3_avg20_sign = self.getSign(avg3, avg20)
diff_avg3_avg30_sign = self.getSign(avg3, avg30)
diff_avg3_avg60_sign = self.getSign(avg3, avg60)
@@ -1072,7 +1087,7 @@ class BuySellChecker:
STOCK = []
for i in range(len(open)):
STOCK.append({'volume': vol[i], 'close': close[i], 'open': open[i], 'high': high[i], 'low': low[i],
'avg3': avg3[i], 'avg5': avg5[i],'avg10': avg10[i],'avg20': avg20[i],'avg30': avg30[i],'avg60': avg60[i]})
'avg3': avg3[i], 'avg5': avg5[i],'avg6': avg6[i],'avg9': avg9[i],'avg10': avg10[i],'avg12': avg12[i],'avg20': avg20[i],'avg30': avg30[i],'avg60': avg60[i]})
# stochastic
stochastic_df = self.stochastic.apply(STOCK, n=30, m=5, t=5)
@@ -1179,16 +1194,19 @@ class BuySellChecker:
temp = {
"date": point_temp,
"open": open, "high": high, "low": low, "close": close, "volume": vol,
"avg3": avg3, "avg5": avg5, "avg10": avg10, "avg20": avg20, "avg30": avg30, "avg60": avg60,
"avg3": avg3, "avg5": avg5, "avg6": avg6, "avg9": avg9, "avg10": avg10, "avg12": avg12, "avg20": avg20, "avg30": avg30, "avg60": avg60,
"upper": upper, "lower": lower,
"macd": macd, "macds": macds, "macdo": macdo,
"fast_k": fast_k, "slow_k": slow_k, "slow_d": slow_d,
"rsi": rsi, "rsis": rsis,
"changeLine": changeLine, "baseLine": baseLine, "leadingSpan1": leadingSpan1, "leadingSpan2": leadingSpan2,
"diff_price": diff_price, "height": height, "top_tail_height": top_tail_height, "bottom_tail_height": bottom_tail_height,
"abs_avg_1": abs_avg_1, "abs_avg_2": abs_avg_2, "abs_avg_3": abs_avg_3, "abs_avg_4": abs_avg_4, "abs_avg_5": abs_avg_5,
"abs_avg_1": abs_avg_1, "abs_avg_2": abs_avg_2, "abs_avg_3": abs_avg_3, "abs_avg_4": abs_avg_4, "abs_avg_5": abs_avg_5, "abs_avg_6": abs_avg_6,
"diff_avg3_avg5": diff_avg3_avg5,
"diff_avg3_avg6": diff_avg3_avg6,
"diff_avg3_avg9": diff_avg3_avg9,
"diff_avg3_avg10": diff_avg3_avg10,
"diff_avg3_avg12": diff_avg3_avg12,
"diff_avg3_avg20": diff_avg3_avg20,
"diff_avg3_avg30": diff_avg3_avg30,
"diff_avg3_avg60": diff_avg3_avg60,

View File

@@ -285,9 +285,13 @@ class Stock2Vector(HTS):
Y = np.asarray(Y, dtype='int64')
return X, Y
def getVectorData(self, data, type="avg10", VECTOR_SIZE = 32):
def getVectorData(self, data, VECTOR_SIZE = 32):
    """Build the vector dataset for ``data`` using the active builder.

    Thin dispatcher: forwards ``data`` and ``VECTOR_SIZE`` unchanged to
    ``getVectorData_2`` and returns whatever that method returns.
    """
    build = self.getVectorData_2
    return build(data, VECTOR_SIZE)
def getVectorData_1(self, data, VECTOR_SIZE):
df = self.buySellChecker.getVectorFeature(data)
# avg10, 볼린져밴드 상단과 하단의 차이, rsi, avg3만 이용한다.
# channel1: avg10, channel2: diff_upper_lower, channel3: abs_avg_2, channel4: abs_avg_3
avg10 = df['avg10'].tolist()
diff_upper_lower = df['diff_upper_lower'].tolist()
@@ -313,11 +317,55 @@ class Stock2Vector(HTS):
h = 0
batch_X.append(X)
batch_Y.append(label[i])
"""
if label[i] == 2:
batch_Y.append([0, 0, 1])
elif label[i] == 1:
batch_Y.append([0, 1, 0])
else:
batch_Y.append([1, 0, 0])
"""
batch_X = np.asarray(batch_X)
batch_Y = np.asarray(batch_Y, dtype='int64')
return batch_X, batch_Y
def getVectorData_2(self, data, VECTOR_SIZE = 32):
    """Turn the feature frame into 4-channel square samples with labels.

    Every sample covers the most recent ``VECTOR_SIZE * VECTOR_SIZE`` rows
    ending at one row of the frame, laid out row-major into a
    ``(4, VECTOR_SIZE, VECTOR_SIZE)`` array. The sample's label is the
    ``label`` value of its last row.

    Args:
        data: Passed straight to ``self.buySellChecker.getVectorFeature``.
        VECTOR_SIZE: Side length of each square channel.

    Returns:
        ``(batch_X, batch_Y)``: the stacked samples and an int64 array of
        labels. When the frame has fewer than ``VECTOR_SIZE * VECTOR_SIZE``
        rows both arrays are empty.
    """
    df = self.buySellChecker.getVectorFeature(data)

    # Channel order: 0 = avg10, 1 = diff_upper_lower, 2 = abs_avg_3, 3 = rsi.
    # NOTE(review): channel 0 previously read an `avg10` variable that was
    # never assigned (NameError); it is now loaded from the frame. Confirm
    # whether a shorter average (avg3/avg6/avg9) was intended instead.
    channel_names = ('avg10', 'diff_upper_lower', 'abs_avg_3', 'rsi')
    channels = np.stack(
        [np.asarray(df[name].tolist(), dtype=np.float64) for name in channel_names]
    )
    label = df['label'].tolist()

    window = VECTOR_SIZE * VECTOR_SIZE
    size = channels.shape[1]

    batch_X, batch_Y = [], []
    for end in range(window, size + 1):
        # Build a separate array per window. Reusing one shared buffer made
        # every stored sample end up with the last window's values.
        sample = channels[:, end - window:end].reshape(4, VECTOR_SIZE, VECTOR_SIZE)
        batch_X.append(sample)
        batch_Y.append(label[end - 1])

    batch_X = np.asarray(batch_X)
    batch_Y = np.asarray(batch_Y, dtype='int64')
    return batch_X, batch_Y
def getDataset3D(self, data, VECTOR_SIZE = 299):
df, minmax_df = self.preprocessData(data)

View File

@@ -102,7 +102,7 @@ class StockPredictor:
def predict(self, X, Y=None):
print("Data count: ", len(X))
X = [self.trans(torch.tensor([x])) for x in X]
X = [self.trans(torch.tensor(x)) for x in X]
test_X = X
test_Y = Y