From c173a6d7dc3d2eb22a9121e36d6e8568e3412cf8 Mon Sep 17 00:00:00 2001 From: dsyoon Date: Sun, 7 Aug 2022 00:36:18 +0900 Subject: [PATCH] init --- HTS_122630.py | 2 +- HTS_252670.py | 2 +- Simulation.py | 25 ++- StockCrawler.bat | 1 + StockCrawler.py | 2 +- StockTrainer.py | 2 +- VitTrainer.py | 398 +++++++++++++++++++++++++---------- requirements.txt | 2 +- stock/util/Stock2Vector.py | 46 ++-- stock/util/StockPredictor.py | 150 +++++++++---- 10 files changed, 427 insertions(+), 203 deletions(-) diff --git a/HTS_122630.py b/HTS_122630.py index 968b401..c686493 100644 --- a/HTS_122630.py +++ b/HTS_122630.py @@ -188,7 +188,7 @@ if __name__ == "__main__": today = datetime.today() - PROJECT_HOME = os.path.join(os.path.dirname(__file__)) + PROJECT_HOME = os.getcwd() RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") # KODEX 인버스 * 2 diff --git a/HTS_252670.py b/HTS_252670.py index c27ac30..dedd630 100644 --- a/HTS_252670.py +++ b/HTS_252670.py @@ -190,7 +190,7 @@ if __name__ == "__main__": today = datetime.today() - PROJECT_HOME = os.path.join(os.path.dirname(__file__)) + PROJECT_HOME = os.getcwd() RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") # KODEX 인버스 * 2 diff --git a/Simulation.py b/Simulation.py index 3aeb711..5b62658 100644 --- a/Simulation.py +++ b/Simulation.py @@ -6,22 +6,24 @@ import os from hts.HTS import HTS from stock.util.Stock2Vector import Stock2Vector -from stock.util.StockPredictor import StockPredictor from stock.util.LabelMaker import LabelMaker +from stock.util.StockPredictor import StockPredictor from hts.BuySellChecker import BuySellChecker class Simulation (HTS): stock2Vector = None buySellChecker = None + stockPredictor = None def __init__(self, RESOURCE_PATH): super().__init__(RESOURCE_PATH) + self.RESOURCE_PATH = RESOURCE_PATH + self.stock2Vector = Stock2Vector(RESOURCE_PATH) self.labelMaker = LabelMaker(RESOURCE_PATH) - self.stockPredictor = StockPredictor() self.buySellChecker = BuySellChecker() - self.RESOURCE_PATH = RESOURCE_PATH + self.stockPredictor = StockPredictor(RESOURCE_PATH) #self.connect() return @@ -134,14 +136,15 @@ class Simulation (HTS): def simulate(self, stock_code, today, method="rule"): if method == "answer": - bsLine, data = self.labelMaker.makeCandidate(stock_code, today, view=True) + self.labelMaker.makeCandidate(stock_code, today, view=True) else: if method == "ml": - LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=10) - result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA) + LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=1) + data = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA) + X, Y = self.stock2Vector.getDataset2D(data) - df, minmax_df = self.stock2Vector.preprocessData(result) - bsLine, data = self.stockPredictor.predict(df, minmax_df, isRealTime=False) + predY = self.stockPredictor.predict(X, Y) + print (predY) else: LAST_DATA = self.stock2Vector.getLastData(stock_code, today) result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA) @@ -159,17 +162,17 @@ class Simulation (HTS): if __name__ == "__main__": - PROJECT_HOME = os.path.join(os.path.dirname(__file__)) + PROJECT_HOME = os.getcwd() RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") # to check bying stock_codes = { # 252670 # 122630 - "252670": ['20220805'], + "252670": ['20200731'], } - method = "" # "ml", "answer" + method = "ml" # "ml", "answer" for stock_code in stock_codes: simulation = Simulation(RESOURCE_PATH) diff --git a/StockCrawler.bat b/StockCrawler.bat index ca4564d..7ba8a73 100644 --- a/StockCrawler.bat +++ b/StockCrawler.bat @@ -1,2 +1,3 @@ +cd C:\workspace\DeepStock C:\workspace\Anaconda3\envs\hts\python C:\workspace\DeepStock\StockCrawler.py pause \ No newline at end of file diff --git a/StockCrawler.py b/StockCrawler.py index 824d6ef..60315f6 100644 --- a/StockCrawler.py +++ b/StockCrawler.py @@ -12,7 +12,7 @@ today = datetime.now().strftime("%Y-%m-%d") # DB Browser for SQLite: http://hleecaster.com/python-sqlite3/ -PROJECT_HOME = os.path.join(os.path.dirname(__file__)) +PROJECT_HOME = os.getcwd() START_DATE = "1900.01.01" start = time.time() diff --git a/StockTrainer.py b/StockTrainer.py index 8426d73..1727f6b 100644 --- a/StockTrainer.py +++ b/StockTrainer.py @@ -49,7 +49,7 @@ class StockTrainer: if __name__ == "__main__": - PROJECT_HOME = os.path.join(os.path.dirname(__file__)) + PROJECT_HOME = os.getcwd() RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") stock_codes = { diff --git a/VitTrainer.py b/VitTrainer.py index 0e84940..e127586 100755 --- a/VitTrainer.py +++ b/VitTrainer.py @@ -2,156 +2,326 @@ import os os.environ['KMP_DUPLICATE_LIB_OK']='True' -from datasets import Dataset +import random +import numpy as np import torch +from datasets import Dataset, load_dataset +from datasets import load_metric +from transformers import TrainingArguments, Trainer +from transformers import ViTForImageClassification +from torch.utils.data import DataLoader import torchvision.transforms as transforms +from transformers import ViTFeatureExtractor +from torchvision.transforms import (CenterCrop, Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, Resize, ToTensor) from stock.util.Stock2Vector import Stock2Vector -PROJECT_HOME = os.path.join(os.path.dirname(__file__)) -RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") -stock2Vector = Stock2Vector(RESOURCE_PATH) -X, Y = stock2Vector.getDataset2D("252670") +class VitTrainer: -trans = transforms.ToPILImage() -X = [trans(torch.tensor([x])) for x in X] + RESOURCE_PATH = None + stock2Vector = None -split_point1 = int(len(X)*0.7) -split_point2 = int(len(X)*0.9) -train_X = X[:split_point1] -train_Y = Y[:split_point1] -valid_X = X[split_point1:split_point2] -valid_Y = X[split_point1:split_point2] -test_X = X[split_point2:] -test_Y = X[split_point2:] + num_labels = None + id2label = None + label2id = None -id2label = {0: '0', 1: '1', 2: '2'} -label2id = {'0': 0, '1': 1, '2': 2} + args = None -# load cifar10 (only small portion for demonstration purposes) -train_data = {'img': train_X, 'label': train_Y} -val_dsta = {'img': valid_X, 'label': valid_Y} -test_data = {'img': test_X, 'label': test_Y} + _train_transforms = None + _val_transforms = None -train_ds = Dataset.from_dict(train_data) -val_ds = Dataset.from_dict(val_dsta) -test_ds = Dataset.from_dict(test_data) + def __init__(self, RESOURCE_PATH): + self.set_seed(42) -from transformers import ViTFeatureExtractor + self.RESOURCE_PATH = RESOURCE_PATH + self.stock2Vector = Stock2Vector(RESOURCE_PATH) -feature_extractor = ViTFeatureExtractor() + self.num_labels = 3 + self.id2label = {0: 'none', 1: 'sell', 2: 'buy'} + self.label2id = {'none': 0, 'sell': 1, 'buy': 2} -from torchvision.transforms import (CenterCrop, - Compose, - Normalize, - RandomHorizontalFlip, - RandomResizedCrop, - Resize, - ToTensor) + self.args = TrainingArguments( + f"stock_vit_predictor", + save_strategy="epoch", + evaluation_strategy="epoch", + learning_rate=2e-5, + per_device_train_batch_size=381, + per_device_eval_batch_size=381, + weight_decay=0.01, + load_best_model_at_end=True, + metric_for_best_model="accuracy", + logging_dir='logs', + remove_unused_columns=False, + num_train_epochs=20, + ) -normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) -_train_transforms = Compose( - [ - RandomResizedCrop(feature_extractor.size), - RandomHorizontalFlip(), - ToTensor(), - normalize, - ] - ) + return -_val_transforms = Compose( - [ - Resize(feature_extractor.size), - CenterCrop(feature_extractor.size), - ToTensor(), - normalize, - ] - ) + def set_seed(self, seed=42, n_gpu=0): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if n_gpu > 0: + torch.cuda.manual_seed_all(seed) -def train_transforms(examples): - examples['pixel_values'] = [_train_transforms(image.convert("RGB")) for image in examples['img']] - return examples + def train_transforms(self, examples): + examples['pixel_values'] = [self._train_transforms(image.convert("RGB")) for image in examples['img']] + return examples -def val_transforms(examples): - examples['pixel_values'] = [_val_transforms(image.convert("RGB")) for image in examples['img']] - return examples + def val_transforms(self, examples): + examples['pixel_values'] = [self._val_transforms(image.convert("RGB")) for image in examples['img']] + return examples -# Set the transforms -train_ds.set_transform(train_transforms) -val_ds.set_transform(val_transforms) -test_ds.set_transform(val_transforms) + def collate_fn(self, examples): + pixel_values = torch.stack([example["pixel_values"] for example in examples]) + labels = torch.tensor([example["label"] for example in examples]) + return {"pixel_values": pixel_values, "labels": labels} + def compute_metrics(self, eval_pred): + predictions, labels = eval_pred + predictions = np.argmax(predictions, axis=1) + metric = load_metric("accuracy") + return metric.compute(predictions=predictions, references=labels) -from torch.utils.data import DataLoader -import torch + def getFeature(self, model_path=None): + if model_path == None: + self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k") + #self.feature_extractor = ViTFeatureExtractor() + else: + #self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k") + self.feature_extractor = ViTFeatureExtractor.from_pretrained(model_path) -def collate_fn(examples): - pixel_values = torch.stack([example["pixel_values"] for example in examples]) - labels = torch.tensor([example["label"] for example in examples]) - return {"pixel_values": pixel_values, "labels": labels} + normalize = Normalize(mean=self.feature_extractor.image_mean, std=self.feature_extractor.image_std) + self._train_transforms = Compose( + [ + RandomResizedCrop(self.feature_extractor.size), + RandomHorizontalFlip(), + ToTensor(), + normalize, + ] + ) -train_dataloader = DataLoader(train_ds, collate_fn=collate_fn, batch_size=4) -train_data_loader = torch.utils.data.DataLoader(train_X, - batch_size=32, - shuffle=True, - num_workers=16) + self._val_transforms = Compose( + [ + Resize(self.feature_extractor.size), + CenterCrop(self.feature_extractor.size), + ToTensor(), + normalize, + ] + ) + return -batch = next(iter(train_dataloader)) -for k,v in batch.items(): - if isinstance(v, torch.Tensor): - print(k, v.shape) + def train(self, train_ds, val_ds, model_path): + self.getFeature() + # Set the transforms + train_ds.set_transform(self.train_transforms) + val_ds.set_transform(self.val_transforms) -from transformers import ViTForImageClassification + train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4) -model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k', - num_labels=10, - id2label=id2label, - label2id=label2id) + batch = next(iter(train_dataloader)) + for k,v in batch.items(): + if isinstance(v, torch.Tensor): + print(k, v.shape) + model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k', + num_labels=self.num_labels, + id2label=self.id2label, + label2id=self.label2id) + model = ViTForImageClassification(model.config) -from transformers import TrainingArguments, Trainer + trainer = Trainer( + model, + self.args, + train_dataset=train_ds, + eval_dataset=val_ds, + data_collator=self.collate_fn, + compute_metrics=self.compute_metrics, + tokenizer=self.feature_extractor + ) -metric_name = "accuracy" + trainer.train() -args = TrainingArguments( - f"test-cifar-10", - save_strategy="epoch", - evaluation_strategy="epoch", - learning_rate=2e-5, - per_device_train_batch_size=10, - per_device_eval_batch_size=4, - num_train_epochs=3, - weight_decay=0.01, - load_best_model_at_end=True, - metric_for_best_model=metric_name, - logging_dir='logs', - remove_unused_columns=False, -) + # save trained model + model_to_save = (model.module if hasattr(model, "module") else model) # Take care of distributed/parallel training + model_to_save.save_pretrained(model_path) + self.feature_extractor.save_pretrained(model_path) + torch.save(self.args, os.path.join(RESOURCE_PATH, "model", "training_args.bin")) + return -from datasets import load_metric -import numpy as np + def finetunning(self, train_ds, val_ds, model_path): + self.getFeature(model_path) -metric = load_metric("accuracy") + # Set the transforms + train_ds.set_transform(self.train_transforms) + val_ds.set_transform(self.val_transforms) -def compute_metrics(eval_pred): - predictions, labels = eval_pred - predictions = np.argmax(predictions, axis=1) - return metric.compute(predictions=predictions, references=labels) + train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4) + batch = next(iter(train_dataloader)) + for k,v in batch.items(): + if isinstance(v, torch.Tensor): + print(k, v.shape) -import torch + model = ViTForImageClassification.from_pretrained(model_path, + num_labels=self.num_labels, + id2label=self.id2label, + label2id=self.label2id) + trainer = Trainer( + model, + self.args, + train_dataset=train_ds, + eval_dataset=val_ds, + data_collator=self.collate_fn, + compute_metrics=self.compute_metrics, + tokenizer=self.feature_extractor + ) -trainer = Trainer( - model, - args, - train_dataset=train_ds, - eval_dataset=val_ds, - data_collator=collate_fn, - compute_metrics=compute_metrics, - tokenizer=feature_extractor, -) + trainer.train() + # save trained model + model_to_save = (model.module if hasattr(model, "module") else model) # Take care of distributed/parallel training + model_to_save.save_pretrained(model_path) + self.feature_extractor.save_pretrained(model_path) + torch.save(self.args, os.path.join(RESOURCE_PATH, "model", "training_args.bin")) -trainer.train() \ No newline at end of file + return + + def getData(self, stock_code, sDate, eDate): + data = self.stock2Vector.getTrainData(stock_code, sDate, eDate) + X, Y = self.stock2Vector.getDataset2D(data) + print("Data count: ", len(X)) + + trans = transforms.ToPILImage() + X = [trans(torch.tensor([x])) for x in X] + + split_point1 = int(len(X) * 0.9) + train_X = X[:split_point1] + train_Y = Y[:split_point1] + valid_X = X[split_point1:] + valid_Y = Y[split_point1:] + + # load cifar10 (only small portion for demonstration purposes) + train_data = {'img': train_X, 'label': train_Y} + val_dsta = {'img': valid_X, 'label': valid_Y} + + train_ds = Dataset.from_dict(train_data) + val_ds = Dataset.from_dict(val_dsta) + + return train_ds, val_ds + +if __name__ == "__main__": + + PROJECT_HOME = os.getcwd() + RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") + model_path = os.path.join(RESOURCE_PATH, "model") + + stock_code = "252670" + vitTrainer = VitTrainer(RESOURCE_PATH) + + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200729", eDate="20200731") + vitTrainer.train(train_ds, val_ds, model_path) + + """ + print("ym: 2020-07") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200701", eDate="20200731") + vitTrainer.train(train_ds, val_ds, model_path) + + print ("ym: 2020-08") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200725", eDate="20200831") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2020-09") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200825", eDate="20200931") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2020-10") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200925", eDate="20201031") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2020-11") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201025", eDate="20201131") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2020-12") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201125", eDate="20201231") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-01") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201225", eDate="20210131") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-02") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210125", eDate="20210231") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-03") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210225", eDate="20210331") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-04") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210325", eDate="20210431") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-05") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210425", eDate="20210531") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-06") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210525", eDate="20210631") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-07") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210625", eDate="20210731") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-08") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210725", eDate="20210831") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-09") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210825", eDate="20210931") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-10") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210925", eDate="20212031") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-11") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211025", eDate="20211131") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2021-12") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211125", eDate="20211231") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2022-01") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211225", eDate="20220131") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2022-02") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220125", eDate="20220231") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2022-03") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220225", eDate="20220331") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2022-04") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220325", eDate="20220431") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2022-05") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220425", eDate="20220531") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2022-06") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220525", eDate="20220631") + vitTrainer.finetunning(train_ds, val_ds, model_path) + + print("ym: 2022-07") + train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220625", eDate="20220731") + vitTrainer.finetunning(train_ds, val_ds, model_path) + """ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e6413d8..dfa9eea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,4 @@ ipywidgets==7.0.0 torchvision transformers -sklearn \ No newline at end of file +sklearn diff --git a/stock/util/Stock2Vector.py b/stock/util/Stock2Vector.py index a9fe18b..40db79f 100644 --- a/stock/util/Stock2Vector.py +++ b/stock/util/Stock2Vector.py @@ -154,13 +154,16 @@ class Stock2Vector(HTS): return df, minmax_df - def getTrainData(self, stock_code): + def getTrainData(self, stock_code, sDate=None, eDate=None): tableName = 'hts' conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db")) cursor = conn.cursor() - cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, "20220726", "20220731")) - #cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,)) + if sDate is None and eDate is None: + cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,)) + else: + cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, sDate, eDate)) + db_result = cursor.fetchall() temp_result = [] for rows in db_result: @@ -168,6 +171,9 @@ class Stock2Vector(HTS): temp_result.sort(key=lambda x: (x[0], x[1])) result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []} + if len(db_result) == 0: + return result + for rows in temp_result: ymd = rows[0] # hts.날짜 hms = rows[1] # hts.시간 @@ -246,9 +252,9 @@ class Stock2Vector(HTS): return np.asarray(vector) - def getDataset2D(self, stock_code, VECTOR_SIZE = 381): - result = self.getTrainData(stock_code) - df, minmax_df = self.preprocessData(result) + def getDataset2D(self, data, VECTOR_SIZE = 381): + + df, minmax_df = self.preprocessData(data) TOTAL_X, TOTAL_Y = [], [] for key in minmax_df: @@ -262,38 +268,24 @@ class Stock2Vector(HTS): SIZE_WIDTH = len(TOTAL_X[0]) SIZE_HEIGHT = len(TOTAL_X) X, Y = [], [] - for i in range(VECTOR_SIZE, SIZE_WIDTH): + for i in range(VECTOR_SIZE-1, SIZE_WIDTH): temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0) for j in range(SIZE_HEIGHT): - temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE:i] + temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE+1:i+1] X.append(temp_X) if TOTAL_Y[0][i] == 0: - #Y.append([1, 0, 0]) Y.append(0) elif TOTAL_Y[0][i] == 0.5: - #Y.append([0, 1, 0]) Y.append(1) else: - #Y.append([0, 0, 1]) Y.append(2) X = np.asarray(X) - Y = np.asarray(Y) + Y = np.asarray(Y, dtype='int64') return X, Y - def makeDataset2D(self, stock_code, outFileName=None): - X, Y = self.getDataset2D(stock_code) - - #reX = X.reshape(X.shape[0], (X.shape[1] * X.shape[2])) - #df = pd.DataFrame(np.hstack((reX, Y))) - #df.to_csv(outFileName, index=False, header=False) - - - return X, Y - - def getDataset3D(self, stock_code, VECTOR_SIZE = 299): - result = self.getTrainData(stock_code) - df, minmax_df = self.preprocessData(result) + def getDataset3D(self, data, VECTOR_SIZE = 299): + df, minmax_df = self.preprocessData(data) TOTAL_X, TOTAL_Y = [], [] for key in minmax_df: @@ -338,8 +330,8 @@ if __name__ == "__main__": for stock_code in stock_codes: stock2Vector = Stock2Vector(RESOURCE_PATH) - # X, Y = stock2Vector.getDataset2D(stock_code) - stock2Vector.makeDataset2D(stock_code, outFileName=os.path.join(RESOURCE_PATH, "tmp", "stock_features.csv")) + # data = self.stock2Vector.getTrainData(stock_code, sDate, eDate) + # X, Y = self.stock2Vector.getDataset2D(data) for given_day in stock_codes[stock_code]: data, minmax_data = stock2Vector.makeData(given_day, stock_code) diff --git a/stock/util/StockPredictor.py b/stock/util/StockPredictor.py index 8b802c4..c667eac 100644 --- a/stock/util/StockPredictor.py +++ b/stock/util/StockPredictor.py @@ -1,59 +1,117 @@ +# tensor - numpy - PILImage 변환 (https://qlsenddl-lab.tistory.com/37) + import os -import keras + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' +import random import numpy as np -from keras.applications.imagenet_utils import decode_predictions -from classification_models.keras import Classifiers +from datasets import Dataset, load_dataset +import torch +import torchvision.transforms as transforms + +from transformers import ViTFeatureExtractor, ViTForImageClassification, TrainingArguments, Trainer +from torchvision.transforms import (CenterCrop, Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, Resize, ToTensor) + +from stock.util.Stock2Vector import Stock2Vector + class StockPredictor: - RESOURCE_PATH = None stock2Vector = None + model_dir = None + predictor = None - def __init__(self): - return + def __init__(self, RESOURCE_PATH): + self.RESOURCE_PATH = RESOURCE_PATH - def getDataset(self, df): - VECTOR_SIZE = 299 - TOTAL_X, TOTAL_Y = [], [] - for key in df: - if key == "date": - continue - elif key == "label": - TOTAL_Y.append(df[key].tolist()) - else: - TOTAL_X.append(df[key].tolist()) + self.model_dir = os.path.join(RESOURCE_PATH, "tmp") + self.stock2Vector = Stock2Vector(RESOURCE_PATH) - SIZE_WIDTH = len(TOTAL_X[0]) - SIZE_HEIGHT = len(TOTAL_X) - X = [] - for i in range(VECTOR_SIZE, SIZE_WIDTH): - temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0) - for j in range(SIZE_HEIGHT): - temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i - VECTOR_SIZE:i] - temp_X = np.stack([temp_X, temp_X, temp_X], axis=-1) - X.append(temp_X) + self.set_seed(42) - X = np.asarray(X[len(X)-1]) - - return X - - def predict(self, df, minmax_df, isRealTime=False): - X = self.getDataset(df) - - # build model - n_classes = 3 - Inceptionresnetv2, preprocess_input = Classifiers.get('inceptionresnetv2') - X = preprocess_input(X) - base_model = Inceptionresnetv2(input_shape=(299, 299, 3), include_top=False) - model = keras.models.Model(inputs=[base_model.input]) - - checkpoint_filename = os.path.join(self.RESOURCE_PATH, "model", "stock.ckpt") - model.load_weights(checkpoint_filename) - - y = model.predict(X) - - # result - print(decode_predictions(y)) + self.num_labels = 3 + self.id2label = {0: 'none', 1: 'sell', 2: 'buy'} + self.label2id = {'none': 0, 'sell': 1, 'buy': 2} + self.trans = transforms.ToPILImage() + self.predictor = self.loadModel() return + + def set_seed(self, seed=42, n_gpu=0): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if n_gpu > 0: + torch.cuda.manual_seed_all(seed) + + def loadModel(self): + feature_extractor = ViTFeatureExtractor.from_pretrained(self.model_dir) + + normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) + + self._test_transforms = Compose( + [ + Resize(feature_extractor.size), + CenterCrop(feature_extractor.size), + ToTensor(), + normalize, + ] + ) + + model = ViTForImageClassification.from_pretrained(self.model_dir, + num_labels=self.num_labels, + id2label=self.id2label, + label2id=self.label2id) + args = TrainingArguments( + f"stock_vit_predictor", + save_strategy="epoch", + evaluation_strategy="epoch", + learning_rate=2e-5, + per_device_train_batch_size=762, + per_device_eval_batch_size=762, + weight_decay=0.01, + load_best_model_at_end=True, + metric_for_best_model="accuracy", + logging_dir='logs', + remove_unused_columns=False, + num_train_epochs=4, + ) + + trainer = Trainer( + model, + args, + data_collator=self.collate_fn, + tokenizer=feature_extractor, + ) + + return trainer + + def test_transforms(self, examples): + examples['pixel_values'] = [self._test_transforms(image.convert("RGB")) for image in examples['img']] + return examples + + def collate_fn(self, examples): + pixel_values = torch.stack([example["pixel_values"] for example in examples]) + #labels = torch.tensor([example["label"] for example in examples]) + #return {"pixel_values": pixel_values, "labels": labels} + return {"pixel_values": pixel_values} + + def predict(self, X, Y=None): + print("Data count: ", len(X)) + + X = [self.trans(torch.tensor([x])) for x in X] + + test_X = X + test_Y = Y + + # load cifar10 (only small portion for demonstration purposes) + test_data = {'img': test_X, 'label': test_Y} + + test_ds = Dataset.from_dict(test_data) + + # Set the transforms + test_ds.set_transform(self.test_transforms) + + outputs = self.predictor.predict(test_ds) + return outputs.predictions \ No newline at end of file