This commit is contained in:
dsyoon
2022-08-07 00:36:18 +09:00
parent f43688f0da
commit c173a6d7dc
10 changed files with 427 additions and 203 deletions

View File

@@ -188,7 +188,7 @@ if __name__ == "__main__":
today = datetime.today()
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
PROJECT_HOME = os.getcwd()
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
# KODEX 인버스 * 2

View File

@@ -190,7 +190,7 @@ if __name__ == "__main__":
today = datetime.today()
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
PROJECT_HOME = os.getcwd()
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
# KODEX 인버스 * 2

View File

@@ -6,22 +6,24 @@ import os
from hts.HTS import HTS
from stock.util.Stock2Vector import Stock2Vector
from stock.util.StockPredictor import StockPredictor
from stock.util.LabelMaker import LabelMaker
from stock.util.StockPredictor import StockPredictor
from hts.BuySellChecker import BuySellChecker
class Simulation (HTS):
stock2Vector = None
buySellChecker = None
stockPredictor = None
def __init__(self, RESOURCE_PATH):
super().__init__(RESOURCE_PATH)
self.RESOURCE_PATH = RESOURCE_PATH
self.stock2Vector = Stock2Vector(RESOURCE_PATH)
self.labelMaker = LabelMaker(RESOURCE_PATH)
self.stockPredictor = StockPredictor()
self.buySellChecker = BuySellChecker()
self.RESOURCE_PATH = RESOURCE_PATH
self.stockPredictor = StockPredictor(RESOURCE_PATH)
#self.connect()
return
@@ -134,14 +136,15 @@ class Simulation (HTS):
def simulate(self, stock_code, today, method="rule"):
if method == "answer":
bsLine, data = self.labelMaker.makeCandidate(stock_code, today, view=True)
self.labelMaker.makeCandidate(stock_code, today, view=True)
else:
if method == "ml":
LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=10)
result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=1)
data = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
X, Y = self.stock2Vector.getDataset2D(data)
df, minmax_df = self.stock2Vector.preprocessData(result)
bsLine, data = self.stockPredictor.predict(df, minmax_df, isRealTime=False)
predY = self.stockPredictor.predict(X, Y)
print (predY)
else:
LAST_DATA = self.stock2Vector.getLastData(stock_code, today)
result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
@@ -159,17 +162,17 @@ class Simulation (HTS):
if __name__ == "__main__":
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
PROJECT_HOME = os.getcwd()
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
# to check buying
stock_codes = {
# 252670
# 122630
"252670": ['20220805'],
"252670": ['20200731'],
}
method = "" # "ml", "answer"
method = "ml" # "ml", "answer"
for stock_code in stock_codes:
simulation = Simulation(RESOURCE_PATH)

View File

@@ -1,2 +1,3 @@
cd C:\workspace\DeepStock
C:\workspace\Anaconda3\envs\hts\python C:\workspace\DeepStock\StockCrawler.py
pause

View File

@@ -12,7 +12,7 @@ today = datetime.now().strftime("%Y-%m-%d")
# DB Browser for SQLite: http://hleecaster.com/python-sqlite3/
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
PROJECT_HOME = os.getcwd()
START_DATE = "1900.01.01"
start = time.time()

View File

@@ -49,7 +49,7 @@ class StockTrainer:
if __name__ == "__main__":
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
PROJECT_HOME = os.getcwd()
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
stock_codes = {

View File

@@ -2,156 +2,326 @@
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
from datasets import Dataset
import random
import numpy as np
import torch
from datasets import Dataset, load_dataset
from datasets import load_metric
from transformers import TrainingArguments, Trainer
from transformers import ViTForImageClassification
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from transformers import ViTFeatureExtractor
from torchvision.transforms import (CenterCrop, Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, Resize, ToTensor)
from stock.util.Stock2Vector import Stock2Vector
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
stock2Vector = Stock2Vector(RESOURCE_PATH)
X, Y = stock2Vector.getDataset2D("252670")
class VitTrainer:
trans = transforms.ToPILImage()
X = [trans(torch.tensor([x])) for x in X]
RESOURCE_PATH = None
stock2Vector = None
split_point1 = int(len(X)*0.7)
split_point2 = int(len(X)*0.9)
train_X = X[:split_point1]
train_Y = Y[:split_point1]
valid_X = X[split_point1:split_point2]
valid_Y = X[split_point1:split_point2]
test_X = X[split_point2:]
test_Y = X[split_point2:]
num_labels = None
id2label = None
label2id = None
id2label = {0: '0', 1: '1', 2: '2'}
label2id = {'0': 0, '1': 1, '2': 2}
args = None
# load cifar10 (only small portion for demonstration purposes)
train_data = {'img': train_X, 'label': train_Y}
val_dsta = {'img': valid_X, 'label': valid_Y}
test_data = {'img': test_X, 'label': test_Y}
_train_transforms = None
_val_transforms = None
train_ds = Dataset.from_dict(train_data)
val_ds = Dataset.from_dict(val_dsta)
test_ds = Dataset.from_dict(test_data)
def __init__(self, RESOURCE_PATH):
self.set_seed(42)
from transformers import ViTFeatureExtractor
self.RESOURCE_PATH = RESOURCE_PATH
self.stock2Vector = Stock2Vector(RESOURCE_PATH)
feature_extractor = ViTFeatureExtractor()
self.num_labels = 3
self.id2label = {0: 'none', 1: 'sell', 2: 'buy'}
self.label2id = {'none': 0, 'sell': 1, 'buy': 2}
from torchvision.transforms import (CenterCrop,
Compose,
Normalize,
RandomHorizontalFlip,
RandomResizedCrop,
Resize,
ToTensor)
self.args = TrainingArguments(
f"stock_vit_predictor",
save_strategy="epoch",
evaluation_strategy="epoch",
learning_rate=2e-5,
per_device_train_batch_size=381,
per_device_eval_batch_size=381,
weight_decay=0.01,
load_best_model_at_end=True,
metric_for_best_model="accuracy",
logging_dir='logs',
remove_unused_columns=False,
num_train_epochs=20,
)
normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
_train_transforms = Compose(
return
def set_seed(self, seed=42, n_gpu=0):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Value passed to every generator.
        n_gpu: When greater than zero, the CUDA generators are seeded too.
    """
    # Same order as before: Python's random, then NumPy, then PyTorch (CPU).
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(seed)
def train_transforms(self, examples):
    """Add a 'pixel_values' column produced by the training transform pipeline.

    Each entry of ``examples['img']`` is converted to RGB and passed through
    ``self._train_transforms``. The batch is modified in place and returned.
    """
    pixel_values = []
    for picture in examples['img']:
        rgb_picture = picture.convert("RGB")
        pixel_values.append(self._train_transforms(rgb_picture))
    examples['pixel_values'] = pixel_values
    return examples
def val_transforms(self, examples):
    """Add a 'pixel_values' column produced by the validation transform pipeline.

    Each entry of ``examples['img']`` is converted to RGB and passed through
    ``self._val_transforms``. The batch is modified in place and returned.
    """
    converted = []
    for picture in examples['img']:
        converted.append(self._val_transforms(picture.convert("RGB")))
    examples['pixel_values'] = converted
    return examples
def collate_fn(self, examples):
    """Merge per-sample dicts into one batch dict for the model.

    Args:
        examples: Sequence of dicts holding a 'pixel_values' tensor and a 'label'.

    Returns:
        Dict with the stacked 'pixel_values' tensor and a 'labels' tensor.
    """
    frames = []
    for sample in examples:
        frames.append(sample["pixel_values"])
    pixel_values = torch.stack(frames)

    targets = []
    for sample in examples:
        targets.append(sample["label"])
    labels = torch.tensor(targets)

    return {"pixel_values": pixel_values, "labels": labels}
def compute_metrics(self, eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: Object that unpacks into ``(predictions, labels)``, where
            ``predictions`` holds one row of class scores per sample and
            ``labels`` holds the reference class ids.

    Returns:
        ``{"accuracy": <fraction of samples whose arg-max class equals the label>}``.
    """
    predictions, labels = eval_pred
    predicted_ids = np.argmax(predictions, axis=1)
    # Computed directly with NumPy: the previous version called
    # load_metric("accuracy") on every evaluation, re-loading the metric each time.
    accuracy = float(np.mean(predicted_ids == np.asarray(labels)))
    return {"accuracy": accuracy}
def getFeature(self, model_path=None):
if model_path == None:
self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
#self.feature_extractor = ViTFeatureExtractor()
else:
#self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
self.feature_extractor = ViTFeatureExtractor.from_pretrained(model_path)
normalize = Normalize(mean=self.feature_extractor.image_mean, std=self.feature_extractor.image_std)
self._train_transforms = Compose(
[
RandomResizedCrop(feature_extractor.size),
RandomResizedCrop(self.feature_extractor.size),
RandomHorizontalFlip(),
ToTensor(),
normalize,
]
)
_val_transforms = Compose(
self._val_transforms = Compose(
[
Resize(feature_extractor.size),
CenterCrop(feature_extractor.size),
Resize(self.feature_extractor.size),
CenterCrop(self.feature_extractor.size),
ToTensor(),
normalize,
]
)
return
def train_transforms(examples):
examples['pixel_values'] = [_train_transforms(image.convert("RGB")) for image in examples['img']]
return examples
def val_transforms(examples):
examples['pixel_values'] = [_val_transforms(image.convert("RGB")) for image in examples['img']]
return examples
def train(self, train_ds, val_ds, model_path):
self.getFeature()
# Set the transforms
train_ds.set_transform(train_transforms)
val_ds.set_transform(val_transforms)
test_ds.set_transform(val_transforms)
train_ds.set_transform(self.train_transforms)
val_ds.set_transform(self.val_transforms)
from torch.utils.data import DataLoader
import torch
def collate_fn(examples):
pixel_values = torch.stack([example["pixel_values"] for example in examples])
labels = torch.tensor([example["label"] for example in examples])
return {"pixel_values": pixel_values, "labels": labels}
train_dataloader = DataLoader(train_ds, collate_fn=collate_fn, batch_size=4)
train_data_loader = torch.utils.data.DataLoader(train_X,
batch_size=32,
shuffle=True,
num_workers=16)
train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4)
batch = next(iter(train_dataloader))
for k,v in batch.items():
if isinstance(v, torch.Tensor):
print(k, v.shape)
from transformers import ViTForImageClassification
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',
num_labels=10,
id2label=id2label,
label2id=label2id)
from transformers import TrainingArguments, Trainer
metric_name = "accuracy"
args = TrainingArguments(
f"test-cifar-10",
save_strategy="epoch",
evaluation_strategy="epoch",
learning_rate=2e-5,
per_device_train_batch_size=10,
per_device_eval_batch_size=4,
num_train_epochs=3,
weight_decay=0.01,
load_best_model_at_end=True,
metric_for_best_model=metric_name,
logging_dir='logs',
remove_unused_columns=False,
)
from datasets import load_metric
import numpy as np
metric = load_metric("accuracy")
def compute_metrics(eval_pred):
predictions, labels = eval_pred
predictions = np.argmax(predictions, axis=1)
return metric.compute(predictions=predictions, references=labels)
import torch
num_labels=self.num_labels,
id2label=self.id2label,
label2id=self.label2id)
model = ViTForImageClassification(model.config)
trainer = Trainer(
model,
args,
self.args,
train_dataset=train_ds,
eval_dataset=val_ds,
data_collator=collate_fn,
compute_metrics=compute_metrics,
tokenizer=feature_extractor,
data_collator=self.collate_fn,
compute_metrics=self.compute_metrics,
tokenizer=self.feature_extractor
)
trainer.train()
# save trained model
model_to_save = (model.module if hasattr(model, "module") else model) # Take care of distributed/parallel training
model_to_save.save_pretrained(model_path)
self.feature_extractor.save_pretrained(model_path)
torch.save(self.args, os.path.join(RESOURCE_PATH, "model", "training_args.bin"))
return
def finetunning(self, train_ds, val_ds, model_path):
    """Continue training a previously saved ViT classifier on new data.

    The feature extractor and the model weights are loaded from
    ``model_path``, trained with ``self.args`` and written back to
    ``model_path``.

    Args:
        train_ds: Training dataset exposing 'img' and 'label' columns.
        val_ds: Validation dataset with the same columns.
        model_path: Directory holding the saved model; overwritten after training.
    """
    self.getFeature(model_path)

    # Set the transforms
    train_ds.set_transform(self.train_transforms)
    val_ds.set_transform(self.val_transforms)

    # Pull one small batch and print the tensor shapes as a sanity check
    # before the training run starts.
    train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4)
    batch = next(iter(train_dataloader))
    for k, v in batch.items():
        if isinstance(v, torch.Tensor):
            print(k, v.shape)

    model = ViTForImageClassification.from_pretrained(
        model_path,
        num_labels=self.num_labels,
        id2label=self.id2label,
        label2id=self.label2id,
    )

    trainer = Trainer(
        model,
        self.args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        data_collator=self.collate_fn,
        compute_metrics=self.compute_metrics,
        tokenizer=self.feature_extractor,
    )

    trainer.train()

    # save trained model
    # Take care of distributed/parallel training: unwrap the model if it is wrapped.
    model_to_save = model.module if hasattr(model, "module") else model
    model_to_save.save_pretrained(model_path)
    self.feature_extractor.save_pretrained(model_path)
    # Use the instance's resource path: a bare RESOURCE_PATH only exists as a
    # global when this file is executed as a script, so importing the class
    # and calling this method would otherwise raise NameError.
    torch.save(self.args, os.path.join(self.RESOURCE_PATH, "model", "training_args.bin"))
    return
def getData(self, stock_code, sDate, eDate):
    """Build the training and validation datasets for one stock and date range.

    Rows are fetched through ``self.stock2Vector.getTrainData`` and turned
    into feature matrices and labels by ``getDataset2D``. Each matrix is
    converted to a PIL image; the first 90% of the samples become the
    training set and the rest the validation set.

    Args:
        stock_code: Stock code forwarded to ``getTrainData``.
        sDate: Start date forwarded to ``getTrainData``.
        eDate: End date forwarded to ``getTrainData``.

    Returns:
        ``(train_ds, val_ds)`` datasets with 'img' and 'label' columns.
    """
    raw = self.stock2Vector.getTrainData(stock_code, sDate, eDate)
    X, Y = self.stock2Vector.getDataset2D(raw)
    print("Data count: ", len(X))

    to_image = transforms.ToPILImage()
    images = []
    for x in X:
        # Wrapping x in a list adds a leading dimension before conversion.
        images.append(to_image(torch.tensor([x])))

    cut = int(len(images) * 0.9)
    train_part = {'img': images[:cut], 'label': Y[:cut]}
    valid_part = {'img': images[cut:], 'label': Y[cut:]}

    train_ds = Dataset.from_dict(train_part)
    val_ds = Dataset.from_dict(valid_part)
    return train_ds, val_ds
# Script entry point: train a ViT classifier for a single stock code.
if __name__ == "__main__":
# Resource and model directories are resolved from the current working directory.
PROJECT_HOME = os.getcwd()
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
model_path = os.path.join(RESOURCE_PATH, "model")
stock_code = "252670"
vitTrainer = VitTrainer(RESOURCE_PATH)
# Build datasets for the given date window (presumably YYYYMMDD strings) and
# train; train() saves the resulting model under model_path.
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200729", eDate="20200731")
vitTrainer.train(train_ds, val_ds, model_path)
"""
print("ym: 2020-07")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200701", eDate="20200731")
vitTrainer.train(train_ds, val_ds, model_path)
print ("ym: 2020-08")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200725", eDate="20200831")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2020-09")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200825", eDate="20200931")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2020-10")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200925", eDate="20201031")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2020-11")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201025", eDate="20201131")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2020-12")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201125", eDate="20201231")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-01")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201225", eDate="20210131")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-02")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210125", eDate="20210231")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-03")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210225", eDate="20210331")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-04")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210325", eDate="20210431")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-05")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210425", eDate="20210531")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-06")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210525", eDate="20210631")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-07")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210625", eDate="20210731")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-08")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210725", eDate="20210831")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-09")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210825", eDate="20210931")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-10")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210925", eDate="20211031")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-11")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211025", eDate="20211131")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2021-12")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211125", eDate="20211231")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-01")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211225", eDate="20220131")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-02")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220125", eDate="20220231")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-03")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220225", eDate="20220331")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-04")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220325", eDate="20220431")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-05")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220425", eDate="20220531")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-06")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220525", eDate="20220631")
vitTrainer.finetunning(train_ds, val_ds, model_path)
print("ym: 2022-07")
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220625", eDate="20220731")
vitTrainer.finetunning(train_ds, val_ds, model_path)
"""

View File

@@ -154,13 +154,16 @@ class Stock2Vector(HTS):
return df, minmax_df
def getTrainData(self, stock_code):
def getTrainData(self, stock_code, sDate=None, eDate=None):
tableName = 'hts'
conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
cursor = conn.cursor()
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, "20220726", "20220731"))
#cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
if sDate is None and eDate is None:
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
else:
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, sDate, eDate))
db_result = cursor.fetchall()
temp_result = []
for rows in db_result:
@@ -168,6 +171,9 @@ class Stock2Vector(HTS):
temp_result.sort(key=lambda x: (x[0], x[1]))
result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
if len(db_result) == 0:
return result
for rows in temp_result:
ymd = rows[0] # hts.날짜
hms = rows[1] # hts.시간
@@ -246,9 +252,9 @@ class Stock2Vector(HTS):
return np.asarray(vector)
def getDataset2D(self, stock_code, VECTOR_SIZE = 381):
result = self.getTrainData(stock_code)
df, minmax_df = self.preprocessData(result)
def getDataset2D(self, data, VECTOR_SIZE = 381):
df, minmax_df = self.preprocessData(data)
TOTAL_X, TOTAL_Y = [], []
for key in minmax_df:
@@ -262,38 +268,24 @@ class Stock2Vector(HTS):
SIZE_WIDTH = len(TOTAL_X[0])
SIZE_HEIGHT = len(TOTAL_X)
X, Y = [], []
for i in range(VECTOR_SIZE, SIZE_WIDTH):
for i in range(VECTOR_SIZE-1, SIZE_WIDTH):
temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
for j in range(SIZE_HEIGHT):
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE:i]
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE+1:i+1]
X.append(temp_X)
if TOTAL_Y[0][i] == 0:
#Y.append([1, 0, 0])
Y.append(0)
elif TOTAL_Y[0][i] == 0.5:
#Y.append([0, 1, 0])
Y.append(1)
else:
#Y.append([0, 0, 1])
Y.append(2)
X = np.asarray(X)
Y = np.asarray(Y)
Y = np.asarray(Y, dtype='int64')
return X, Y
def makeDataset2D(self, stock_code, outFileName=None):
X, Y = self.getDataset2D(stock_code)
#reX = X.reshape(X.shape[0], (X.shape[1] * X.shape[2]))
#df = pd.DataFrame(np.hstack((reX, Y)))
#df.to_csv(outFileName, index=False, header=False)
return X, Y
def getDataset3D(self, stock_code, VECTOR_SIZE = 299):
result = self.getTrainData(stock_code)
df, minmax_df = self.preprocessData(result)
def getDataset3D(self, data, VECTOR_SIZE = 299):
df, minmax_df = self.preprocessData(data)
TOTAL_X, TOTAL_Y = [], []
for key in minmax_df:
@@ -338,8 +330,8 @@ if __name__ == "__main__":
for stock_code in stock_codes:
stock2Vector = Stock2Vector(RESOURCE_PATH)
# X, Y = stock2Vector.getDataset2D(stock_code)
stock2Vector.makeDataset2D(stock_code, outFileName=os.path.join(RESOURCE_PATH, "tmp", "stock_features.csv"))
# data = self.stock2Vector.getTrainData(stock_code, sDate, eDate)
# X, Y = self.stock2Vector.getDataset2D(data)
for given_day in stock_codes[stock_code]:
data, minmax_data = stock2Vector.makeData(given_day, stock_code)

View File

@@ -1,59 +1,117 @@
# tensor - numpy - PILImage 변환 (https://qlsenddl-lab.tistory.com/37)
import os
import keras
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import random
import numpy as np
from keras.applications.imagenet_utils import decode_predictions
from classification_models.keras import Classifiers
from datasets import Dataset, load_dataset
import torch
import torchvision.transforms as transforms
from transformers import ViTFeatureExtractor, ViTForImageClassification, TrainingArguments, Trainer
from torchvision.transforms import (CenterCrop, Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, Resize, ToTensor)
from stock.util.Stock2Vector import Stock2Vector
class StockPredictor:
RESOURCE_PATH = None
stock2Vector = None
model_dir = None
predictor = None
def __init__(self):
return
def __init__(self, RESOURCE_PATH):
self.RESOURCE_PATH = RESOURCE_PATH
def getDataset(self, df):
VECTOR_SIZE = 299
TOTAL_X, TOTAL_Y = [], []
for key in df:
if key == "date":
continue
elif key == "label":
TOTAL_Y.append(df[key].tolist())
else:
TOTAL_X.append(df[key].tolist())
self.model_dir = os.path.join(RESOURCE_PATH, "tmp")
self.stock2Vector = Stock2Vector(RESOURCE_PATH)
SIZE_WIDTH = len(TOTAL_X[0])
SIZE_HEIGHT = len(TOTAL_X)
X = []
for i in range(VECTOR_SIZE, SIZE_WIDTH):
temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
for j in range(SIZE_HEIGHT):
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i - VECTOR_SIZE:i]
temp_X = np.stack([temp_X, temp_X, temp_X], axis=-1)
X.append(temp_X)
self.set_seed(42)
X = np.asarray(X[len(X)-1])
return X
def predict(self, df, minmax_df, isRealTime=False):
X = self.getDataset(df)
# build model
n_classes = 3
Inceptionresnetv2, preprocess_input = Classifiers.get('inceptionresnetv2')
X = preprocess_input(X)
base_model = Inceptionresnetv2(input_shape=(299, 299, 3), include_top=False)
model = keras.models.Model(inputs=[base_model.input])
checkpoint_filename = os.path.join(self.RESOURCE_PATH, "model", "stock.ckpt")
model.load_weights(checkpoint_filename)
y = model.predict(X)
# result
print(decode_predictions(y))
self.num_labels = 3
self.id2label = {0: 'none', 1: 'sell', 2: 'buy'}
self.label2id = {'none': 0, 'sell': 1, 'buy': 2}
self.trans = transforms.ToPILImage()
self.predictor = self.loadModel()
return
def set_seed(self, seed=42, n_gpu=0):
    """Make random number generation reproducible.

    Seeds Python's ``random``, NumPy and PyTorch with ``seed``; the CUDA
    generators are seeded as well when ``n_gpu`` is greater than zero.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    use_cuda = n_gpu > 0
    if use_cuda:
        torch.cuda.manual_seed_all(seed)
def loadModel(self):
    """Load the saved feature extractor and ViT model and wrap them in a Trainer.

    As a side effect, ``self._test_transforms`` is set to the inference
    pipeline (resize, centre crop, tensor conversion, normalisation) built
    from the feature extractor's size, mean and std.

    Returns:
        A ``Trainer`` configured with the loaded model and ``self.collate_fn``.
    """
    feature_extractor = ViTFeatureExtractor.from_pretrained(self.model_dir)

    normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
    pipeline = [
        Resize(feature_extractor.size),
        CenterCrop(feature_extractor.size),
        ToTensor(),
        normalize,
    ]
    self._test_transforms = Compose(pipeline)

    model = ViTForImageClassification.from_pretrained(
        self.model_dir,
        num_labels=self.num_labels,
        id2label=self.id2label,
        label2id=self.label2id,
    )

    training_options = dict(
        save_strategy="epoch",
        evaluation_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=762,
        per_device_eval_batch_size=762,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        logging_dir='logs',
        remove_unused_columns=False,
        num_train_epochs=4,
    )
    args = TrainingArguments("stock_vit_predictor", **training_options)

    trainer = Trainer(
        model,
        args,
        data_collator=self.collate_fn,
        tokenizer=feature_extractor,
    )
    return trainer
def test_transforms(self, examples):
examples['pixel_values'] = [self._test_transforms(image.convert("RGB")) for image in examples['img']]
return examples
def collate_fn(self, examples):
    """Stack the per-sample 'pixel_values' tensors into one batch tensor.

    Only pixel values are batched; labels are not included in the output.

    Returns:
        Dict with a single 'pixel_values' entry.
    """
    tensors = []
    for sample in examples:
        tensors.append(sample["pixel_values"])
    return {"pixel_values": torch.stack(tensors)}
def predict(self, X, Y=None):
    """Run the loaded model on a batch of feature matrices.

    Args:
        X: Sequence of feature matrices; each is wrapped in a leading
            dimension and converted to a PIL image with ``self.trans``.
        Y: Optional labels, stored in the dataset's 'label' column when given.

    Returns:
        The ``predictions`` attribute of ``self.predictor.predict``'s output.
    """
    print("Data count: ", len(X))

    images = [self.trans(torch.tensor([x])) for x in X]

    test_data = {'img': images}
    # Add the label column only when labels were supplied: a None column
    # cannot be turned into a dataset, so the default Y=None used to fail here.
    if Y is not None:
        test_data['label'] = Y
    test_ds = Dataset.from_dict(test_data)

    # Set the transforms
    test_ds.set_transform(self.test_transforms)

    outputs = self.predictor.predict(test_ds)
    return outputs.predictions