This commit is contained in:
dsyoon
2022-08-07 00:36:18 +09:00
parent f43688f0da
commit c173a6d7dc
10 changed files with 427 additions and 203 deletions

View File

@@ -188,7 +188,7 @@ if __name__ == "__main__":
today = datetime.today() today = datetime.today()
PROJECT_HOME = os.path.join(os.path.dirname(__file__)) PROJECT_HOME = os.getcwd()
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
# KODEX 인버스 * 2 # KODEX 인버스 * 2

View File

@@ -190,7 +190,7 @@ if __name__ == "__main__":
today = datetime.today() today = datetime.today()
PROJECT_HOME = os.path.join(os.path.dirname(__file__)) PROJECT_HOME = os.getcwd()
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
# KODEX 인버스 * 2 # KODEX 인버스 * 2

View File

@@ -6,22 +6,24 @@ import os
from hts.HTS import HTS from hts.HTS import HTS
from stock.util.Stock2Vector import Stock2Vector from stock.util.Stock2Vector import Stock2Vector
from stock.util.StockPredictor import StockPredictor
from stock.util.LabelMaker import LabelMaker from stock.util.LabelMaker import LabelMaker
from stock.util.StockPredictor import StockPredictor
from hts.BuySellChecker import BuySellChecker from hts.BuySellChecker import BuySellChecker
class Simulation (HTS): class Simulation (HTS):
stock2Vector = None stock2Vector = None
buySellChecker = None buySellChecker = None
stockPredictor = None
def __init__(self, RESOURCE_PATH): def __init__(self, RESOURCE_PATH):
super().__init__(RESOURCE_PATH) super().__init__(RESOURCE_PATH)
self.RESOURCE_PATH = RESOURCE_PATH
self.stock2Vector = Stock2Vector(RESOURCE_PATH) self.stock2Vector = Stock2Vector(RESOURCE_PATH)
self.labelMaker = LabelMaker(RESOURCE_PATH) self.labelMaker = LabelMaker(RESOURCE_PATH)
self.stockPredictor = StockPredictor()
self.buySellChecker = BuySellChecker() self.buySellChecker = BuySellChecker()
self.RESOURCE_PATH = RESOURCE_PATH self.stockPredictor = StockPredictor(RESOURCE_PATH)
#self.connect() #self.connect()
return return
@@ -134,14 +136,15 @@ class Simulation (HTS):
def simulate(self, stock_code, today, method="rule"): def simulate(self, stock_code, today, method="rule"):
if method == "answer": if method == "answer":
bsLine, data = self.labelMaker.makeCandidate(stock_code, today, view=True) self.labelMaker.makeCandidate(stock_code, today, view=True)
else: else:
if method == "ml": if method == "ml":
LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=10) LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=1)
result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA) data = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
X, Y = self.stock2Vector.getDataset2D(data)
df, minmax_df = self.stock2Vector.preprocessData(result) predY = self.stockPredictor.predict(X, Y)
bsLine, data = self.stockPredictor.predict(df, minmax_df, isRealTime=False) print (predY)
else: else:
LAST_DATA = self.stock2Vector.getLastData(stock_code, today) LAST_DATA = self.stock2Vector.getLastData(stock_code, today)
result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA) result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
@@ -159,17 +162,17 @@ class Simulation (HTS):
if __name__ == "__main__": if __name__ == "__main__":
PROJECT_HOME = os.path.join(os.path.dirname(__file__)) PROJECT_HOME = os.getcwd()
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
# to check bying # to check bying
stock_codes = { stock_codes = {
# 252670 # 252670
# 122630 # 122630
"252670": ['20220805'], "252670": ['20200731'],
} }
method = "" # "ml", "answer" method = "ml" # "ml", "answer"
for stock_code in stock_codes: for stock_code in stock_codes:
simulation = Simulation(RESOURCE_PATH) simulation = Simulation(RESOURCE_PATH)

View File

@@ -1,2 +1,3 @@
cd C:\workspace\DeepStock
C:\workspace\Anaconda3\envs\hts\python C:\workspace\DeepStock\StockCrawler.py C:\workspace\Anaconda3\envs\hts\python C:\workspace\DeepStock\StockCrawler.py
pause pause

View File

@@ -12,7 +12,7 @@ today = datetime.now().strftime("%Y-%m-%d")
# DB Browser for SQLite: http://hleecaster.com/python-sqlite3/ # DB Browser for SQLite: http://hleecaster.com/python-sqlite3/
PROJECT_HOME = os.path.join(os.path.dirname(__file__)) PROJECT_HOME = os.getcwd()
START_DATE = "1900.01.01" START_DATE = "1900.01.01"
start = time.time() start = time.time()

View File

@@ -49,7 +49,7 @@ class StockTrainer:
if __name__ == "__main__": if __name__ == "__main__":
PROJECT_HOME = os.path.join(os.path.dirname(__file__)) PROJECT_HOME = os.getcwd()
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources") RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
stock_codes = { stock_codes = {

View File

@@ -2,156 +2,326 @@
import os import os
os.environ['KMP_DUPLICATE_LIB_OK']='True' os.environ['KMP_DUPLICATE_LIB_OK']='True'
from datasets import Dataset import random
import numpy as np
import torch import torch
from datasets import Dataset, load_dataset
from datasets import load_metric
from transformers import TrainingArguments, Trainer
from transformers import ViTForImageClassification
from torch.utils.data import DataLoader
import torchvision.transforms as transforms import torchvision.transforms as transforms
from transformers import ViTFeatureExtractor
from torchvision.transforms import (CenterCrop, Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, Resize, ToTensor)
from stock.util.Stock2Vector import Stock2Vector from stock.util.Stock2Vector import Stock2Vector
PROJECT_HOME = os.path.join(os.path.dirname(__file__)) class VitTrainer:
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
stock2Vector = Stock2Vector(RESOURCE_PATH)
X, Y = stock2Vector.getDataset2D("252670")
trans = transforms.ToPILImage() RESOURCE_PATH = None
X = [trans(torch.tensor([x])) for x in X] stock2Vector = None
split_point1 = int(len(X)*0.7) num_labels = None
split_point2 = int(len(X)*0.9) id2label = None
train_X = X[:split_point1] label2id = None
train_Y = Y[:split_point1]
valid_X = X[split_point1:split_point2]
valid_Y = X[split_point1:split_point2]
test_X = X[split_point2:]
test_Y = X[split_point2:]
id2label = {0: '0', 1: '1', 2: '2'} args = None
label2id = {'0': 0, '1': 1, '2': 2}
# load cifar10 (only small portion for demonstration purposes) _train_transforms = None
train_data = {'img': train_X, 'label': train_Y} _val_transforms = None
val_dsta = {'img': valid_X, 'label': valid_Y}
test_data = {'img': test_X, 'label': test_Y}
train_ds = Dataset.from_dict(train_data) def __init__(self, RESOURCE_PATH):
val_ds = Dataset.from_dict(val_dsta) self.set_seed(42)
test_ds = Dataset.from_dict(test_data)
from transformers import ViTFeatureExtractor self.RESOURCE_PATH = RESOURCE_PATH
self.stock2Vector = Stock2Vector(RESOURCE_PATH)
feature_extractor = ViTFeatureExtractor() self.num_labels = 3
self.id2label = {0: 'none', 1: 'sell', 2: 'buy'}
self.label2id = {'none': 0, 'sell': 1, 'buy': 2}
from torchvision.transforms import (CenterCrop, self.args = TrainingArguments(
Compose, f"stock_vit_predictor",
Normalize, save_strategy="epoch",
RandomHorizontalFlip, evaluation_strategy="epoch",
RandomResizedCrop, learning_rate=2e-5,
Resize, per_device_train_batch_size=381,
ToTensor) per_device_eval_batch_size=381,
weight_decay=0.01,
load_best_model_at_end=True,
metric_for_best_model="accuracy",
logging_dir='logs',
remove_unused_columns=False,
num_train_epochs=20,
)
normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) return
_train_transforms = Compose(
[
RandomResizedCrop(feature_extractor.size),
RandomHorizontalFlip(),
ToTensor(),
normalize,
]
)
_val_transforms = Compose( def set_seed(self, seed=42, n_gpu=0):
[ random.seed(seed)
Resize(feature_extractor.size), np.random.seed(seed)
CenterCrop(feature_extractor.size), torch.manual_seed(seed)
ToTensor(), if n_gpu > 0:
normalize, torch.cuda.manual_seed_all(seed)
]
)
def train_transforms(examples): def train_transforms(self, examples):
examples['pixel_values'] = [_train_transforms(image.convert("RGB")) for image in examples['img']] examples['pixel_values'] = [self._train_transforms(image.convert("RGB")) for image in examples['img']]
return examples return examples
def val_transforms(examples): def val_transforms(self, examples):
examples['pixel_values'] = [_val_transforms(image.convert("RGB")) for image in examples['img']] examples['pixel_values'] = [self._val_transforms(image.convert("RGB")) for image in examples['img']]
return examples return examples
# Set the transforms def collate_fn(self, examples):
train_ds.set_transform(train_transforms) pixel_values = torch.stack([example["pixel_values"] for example in examples])
val_ds.set_transform(val_transforms) labels = torch.tensor([example["label"] for example in examples])
test_ds.set_transform(val_transforms) return {"pixel_values": pixel_values, "labels": labels}
def compute_metrics(self, eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: pair ``(predictions, labels)``. ``predictions`` holds the
            per-class scores, one row per example; ``labels`` holds the
            expected class ids.

    Returns:
        dict: ``{"accuracy": value}`` where ``value`` is the fraction of
        examples whose highest-scoring class equals the label.
    """
    predictions, labels = eval_pred
    predicted_ids = np.argmax(predictions, axis=1)
    # Computed directly with NumPy instead of calling load_metric("accuracy")
    # inside this method, which loaded the metric again on every evaluation.
    accuracy = float(np.mean(predicted_ids == np.asarray(labels)))
    return {"accuracy": accuracy}
from torch.utils.data import DataLoader def getFeature(self, model_path=None):
import torch if model_path == None:
self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
#self.feature_extractor = ViTFeatureExtractor()
else:
#self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
self.feature_extractor = ViTFeatureExtractor.from_pretrained(model_path)
def collate_fn(examples): normalize = Normalize(mean=self.feature_extractor.image_mean, std=self.feature_extractor.image_std)
pixel_values = torch.stack([example["pixel_values"] for example in examples]) self._train_transforms = Compose(
labels = torch.tensor([example["label"] for example in examples]) [
return {"pixel_values": pixel_values, "labels": labels} RandomResizedCrop(self.feature_extractor.size),
RandomHorizontalFlip(),
ToTensor(),
normalize,
]
)
train_dataloader = DataLoader(train_ds, collate_fn=collate_fn, batch_size=4) self._val_transforms = Compose(
train_data_loader = torch.utils.data.DataLoader(train_X, [
batch_size=32, Resize(self.feature_extractor.size),
shuffle=True, CenterCrop(self.feature_extractor.size),
num_workers=16) ToTensor(),
normalize,
]
)
return
batch = next(iter(train_dataloader)) def train(self, train_ds, val_ds, model_path):
for k,v in batch.items(): self.getFeature()
if isinstance(v, torch.Tensor):
print(k, v.shape)
# Set the transforms
train_ds.set_transform(self.train_transforms)
val_ds.set_transform(self.val_transforms)
from transformers import ViTForImageClassification train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4)
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k', batch = next(iter(train_dataloader))
num_labels=10, for k,v in batch.items():
id2label=id2label, if isinstance(v, torch.Tensor):
label2id=label2id) print(k, v.shape)
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',
num_labels=self.num_labels,
id2label=self.id2label,
label2id=self.label2id)
model = ViTForImageClassification(model.config)
from transformers import TrainingArguments, Trainer trainer = Trainer(
model,
self.args,
train_dataset=train_ds,
eval_dataset=val_ds,
data_collator=self.collate_fn,
compute_metrics=self.compute_metrics,
tokenizer=self.feature_extractor
)
metric_name = "accuracy" trainer.train()
args = TrainingArguments( # save trained model
f"test-cifar-10", model_to_save = (model.module if hasattr(model, "module") else model) # Take care of distributed/parallel training
save_strategy="epoch", model_to_save.save_pretrained(model_path)
evaluation_strategy="epoch", self.feature_extractor.save_pretrained(model_path)
learning_rate=2e-5, torch.save(self.args, os.path.join(RESOURCE_PATH, "model", "training_args.bin"))
per_device_train_batch_size=10,
per_device_eval_batch_size=4,
num_train_epochs=3,
weight_decay=0.01,
load_best_model_at_end=True,
metric_for_best_model=metric_name,
logging_dir='logs',
remove_unused_columns=False,
)
return
from datasets import load_metric def finetunning(self, train_ds, val_ds, model_path):
import numpy as np self.getFeature(model_path)
metric = load_metric("accuracy") # Set the transforms
train_ds.set_transform(self.train_transforms)
val_ds.set_transform(self.val_transforms)
def compute_metrics(eval_pred): train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4)
predictions, labels = eval_pred
predictions = np.argmax(predictions, axis=1)
return metric.compute(predictions=predictions, references=labels)
batch = next(iter(train_dataloader))
for k,v in batch.items():
if isinstance(v, torch.Tensor):
print(k, v.shape)
import torch model = ViTForImageClassification.from_pretrained(model_path,
num_labels=self.num_labels,
id2label=self.id2label,
label2id=self.label2id)
trainer = Trainer(
model,
self.args,
train_dataset=train_ds,
eval_dataset=val_ds,
data_collator=self.collate_fn,
compute_metrics=self.compute_metrics,
tokenizer=self.feature_extractor
)
trainer = Trainer( trainer.train()
model,
args,
train_dataset=train_ds,
eval_dataset=val_ds,
data_collator=collate_fn,
compute_metrics=compute_metrics,
tokenizer=feature_extractor,
)
# save trained model
model_to_save = (model.module if hasattr(model, "module") else model) # Take care of distributed/parallel training
model_to_save.save_pretrained(model_path)
self.feature_extractor.save_pretrained(model_path)
torch.save(self.args, os.path.join(RESOURCE_PATH, "model", "training_args.bin"))
trainer.train() return
def getData(self, stock_code, sDate, eDate):
    """Build the training and validation datasets for one stock and date range.

    Rows are fetched with ``self.stock2Vector.getTrainData`` and turned into
    2-D samples with ``getDataset2D``. Every sample is converted to a PIL
    image, and the samples are split in their existing order: the first 90%
    for training, the remainder for validation.

    Args:
        stock_code: stock code forwarded to ``getTrainData``.
        sDate: start of the date range, forwarded to ``getTrainData``.
        eDate: end of the date range, forwarded to ``getTrainData``.

    Returns:
        tuple: ``(train_ds, val_ds)``, two ``Dataset`` objects with the
        columns ``img`` and ``label``.
    """
    data = self.stock2Vector.getTrainData(stock_code, sDate, eDate)
    X, Y = self.stock2Vector.getDataset2D(data)
    print("Data count: ", len(X))

    to_image = transforms.ToPILImage()
    # Add the leading channel axis with unsqueeze(0) rather than wrapping the
    # sample in a Python list (torch.tensor([x])), which sends an ndarray
    # through PyTorch's slow list conversion.
    # NOTE(review): assumes every x is a 2-D array - confirm against getDataset2D.
    images = [to_image(torch.as_tensor(x).unsqueeze(0)) for x in X]

    # Ordered 90/10 split: no shuffling.
    split_point = int(len(images) * 0.9)
    train_data = {'img': images[:split_point], 'label': Y[:split_point]}
    val_data = {'img': images[split_point:], 'label': Y[split_point:]}

    train_ds = Dataset.from_dict(train_data)
    val_ds = Dataset.from_dict(val_data)

    return train_ds, val_ds
def monthly_windows(first_year, first_month, last_year, last_month):
    """Yield one fine-tuning window per month of an inclusive month range.

    Each item is ``(label, sDate, eDate)``: ``label`` is ``"YYYY-MM"``,
    ``sDate`` is the 25th of the previous month and ``eDate`` is day 31 of
    the month itself, both formatted ``"YYYYMMDD"``. Day 31 is used for every
    month, so ``eDate`` is an upper bound and not always a calendar date.

    Args:
        first_year, first_month: first month of the range.
        last_year, last_month: last month of the range (included).
    """
    year, month = first_year, first_month
    while (year, month) <= (last_year, last_month):
        prev_year, prev_month = (year, month - 1) if month > 1 else (year - 1, 12)
        yield (
            f"{year:04d}-{month:02d}",
            f"{prev_year:04d}{prev_month:02d}25",
            f"{year:04d}{month:02d}31",
        )
        year, month = (year, month + 1) if month < 12 else (year + 1, 1)


if __name__ == "__main__":
    # RESOURCE_PATH stays a module-level name here; code elsewhere in this
    # file refers to it directly.
    PROJECT_HOME = os.getcwd()
    RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
    model_path = os.path.join(RESOURCE_PATH, "model")
    stock_code = "252670"

    vitTrainer = VitTrainer(RESOURCE_PATH)

    # Training run on a three-day window.
    train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200729", eDate="20200731")
    vitTrainer.train(train_ds, val_ds, model_path)

    # Month-by-month schedule: train on 2020-07, then fine-tune once per month
    # from 2020-08 to 2022-07. Off by default. It replaces a hand-written,
    # disabled list of calls whose 2021-10 entry had the end date "20212031".
    RUN_MONTHLY_SCHEDULE = False
    if RUN_MONTHLY_SCHEDULE:
        print("ym: 2020-07")
        train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200701", eDate="20200731")
        vitTrainer.train(train_ds, val_ds, model_path)

        for label, sDate, eDate in monthly_windows(2020, 8, 2022, 7):
            print("ym: " + label)
            train_ds, val_ds = vitTrainer.getData(stock_code, sDate=sDate, eDate=eDate)
            vitTrainer.finetunning(train_ds, val_ds, model_path)

View File

@@ -154,13 +154,16 @@ class Stock2Vector(HTS):
return df, minmax_df return df, minmax_df
def getTrainData(self, stock_code): def getTrainData(self, stock_code, sDate=None, eDate=None):
tableName = 'hts' tableName = 'hts'
conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db")) conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
cursor = conn.cursor() cursor = conn.cursor()
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, "20220726", "20220731")) if sDate is None and eDate is None:
#cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,)) cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
else:
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, sDate, eDate))
db_result = cursor.fetchall() db_result = cursor.fetchall()
temp_result = [] temp_result = []
for rows in db_result: for rows in db_result:
@@ -168,6 +171,9 @@ class Stock2Vector(HTS):
temp_result.sort(key=lambda x: (x[0], x[1])) temp_result.sort(key=lambda x: (x[0], x[1]))
result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []} result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
if len(db_result) == 0:
return result
for rows in temp_result: for rows in temp_result:
ymd = rows[0] # hts.날짜 ymd = rows[0] # hts.날짜
hms = rows[1] # hts.시간 hms = rows[1] # hts.시간
@@ -246,9 +252,9 @@ class Stock2Vector(HTS):
return np.asarray(vector) return np.asarray(vector)
def getDataset2D(self, stock_code, VECTOR_SIZE = 381): def getDataset2D(self, data, VECTOR_SIZE = 381):
result = self.getTrainData(stock_code)
df, minmax_df = self.preprocessData(result) df, minmax_df = self.preprocessData(data)
TOTAL_X, TOTAL_Y = [], [] TOTAL_X, TOTAL_Y = [], []
for key in minmax_df: for key in minmax_df:
@@ -262,38 +268,24 @@ class Stock2Vector(HTS):
SIZE_WIDTH = len(TOTAL_X[0]) SIZE_WIDTH = len(TOTAL_X[0])
SIZE_HEIGHT = len(TOTAL_X) SIZE_HEIGHT = len(TOTAL_X)
X, Y = [], [] X, Y = [], []
for i in range(VECTOR_SIZE, SIZE_WIDTH): for i in range(VECTOR_SIZE-1, SIZE_WIDTH):
temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0) temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
for j in range(SIZE_HEIGHT): for j in range(SIZE_HEIGHT):
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE:i] temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE+1:i+1]
X.append(temp_X) X.append(temp_X)
if TOTAL_Y[0][i] == 0: if TOTAL_Y[0][i] == 0:
#Y.append([1, 0, 0])
Y.append(0) Y.append(0)
elif TOTAL_Y[0][i] == 0.5: elif TOTAL_Y[0][i] == 0.5:
#Y.append([0, 1, 0])
Y.append(1) Y.append(1)
else: else:
#Y.append([0, 0, 1])
Y.append(2) Y.append(2)
X = np.asarray(X) X = np.asarray(X)
Y = np.asarray(Y) Y = np.asarray(Y, dtype='int64')
return X, Y return X, Y
def makeDataset2D(self, stock_code, outFileName=None): def getDataset3D(self, data, VECTOR_SIZE = 299):
X, Y = self.getDataset2D(stock_code) df, minmax_df = self.preprocessData(data)
#reX = X.reshape(X.shape[0], (X.shape[1] * X.shape[2]))
#df = pd.DataFrame(np.hstack((reX, Y)))
#df.to_csv(outFileName, index=False, header=False)
return X, Y
def getDataset3D(self, stock_code, VECTOR_SIZE = 299):
result = self.getTrainData(stock_code)
df, minmax_df = self.preprocessData(result)
TOTAL_X, TOTAL_Y = [], [] TOTAL_X, TOTAL_Y = [], []
for key in minmax_df: for key in minmax_df:
@@ -338,8 +330,8 @@ if __name__ == "__main__":
for stock_code in stock_codes: for stock_code in stock_codes:
stock2Vector = Stock2Vector(RESOURCE_PATH) stock2Vector = Stock2Vector(RESOURCE_PATH)
# X, Y = stock2Vector.getDataset2D(stock_code) # data = self.stock2Vector.getTrainData(stock_code, sDate, eDate)
stock2Vector.makeDataset2D(stock_code, outFileName=os.path.join(RESOURCE_PATH, "tmp", "stock_features.csv")) # X, Y = self.stock2Vector.getDataset2D(data)
for given_day in stock_codes[stock_code]: for given_day in stock_codes[stock_code]:
data, minmax_data = stock2Vector.makeData(given_day, stock_code) data, minmax_data = stock2Vector.makeData(given_day, stock_code)

View File

@@ -1,59 +1,117 @@
# tensor - numpy - PILImage 변환 (https://qlsenddl-lab.tistory.com/37)
import os import os
import keras
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import random
import numpy as np import numpy as np
from keras.applications.imagenet_utils import decode_predictions from datasets import Dataset, load_dataset
from classification_models.keras import Classifiers import torch
import torchvision.transforms as transforms
from transformers import ViTFeatureExtractor, ViTForImageClassification, TrainingArguments, Trainer
from torchvision.transforms import (CenterCrop, Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, Resize, ToTensor)
from stock.util.Stock2Vector import Stock2Vector
class StockPredictor: class StockPredictor:
RESOURCE_PATH = None RESOURCE_PATH = None
stock2Vector = None stock2Vector = None
model_dir = None
predictor = None
def __init__(self): def __init__(self, RESOURCE_PATH):
return self.RESOURCE_PATH = RESOURCE_PATH
def getDataset(self, df): self.model_dir = os.path.join(RESOURCE_PATH, "tmp")
VECTOR_SIZE = 299 self.stock2Vector = Stock2Vector(RESOURCE_PATH)
TOTAL_X, TOTAL_Y = [], []
for key in df:
if key == "date":
continue
elif key == "label":
TOTAL_Y.append(df[key].tolist())
else:
TOTAL_X.append(df[key].tolist())
SIZE_WIDTH = len(TOTAL_X[0]) self.set_seed(42)
SIZE_HEIGHT = len(TOTAL_X)
X = []
for i in range(VECTOR_SIZE, SIZE_WIDTH):
temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
for j in range(SIZE_HEIGHT):
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i - VECTOR_SIZE:i]
temp_X = np.stack([temp_X, temp_X, temp_X], axis=-1)
X.append(temp_X)
X = np.asarray(X[len(X)-1]) self.num_labels = 3
self.id2label = {0: 'none', 1: 'sell', 2: 'buy'}
return X self.label2id = {'none': 0, 'sell': 1, 'buy': 2}
def predict(self, df, minmax_df, isRealTime=False):
X = self.getDataset(df)
# build model
n_classes = 3
Inceptionresnetv2, preprocess_input = Classifiers.get('inceptionresnetv2')
X = preprocess_input(X)
base_model = Inceptionresnetv2(input_shape=(299, 299, 3), include_top=False)
model = keras.models.Model(inputs=[base_model.input])
checkpoint_filename = os.path.join(self.RESOURCE_PATH, "model", "stock.ckpt")
model.load_weights(checkpoint_filename)
y = model.predict(X)
# result
print(decode_predictions(y))
self.trans = transforms.ToPILImage()
self.predictor = self.loadModel()
return return
def set_seed(self, seed=42, n_gpu=0):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: value handed to each generator.
        n_gpu: when greater than zero, the CUDA generators are seeded too.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(seed)
def loadModel(self):
    """Load the saved ViT classifier and wrap it in a ``Trainer`` for prediction.

    The feature extractor and the model are both read from
    ``self.model_dir``. As a side effect ``self._test_transforms`` is set to
    a resize / center-crop / to-tensor / normalize pipeline built from the
    feature extractor's size, mean and std.

    Returns:
        Trainer: trainer holding the loaded model, with ``self.collate_fn``
        as its data collator and the feature extractor as its tokenizer.
    """
    feature_extractor = ViTFeatureExtractor.from_pretrained(self.model_dir)

    normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
    self._test_transforms = Compose(
        [
            Resize(feature_extractor.size),
            CenterCrop(feature_extractor.size),
            ToTensor(),
            normalize,
        ]
    )

    model = ViTForImageClassification.from_pretrained(self.model_dir,
                                                      num_labels=self.num_labels,
                                                      id2label=self.id2label,
                                                      label2id=self.label2id)

    # This Trainer is built without a train or eval dataset, so only the
    # settings that matter for batch prediction are kept. Per-epoch
    # evaluation and best-model loading are not requested because there is
    # nothing to evaluate on.
    args = TrainingArguments(
        "stock_vit_predictor",
        per_device_eval_batch_size=762,
        logging_dir='logs',
        # Keep the 'img' column so the dataset transform can still read it.
        remove_unused_columns=False,
    )

    trainer = Trainer(
        model,
        args,
        data_collator=self.collate_fn,
        tokenizer=feature_extractor,
    )
    return trainer
def test_transforms(self, examples):
examples['pixel_values'] = [self._test_transforms(image.convert("RGB")) for image in examples['img']]
return examples
def collate_fn(self, examples):
    """Stack the ``pixel_values`` tensors of a list of examples into one batch.

    Labels are not included in the batch.

    Args:
        examples: list of mappings, each with a ``pixel_values`` tensor.

    Returns:
        dict: ``{"pixel_values": tensor}`` with the examples stacked along a
        new first dimension.
    """
    tensors = []
    for item in examples:
        tensors.append(item["pixel_values"])
    batch = {"pixel_values": torch.stack(tensors)}
    return batch
def predict(self, X, Y=None):
    """Run the loaded model on a sequence of 2-D samples.

    Each sample is converted to a PIL image, the images are wrapped in a
    ``Dataset`` whose transform is ``self.test_transforms``, and the dataset
    is passed to ``self.predictor.predict``.

    Args:
        X: sequence of 2-D samples, one per prediction.
        Y: optional labels, one per sample. When omitted, the dataset is
            built without a ``label`` column.

    Returns:
        The ``predictions`` attribute of the object returned by
        ``self.predictor.predict``.
    """
    print("Data count: ", len(X))

    # Add the leading channel axis with unsqueeze(0) rather than wrapping the
    # sample in a Python list (torch.tensor([x])), which sends an ndarray
    # through PyTorch's slow list conversion.
    # NOTE(review): assumes every x is a 2-D array - confirm against the caller.
    images = [self.trans(torch.as_tensor(x).unsqueeze(0)) for x in X]

    test_data = {'img': images}
    # Y defaults to None, and None is not a usable column value, so the
    # label column is only added when labels were actually supplied.
    if Y is not None:
        test_data['label'] = Y
    test_ds = Dataset.from_dict(test_data)

    # Set the transforms
    test_ds.set_transform(self.test_transforms)

    outputs = self.predictor.predict(test_ds)
    return outputs.predictions