init
This commit is contained in:
@@ -188,7 +188,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
today = datetime.today()
|
today = datetime.today()
|
||||||
|
|
||||||
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
|
PROJECT_HOME = os.getcwd()
|
||||||
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
||||||
|
|
||||||
# KODEX 인버스 * 2
|
# KODEX 인버스 * 2
|
||||||
|
|||||||
@@ -190,7 +190,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
today = datetime.today()
|
today = datetime.today()
|
||||||
|
|
||||||
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
|
PROJECT_HOME = os.getcwd()
|
||||||
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
||||||
|
|
||||||
# KODEX 인버스 * 2
|
# KODEX 인버스 * 2
|
||||||
|
|||||||
@@ -6,22 +6,24 @@ import os
|
|||||||
|
|
||||||
from hts.HTS import HTS
|
from hts.HTS import HTS
|
||||||
from stock.util.Stock2Vector import Stock2Vector
|
from stock.util.Stock2Vector import Stock2Vector
|
||||||
from stock.util.StockPredictor import StockPredictor
|
|
||||||
from stock.util.LabelMaker import LabelMaker
|
from stock.util.LabelMaker import LabelMaker
|
||||||
|
from stock.util.StockPredictor import StockPredictor
|
||||||
from hts.BuySellChecker import BuySellChecker
|
from hts.BuySellChecker import BuySellChecker
|
||||||
|
|
||||||
class Simulation (HTS):
|
class Simulation (HTS):
|
||||||
stock2Vector = None
|
stock2Vector = None
|
||||||
buySellChecker = None
|
buySellChecker = None
|
||||||
|
stockPredictor = None
|
||||||
|
|
||||||
def __init__(self, RESOURCE_PATH):
|
def __init__(self, RESOURCE_PATH):
|
||||||
super().__init__(RESOURCE_PATH)
|
super().__init__(RESOURCE_PATH)
|
||||||
|
|
||||||
|
self.RESOURCE_PATH = RESOURCE_PATH
|
||||||
|
|
||||||
self.stock2Vector = Stock2Vector(RESOURCE_PATH)
|
self.stock2Vector = Stock2Vector(RESOURCE_PATH)
|
||||||
self.labelMaker = LabelMaker(RESOURCE_PATH)
|
self.labelMaker = LabelMaker(RESOURCE_PATH)
|
||||||
self.stockPredictor = StockPredictor()
|
|
||||||
self.buySellChecker = BuySellChecker()
|
self.buySellChecker = BuySellChecker()
|
||||||
self.RESOURCE_PATH = RESOURCE_PATH
|
self.stockPredictor = StockPredictor(RESOURCE_PATH)
|
||||||
#self.connect()
|
#self.connect()
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -134,14 +136,15 @@ class Simulation (HTS):
|
|||||||
def simulate(self, stock_code, today, method="rule"):
|
def simulate(self, stock_code, today, method="rule"):
|
||||||
|
|
||||||
if method == "answer":
|
if method == "answer":
|
||||||
bsLine, data = self.labelMaker.makeCandidate(stock_code, today, view=True)
|
self.labelMaker.makeCandidate(stock_code, today, view=True)
|
||||||
else:
|
else:
|
||||||
if method == "ml":
|
if method == "ml":
|
||||||
LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=10)
|
LAST_DATA = self.stock2Vector.getLastData(stock_code, today, n=1)
|
||||||
result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
|
data = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
|
||||||
|
X, Y = self.stock2Vector.getDataset2D(data)
|
||||||
|
|
||||||
df, minmax_df = self.stock2Vector.preprocessData(result)
|
predY = self.stockPredictor.predict(X, Y)
|
||||||
bsLine, data = self.stockPredictor.predict(df, minmax_df, isRealTime=False)
|
print (predY)
|
||||||
else:
|
else:
|
||||||
LAST_DATA = self.stock2Vector.getLastData(stock_code, today)
|
LAST_DATA = self.stock2Vector.getLastData(stock_code, today)
|
||||||
result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
|
result = self.stock2Vector.getRealTime(stock_code, today, LAST_DATA)
|
||||||
@@ -159,17 +162,17 @@ class Simulation (HTS):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
|
PROJECT_HOME = os.getcwd()
|
||||||
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
||||||
|
|
||||||
# to check bying
|
# to check bying
|
||||||
stock_codes = {
|
stock_codes = {
|
||||||
# 252670
|
# 252670
|
||||||
# 122630
|
# 122630
|
||||||
"252670": ['20220805'],
|
"252670": ['20200731'],
|
||||||
}
|
}
|
||||||
|
|
||||||
method = "" # "ml", "answer"
|
method = "ml" # "ml", "answer"
|
||||||
for stock_code in stock_codes:
|
for stock_code in stock_codes:
|
||||||
simulation = Simulation(RESOURCE_PATH)
|
simulation = Simulation(RESOURCE_PATH)
|
||||||
|
|
||||||
|
|||||||
@@ -1,2 +1,3 @@
|
|||||||
|
cd C:\workspace\DeepStock
|
||||||
C:\workspace\Anaconda3\envs\hts\python C:\workspace\DeepStock\StockCrawler.py
|
C:\workspace\Anaconda3\envs\hts\python C:\workspace\DeepStock\StockCrawler.py
|
||||||
pause
|
pause
|
||||||
@@ -12,7 +12,7 @@ today = datetime.now().strftime("%Y-%m-%d")
|
|||||||
|
|
||||||
# DB Browser for SQLite: http://hleecaster.com/python-sqlite3/
|
# DB Browser for SQLite: http://hleecaster.com/python-sqlite3/
|
||||||
|
|
||||||
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
|
PROJECT_HOME = os.getcwd()
|
||||||
|
|
||||||
START_DATE = "1900.01.01"
|
START_DATE = "1900.01.01"
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ class StockTrainer:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
|
PROJECT_HOME = os.getcwd()
|
||||||
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
||||||
|
|
||||||
stock_codes = {
|
stock_codes = {
|
||||||
|
|||||||
398
VitTrainer.py
398
VitTrainer.py
@@ -2,156 +2,326 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
os.environ['KMP_DUPLICATE_LIB_OK']='True'
|
os.environ['KMP_DUPLICATE_LIB_OK']='True'
|
||||||
from datasets import Dataset
|
import random
|
||||||
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
from datasets import Dataset, load_dataset
|
||||||
|
from datasets import load_metric
|
||||||
|
from transformers import TrainingArguments, Trainer
|
||||||
|
from transformers import ViTForImageClassification
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
import torchvision.transforms as transforms
|
import torchvision.transforms as transforms
|
||||||
|
from transformers import ViTFeatureExtractor
|
||||||
|
from torchvision.transforms import (CenterCrop, Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, Resize, ToTensor)
|
||||||
|
|
||||||
from stock.util.Stock2Vector import Stock2Vector
|
from stock.util.Stock2Vector import Stock2Vector
|
||||||
|
|
||||||
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
|
class VitTrainer:
|
||||||
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
|
||||||
stock2Vector = Stock2Vector(RESOURCE_PATH)
|
|
||||||
X, Y = stock2Vector.getDataset2D("252670")
|
|
||||||
|
|
||||||
trans = transforms.ToPILImage()
|
RESOURCE_PATH = None
|
||||||
X = [trans(torch.tensor([x])) for x in X]
|
stock2Vector = None
|
||||||
|
|
||||||
split_point1 = int(len(X)*0.7)
|
num_labels = None
|
||||||
split_point2 = int(len(X)*0.9)
|
id2label = None
|
||||||
train_X = X[:split_point1]
|
label2id = None
|
||||||
train_Y = Y[:split_point1]
|
|
||||||
valid_X = X[split_point1:split_point2]
|
|
||||||
valid_Y = X[split_point1:split_point2]
|
|
||||||
test_X = X[split_point2:]
|
|
||||||
test_Y = X[split_point2:]
|
|
||||||
|
|
||||||
id2label = {0: '0', 1: '1', 2: '2'}
|
args = None
|
||||||
label2id = {'0': 0, '1': 1, '2': 2}
|
|
||||||
|
|
||||||
# load cifar10 (only small portion for demonstration purposes)
|
_train_transforms = None
|
||||||
train_data = {'img': train_X, 'label': train_Y}
|
_val_transforms = None
|
||||||
val_dsta = {'img': valid_X, 'label': valid_Y}
|
|
||||||
test_data = {'img': test_X, 'label': test_Y}
|
|
||||||
|
|
||||||
train_ds = Dataset.from_dict(train_data)
|
def __init__(self, RESOURCE_PATH):
|
||||||
val_ds = Dataset.from_dict(val_dsta)
|
self.set_seed(42)
|
||||||
test_ds = Dataset.from_dict(test_data)
|
|
||||||
|
|
||||||
from transformers import ViTFeatureExtractor
|
self.RESOURCE_PATH = RESOURCE_PATH
|
||||||
|
self.stock2Vector = Stock2Vector(RESOURCE_PATH)
|
||||||
|
|
||||||
feature_extractor = ViTFeatureExtractor()
|
self.num_labels = 3
|
||||||
|
self.id2label = {0: 'none', 1: 'sell', 2: 'buy'}
|
||||||
|
self.label2id = {'none': 0, 'sell': 1, 'buy': 2}
|
||||||
|
|
||||||
from torchvision.transforms import (CenterCrop,
|
self.args = TrainingArguments(
|
||||||
Compose,
|
f"stock_vit_predictor",
|
||||||
Normalize,
|
save_strategy="epoch",
|
||||||
RandomHorizontalFlip,
|
evaluation_strategy="epoch",
|
||||||
RandomResizedCrop,
|
learning_rate=2e-5,
|
||||||
Resize,
|
per_device_train_batch_size=381,
|
||||||
ToTensor)
|
per_device_eval_batch_size=381,
|
||||||
|
weight_decay=0.01,
|
||||||
|
load_best_model_at_end=True,
|
||||||
|
metric_for_best_model="accuracy",
|
||||||
|
logging_dir='logs',
|
||||||
|
remove_unused_columns=False,
|
||||||
|
num_train_epochs=20,
|
||||||
|
)
|
||||||
|
|
||||||
normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
|
return
|
||||||
_train_transforms = Compose(
|
|
||||||
[
|
|
||||||
RandomResizedCrop(feature_extractor.size),
|
|
||||||
RandomHorizontalFlip(),
|
|
||||||
ToTensor(),
|
|
||||||
normalize,
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
_val_transforms = Compose(
|
def set_seed(self, seed=42, n_gpu=0):
|
||||||
[
|
random.seed(seed)
|
||||||
Resize(feature_extractor.size),
|
np.random.seed(seed)
|
||||||
CenterCrop(feature_extractor.size),
|
torch.manual_seed(seed)
|
||||||
ToTensor(),
|
if n_gpu > 0:
|
||||||
normalize,
|
torch.cuda.manual_seed_all(seed)
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
def train_transforms(examples):
|
def train_transforms(self, examples):
|
||||||
examples['pixel_values'] = [_train_transforms(image.convert("RGB")) for image in examples['img']]
|
examples['pixel_values'] = [self._train_transforms(image.convert("RGB")) for image in examples['img']]
|
||||||
return examples
|
return examples
|
||||||
|
|
||||||
def val_transforms(examples):
|
def val_transforms(self, examples):
|
||||||
examples['pixel_values'] = [_val_transforms(image.convert("RGB")) for image in examples['img']]
|
examples['pixel_values'] = [self._val_transforms(image.convert("RGB")) for image in examples['img']]
|
||||||
return examples
|
return examples
|
||||||
|
|
||||||
# Set the transforms
|
def collate_fn(self, examples):
|
||||||
train_ds.set_transform(train_transforms)
|
pixel_values = torch.stack([example["pixel_values"] for example in examples])
|
||||||
val_ds.set_transform(val_transforms)
|
labels = torch.tensor([example["label"] for example in examples])
|
||||||
test_ds.set_transform(val_transforms)
|
return {"pixel_values": pixel_values, "labels": labels}
|
||||||
|
|
||||||
|
def compute_metrics(self, eval_pred):
|
||||||
|
predictions, labels = eval_pred
|
||||||
|
predictions = np.argmax(predictions, axis=1)
|
||||||
|
metric = load_metric("accuracy")
|
||||||
|
return metric.compute(predictions=predictions, references=labels)
|
||||||
|
|
||||||
from torch.utils.data import DataLoader
|
def getFeature(self, model_path=None):
|
||||||
import torch
|
if model_path == None:
|
||||||
|
self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
|
||||||
|
#self.feature_extractor = ViTFeatureExtractor()
|
||||||
|
else:
|
||||||
|
#self.feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
|
||||||
|
self.feature_extractor = ViTFeatureExtractor.from_pretrained(model_path)
|
||||||
|
|
||||||
def collate_fn(examples):
|
normalize = Normalize(mean=self.feature_extractor.image_mean, std=self.feature_extractor.image_std)
|
||||||
pixel_values = torch.stack([example["pixel_values"] for example in examples])
|
self._train_transforms = Compose(
|
||||||
labels = torch.tensor([example["label"] for example in examples])
|
[
|
||||||
return {"pixel_values": pixel_values, "labels": labels}
|
RandomResizedCrop(self.feature_extractor.size),
|
||||||
|
RandomHorizontalFlip(),
|
||||||
|
ToTensor(),
|
||||||
|
normalize,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
train_dataloader = DataLoader(train_ds, collate_fn=collate_fn, batch_size=4)
|
self._val_transforms = Compose(
|
||||||
train_data_loader = torch.utils.data.DataLoader(train_X,
|
[
|
||||||
batch_size=32,
|
Resize(self.feature_extractor.size),
|
||||||
shuffle=True,
|
CenterCrop(self.feature_extractor.size),
|
||||||
num_workers=16)
|
ToTensor(),
|
||||||
|
normalize,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
batch = next(iter(train_dataloader))
|
def train(self, train_ds, val_ds, model_path):
|
||||||
for k,v in batch.items():
|
self.getFeature()
|
||||||
if isinstance(v, torch.Tensor):
|
|
||||||
print(k, v.shape)
|
|
||||||
|
|
||||||
|
# Set the transforms
|
||||||
|
train_ds.set_transform(self.train_transforms)
|
||||||
|
val_ds.set_transform(self.val_transforms)
|
||||||
|
|
||||||
from transformers import ViTForImageClassification
|
train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4)
|
||||||
|
|
||||||
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',
|
batch = next(iter(train_dataloader))
|
||||||
num_labels=10,
|
for k,v in batch.items():
|
||||||
id2label=id2label,
|
if isinstance(v, torch.Tensor):
|
||||||
label2id=label2id)
|
print(k, v.shape)
|
||||||
|
|
||||||
|
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k',
|
||||||
|
num_labels=self.num_labels,
|
||||||
|
id2label=self.id2label,
|
||||||
|
label2id=self.label2id)
|
||||||
|
model = ViTForImageClassification(model.config)
|
||||||
|
|
||||||
from transformers import TrainingArguments, Trainer
|
trainer = Trainer(
|
||||||
|
model,
|
||||||
|
self.args,
|
||||||
|
train_dataset=train_ds,
|
||||||
|
eval_dataset=val_ds,
|
||||||
|
data_collator=self.collate_fn,
|
||||||
|
compute_metrics=self.compute_metrics,
|
||||||
|
tokenizer=self.feature_extractor
|
||||||
|
)
|
||||||
|
|
||||||
metric_name = "accuracy"
|
trainer.train()
|
||||||
|
|
||||||
args = TrainingArguments(
|
# save trained model
|
||||||
f"test-cifar-10",
|
model_to_save = (model.module if hasattr(model, "module") else model) # Take care of distributed/parallel training
|
||||||
save_strategy="epoch",
|
model_to_save.save_pretrained(model_path)
|
||||||
evaluation_strategy="epoch",
|
self.feature_extractor.save_pretrained(model_path)
|
||||||
learning_rate=2e-5,
|
torch.save(self.args, os.path.join(RESOURCE_PATH, "model", "training_args.bin"))
|
||||||
per_device_train_batch_size=10,
|
|
||||||
per_device_eval_batch_size=4,
|
|
||||||
num_train_epochs=3,
|
|
||||||
weight_decay=0.01,
|
|
||||||
load_best_model_at_end=True,
|
|
||||||
metric_for_best_model=metric_name,
|
|
||||||
logging_dir='logs',
|
|
||||||
remove_unused_columns=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
from datasets import load_metric
|
def finetunning(self, train_ds, val_ds, model_path):
|
||||||
import numpy as np
|
self.getFeature(model_path)
|
||||||
|
|
||||||
metric = load_metric("accuracy")
|
# Set the transforms
|
||||||
|
train_ds.set_transform(self.train_transforms)
|
||||||
|
val_ds.set_transform(self.val_transforms)
|
||||||
|
|
||||||
def compute_metrics(eval_pred):
|
train_dataloader = DataLoader(train_ds, collate_fn=self.collate_fn, batch_size=4)
|
||||||
predictions, labels = eval_pred
|
|
||||||
predictions = np.argmax(predictions, axis=1)
|
|
||||||
return metric.compute(predictions=predictions, references=labels)
|
|
||||||
|
|
||||||
|
batch = next(iter(train_dataloader))
|
||||||
|
for k,v in batch.items():
|
||||||
|
if isinstance(v, torch.Tensor):
|
||||||
|
print(k, v.shape)
|
||||||
|
|
||||||
import torch
|
model = ViTForImageClassification.from_pretrained(model_path,
|
||||||
|
num_labels=self.num_labels,
|
||||||
|
id2label=self.id2label,
|
||||||
|
label2id=self.label2id)
|
||||||
|
trainer = Trainer(
|
||||||
|
model,
|
||||||
|
self.args,
|
||||||
|
train_dataset=train_ds,
|
||||||
|
eval_dataset=val_ds,
|
||||||
|
data_collator=self.collate_fn,
|
||||||
|
compute_metrics=self.compute_metrics,
|
||||||
|
tokenizer=self.feature_extractor
|
||||||
|
)
|
||||||
|
|
||||||
trainer = Trainer(
|
trainer.train()
|
||||||
model,
|
|
||||||
args,
|
|
||||||
train_dataset=train_ds,
|
|
||||||
eval_dataset=val_ds,
|
|
||||||
data_collator=collate_fn,
|
|
||||||
compute_metrics=compute_metrics,
|
|
||||||
tokenizer=feature_extractor,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# save trained model
|
||||||
|
model_to_save = (model.module if hasattr(model, "module") else model) # Take care of distributed/parallel training
|
||||||
|
model_to_save.save_pretrained(model_path)
|
||||||
|
self.feature_extractor.save_pretrained(model_path)
|
||||||
|
torch.save(self.args, os.path.join(RESOURCE_PATH, "model", "training_args.bin"))
|
||||||
|
|
||||||
trainer.train()
|
return
|
||||||
|
|
||||||
|
def getData(self, stock_code, sDate, eDate):
|
||||||
|
data = self.stock2Vector.getTrainData(stock_code, sDate, eDate)
|
||||||
|
X, Y = self.stock2Vector.getDataset2D(data)
|
||||||
|
print("Data count: ", len(X))
|
||||||
|
|
||||||
|
trans = transforms.ToPILImage()
|
||||||
|
X = [trans(torch.tensor([x])) for x in X]
|
||||||
|
|
||||||
|
split_point1 = int(len(X) * 0.9)
|
||||||
|
train_X = X[:split_point1]
|
||||||
|
train_Y = Y[:split_point1]
|
||||||
|
valid_X = X[split_point1:]
|
||||||
|
valid_Y = Y[split_point1:]
|
||||||
|
|
||||||
|
# load cifar10 (only small portion for demonstration purposes)
|
||||||
|
train_data = {'img': train_X, 'label': train_Y}
|
||||||
|
val_dsta = {'img': valid_X, 'label': valid_Y}
|
||||||
|
|
||||||
|
train_ds = Dataset.from_dict(train_data)
|
||||||
|
val_ds = Dataset.from_dict(val_dsta)
|
||||||
|
|
||||||
|
return train_ds, val_ds
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
PROJECT_HOME = os.getcwd()
|
||||||
|
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
||||||
|
model_path = os.path.join(RESOURCE_PATH, "model")
|
||||||
|
|
||||||
|
stock_code = "252670"
|
||||||
|
vitTrainer = VitTrainer(RESOURCE_PATH)
|
||||||
|
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200729", eDate="20200731")
|
||||||
|
vitTrainer.train(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
"""
|
||||||
|
print("ym: 2020-07")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200701", eDate="20200731")
|
||||||
|
vitTrainer.train(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print ("ym: 2020-08")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200725", eDate="20200831")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2020-09")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200825", eDate="20200931")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2020-10")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20200925", eDate="20201031")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2020-11")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201025", eDate="20201131")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2020-12")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201125", eDate="20201231")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-01")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20201225", eDate="20210131")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-02")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210125", eDate="20210231")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-03")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210225", eDate="20210331")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-04")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210325", eDate="20210431")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-05")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210425", eDate="20210531")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-06")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210525", eDate="20210631")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-07")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210625", eDate="20210731")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-08")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210725", eDate="20210831")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-09")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210825", eDate="20210931")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-10")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20210925", eDate="20212031")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-11")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211025", eDate="20211131")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2021-12")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211125", eDate="20211231")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2022-01")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20211225", eDate="20220131")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2022-02")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220125", eDate="20220231")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2022-03")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220225", eDate="20220331")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2022-04")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220325", eDate="20220431")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2022-05")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220425", eDate="20220531")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2022-06")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220525", eDate="20220631")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
|
||||||
|
print("ym: 2022-07")
|
||||||
|
train_ds, val_ds = vitTrainer.getData(stock_code, sDate="20220625", eDate="20220731")
|
||||||
|
vitTrainer.finetunning(train_ds, val_ds, model_path)
|
||||||
|
"""
|
||||||
@@ -154,13 +154,16 @@ class Stock2Vector(HTS):
|
|||||||
|
|
||||||
return df, minmax_df
|
return df, minmax_df
|
||||||
|
|
||||||
def getTrainData(self, stock_code):
|
def getTrainData(self, stock_code, sDate=None, eDate=None):
|
||||||
tableName = 'hts'
|
tableName = 'hts'
|
||||||
conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
|
conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, "20220726", "20220731"))
|
if sDate is None and eDate is None:
|
||||||
#cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
|
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
|
||||||
|
else:
|
||||||
|
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, sDate, eDate))
|
||||||
|
|
||||||
db_result = cursor.fetchall()
|
db_result = cursor.fetchall()
|
||||||
temp_result = []
|
temp_result = []
|
||||||
for rows in db_result:
|
for rows in db_result:
|
||||||
@@ -168,6 +171,9 @@ class Stock2Vector(HTS):
|
|||||||
temp_result.sort(key=lambda x: (x[0], x[1]))
|
temp_result.sort(key=lambda x: (x[0], x[1]))
|
||||||
|
|
||||||
result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
|
result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
|
||||||
|
if len(db_result) == 0:
|
||||||
|
return result
|
||||||
|
|
||||||
for rows in temp_result:
|
for rows in temp_result:
|
||||||
ymd = rows[0] # hts.날짜
|
ymd = rows[0] # hts.날짜
|
||||||
hms = rows[1] # hts.시간
|
hms = rows[1] # hts.시간
|
||||||
@@ -246,9 +252,9 @@ class Stock2Vector(HTS):
|
|||||||
|
|
||||||
return np.asarray(vector)
|
return np.asarray(vector)
|
||||||
|
|
||||||
def getDataset2D(self, stock_code, VECTOR_SIZE = 381):
|
def getDataset2D(self, data, VECTOR_SIZE = 381):
|
||||||
result = self.getTrainData(stock_code)
|
|
||||||
df, minmax_df = self.preprocessData(result)
|
df, minmax_df = self.preprocessData(data)
|
||||||
|
|
||||||
TOTAL_X, TOTAL_Y = [], []
|
TOTAL_X, TOTAL_Y = [], []
|
||||||
for key in minmax_df:
|
for key in minmax_df:
|
||||||
@@ -262,38 +268,24 @@ class Stock2Vector(HTS):
|
|||||||
SIZE_WIDTH = len(TOTAL_X[0])
|
SIZE_WIDTH = len(TOTAL_X[0])
|
||||||
SIZE_HEIGHT = len(TOTAL_X)
|
SIZE_HEIGHT = len(TOTAL_X)
|
||||||
X, Y = [], []
|
X, Y = [], []
|
||||||
for i in range(VECTOR_SIZE, SIZE_WIDTH):
|
for i in range(VECTOR_SIZE-1, SIZE_WIDTH):
|
||||||
temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
|
temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
|
||||||
for j in range(SIZE_HEIGHT):
|
for j in range(SIZE_HEIGHT):
|
||||||
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE:i]
|
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE+1:i+1]
|
||||||
X.append(temp_X)
|
X.append(temp_X)
|
||||||
if TOTAL_Y[0][i] == 0:
|
if TOTAL_Y[0][i] == 0:
|
||||||
#Y.append([1, 0, 0])
|
|
||||||
Y.append(0)
|
Y.append(0)
|
||||||
elif TOTAL_Y[0][i] == 0.5:
|
elif TOTAL_Y[0][i] == 0.5:
|
||||||
#Y.append([0, 1, 0])
|
|
||||||
Y.append(1)
|
Y.append(1)
|
||||||
else:
|
else:
|
||||||
#Y.append([0, 0, 1])
|
|
||||||
Y.append(2)
|
Y.append(2)
|
||||||
|
|
||||||
X = np.asarray(X)
|
X = np.asarray(X)
|
||||||
Y = np.asarray(Y)
|
Y = np.asarray(Y, dtype='int64')
|
||||||
return X, Y
|
return X, Y
|
||||||
|
|
||||||
def makeDataset2D(self, stock_code, outFileName=None):
|
def getDataset3D(self, data, VECTOR_SIZE = 299):
|
||||||
X, Y = self.getDataset2D(stock_code)
|
df, minmax_df = self.preprocessData(data)
|
||||||
|
|
||||||
#reX = X.reshape(X.shape[0], (X.shape[1] * X.shape[2]))
|
|
||||||
#df = pd.DataFrame(np.hstack((reX, Y)))
|
|
||||||
#df.to_csv(outFileName, index=False, header=False)
|
|
||||||
|
|
||||||
|
|
||||||
return X, Y
|
|
||||||
|
|
||||||
def getDataset3D(self, stock_code, VECTOR_SIZE = 299):
|
|
||||||
result = self.getTrainData(stock_code)
|
|
||||||
df, minmax_df = self.preprocessData(result)
|
|
||||||
|
|
||||||
TOTAL_X, TOTAL_Y = [], []
|
TOTAL_X, TOTAL_Y = [], []
|
||||||
for key in minmax_df:
|
for key in minmax_df:
|
||||||
@@ -338,8 +330,8 @@ if __name__ == "__main__":
|
|||||||
for stock_code in stock_codes:
|
for stock_code in stock_codes:
|
||||||
stock2Vector = Stock2Vector(RESOURCE_PATH)
|
stock2Vector = Stock2Vector(RESOURCE_PATH)
|
||||||
|
|
||||||
# X, Y = stock2Vector.getDataset2D(stock_code)
|
# data = self.stock2Vector.getTrainData(stock_code, sDate, eDate)
|
||||||
stock2Vector.makeDataset2D(stock_code, outFileName=os.path.join(RESOURCE_PATH, "tmp", "stock_features.csv"))
|
# X, Y = self.stock2Vector.getDataset2D(data)
|
||||||
|
|
||||||
for given_day in stock_codes[stock_code]:
|
for given_day in stock_codes[stock_code]:
|
||||||
data, minmax_data = stock2Vector.makeData(given_day, stock_code)
|
data, minmax_data = stock2Vector.makeData(given_day, stock_code)
|
||||||
|
|||||||
@@ -1,59 +1,117 @@
|
|||||||
|
# tensor - numpy - PILImage 변환 (https://qlsenddl-lab.tistory.com/37)
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import keras
|
|
||||||
|
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
|
||||||
|
import random
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from keras.applications.imagenet_utils import decode_predictions
|
from datasets import Dataset, load_dataset
|
||||||
from classification_models.keras import Classifiers
|
import torch
|
||||||
|
import torchvision.transforms as transforms
|
||||||
|
|
||||||
|
from transformers import ViTFeatureExtractor, ViTForImageClassification, TrainingArguments, Trainer
|
||||||
|
from torchvision.transforms import (CenterCrop, Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, Resize, ToTensor)
|
||||||
|
|
||||||
|
from stock.util.Stock2Vector import Stock2Vector
|
||||||
|
|
||||||
|
|
||||||
class StockPredictor:
|
class StockPredictor:
|
||||||
|
|
||||||
RESOURCE_PATH = None
|
RESOURCE_PATH = None
|
||||||
stock2Vector = None
|
stock2Vector = None
|
||||||
|
model_dir = None
|
||||||
|
predictor = None
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, RESOURCE_PATH):
|
||||||
return
|
self.RESOURCE_PATH = RESOURCE_PATH
|
||||||
|
|
||||||
def getDataset(self, df):
|
self.model_dir = os.path.join(RESOURCE_PATH, "tmp")
|
||||||
VECTOR_SIZE = 299
|
self.stock2Vector = Stock2Vector(RESOURCE_PATH)
|
||||||
TOTAL_X, TOTAL_Y = [], []
|
|
||||||
for key in df:
|
|
||||||
if key == "date":
|
|
||||||
continue
|
|
||||||
elif key == "label":
|
|
||||||
TOTAL_Y.append(df[key].tolist())
|
|
||||||
else:
|
|
||||||
TOTAL_X.append(df[key].tolist())
|
|
||||||
|
|
||||||
SIZE_WIDTH = len(TOTAL_X[0])
|
self.set_seed(42)
|
||||||
SIZE_HEIGHT = len(TOTAL_X)
|
|
||||||
X = []
|
|
||||||
for i in range(VECTOR_SIZE, SIZE_WIDTH):
|
|
||||||
temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
|
|
||||||
for j in range(SIZE_HEIGHT):
|
|
||||||
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i - VECTOR_SIZE:i]
|
|
||||||
temp_X = np.stack([temp_X, temp_X, temp_X], axis=-1)
|
|
||||||
X.append(temp_X)
|
|
||||||
|
|
||||||
X = np.asarray(X[len(X)-1])
|
self.num_labels = 3
|
||||||
|
self.id2label = {0: 'none', 1: 'sell', 2: 'buy'}
|
||||||
return X
|
self.label2id = {'none': 0, 'sell': 1, 'buy': 2}
|
||||||
|
|
||||||
def predict(self, df, minmax_df, isRealTime=False):
|
|
||||||
X = self.getDataset(df)
|
|
||||||
|
|
||||||
# build model
|
|
||||||
n_classes = 3
|
|
||||||
Inceptionresnetv2, preprocess_input = Classifiers.get('inceptionresnetv2')
|
|
||||||
X = preprocess_input(X)
|
|
||||||
base_model = Inceptionresnetv2(input_shape=(299, 299, 3), include_top=False)
|
|
||||||
model = keras.models.Model(inputs=[base_model.input])
|
|
||||||
|
|
||||||
checkpoint_filename = os.path.join(self.RESOURCE_PATH, "model", "stock.ckpt")
|
|
||||||
model.load_weights(checkpoint_filename)
|
|
||||||
|
|
||||||
y = model.predict(X)
|
|
||||||
|
|
||||||
# result
|
|
||||||
print(decode_predictions(y))
|
|
||||||
|
|
||||||
|
self.trans = transforms.ToPILImage()
|
||||||
|
self.predictor = self.loadModel()
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def set_seed(self, seed=42, n_gpu=0):
|
||||||
|
random.seed(seed)
|
||||||
|
np.random.seed(seed)
|
||||||
|
torch.manual_seed(seed)
|
||||||
|
if n_gpu > 0:
|
||||||
|
torch.cuda.manual_seed_all(seed)
|
||||||
|
|
||||||
|
def loadModel(self):
|
||||||
|
feature_extractor = ViTFeatureExtractor.from_pretrained(self.model_dir)
|
||||||
|
|
||||||
|
normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
|
||||||
|
|
||||||
|
self._test_transforms = Compose(
|
||||||
|
[
|
||||||
|
Resize(feature_extractor.size),
|
||||||
|
CenterCrop(feature_extractor.size),
|
||||||
|
ToTensor(),
|
||||||
|
normalize,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
model = ViTForImageClassification.from_pretrained(self.model_dir,
|
||||||
|
num_labels=self.num_labels,
|
||||||
|
id2label=self.id2label,
|
||||||
|
label2id=self.label2id)
|
||||||
|
args = TrainingArguments(
|
||||||
|
f"stock_vit_predictor",
|
||||||
|
save_strategy="epoch",
|
||||||
|
evaluation_strategy="epoch",
|
||||||
|
learning_rate=2e-5,
|
||||||
|
per_device_train_batch_size=762,
|
||||||
|
per_device_eval_batch_size=762,
|
||||||
|
weight_decay=0.01,
|
||||||
|
load_best_model_at_end=True,
|
||||||
|
metric_for_best_model="accuracy",
|
||||||
|
logging_dir='logs',
|
||||||
|
remove_unused_columns=False,
|
||||||
|
num_train_epochs=4,
|
||||||
|
)
|
||||||
|
|
||||||
|
trainer = Trainer(
|
||||||
|
model,
|
||||||
|
args,
|
||||||
|
data_collator=self.collate_fn,
|
||||||
|
tokenizer=feature_extractor,
|
||||||
|
)
|
||||||
|
|
||||||
|
return trainer
|
||||||
|
|
||||||
|
def test_transforms(self, examples):
|
||||||
|
examples['pixel_values'] = [self._test_transforms(image.convert("RGB")) for image in examples['img']]
|
||||||
|
return examples
|
||||||
|
|
||||||
|
def collate_fn(self, examples):
|
||||||
|
pixel_values = torch.stack([example["pixel_values"] for example in examples])
|
||||||
|
#labels = torch.tensor([example["label"] for example in examples])
|
||||||
|
#return {"pixel_values": pixel_values, "labels": labels}
|
||||||
|
return {"pixel_values": pixel_values}
|
||||||
|
|
||||||
|
def predict(self, X, Y=None):
|
||||||
|
print("Data count: ", len(X))
|
||||||
|
|
||||||
|
X = [self.trans(torch.tensor([x])) for x in X]
|
||||||
|
|
||||||
|
test_X = X
|
||||||
|
test_Y = Y
|
||||||
|
|
||||||
|
# load cifar10 (only small portion for demonstration purposes)
|
||||||
|
test_data = {'img': test_X, 'label': test_Y}
|
||||||
|
|
||||||
|
test_ds = Dataset.from_dict(test_data)
|
||||||
|
|
||||||
|
# Set the transforms
|
||||||
|
test_ds.set_transform(self.test_transforms)
|
||||||
|
|
||||||
|
outputs = self.predictor.predict(test_ds)
|
||||||
|
return outputs.predictions
|
||||||
Reference in New Issue
Block a user