init
This commit is contained in:
@@ -154,13 +154,16 @@ class Stock2Vector(HTS):
|
||||
|
||||
return df, minmax_df
|
||||
|
||||
def getTrainData(self, stock_code):
|
||||
def getTrainData(self, stock_code, sDate=None, eDate=None):
|
||||
tableName = 'hts'
|
||||
conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
|
||||
cursor = conn.cursor()
|
||||
|
||||
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, "20220726", "20220731"))
|
||||
#cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
|
||||
if sDate is None and eDate is None:
|
||||
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
|
||||
else:
|
||||
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, sDate, eDate))
|
||||
|
||||
db_result = cursor.fetchall()
|
||||
temp_result = []
|
||||
for rows in db_result:
|
||||
@@ -168,6 +171,9 @@ class Stock2Vector(HTS):
|
||||
temp_result.sort(key=lambda x: (x[0], x[1]))
|
||||
|
||||
result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
|
||||
if len(db_result) == 0:
|
||||
return result
|
||||
|
||||
for rows in temp_result:
|
||||
ymd = rows[0] # hts.날짜
|
||||
hms = rows[1] # hts.시간
|
||||
@@ -246,9 +252,9 @@ class Stock2Vector(HTS):
|
||||
|
||||
return np.asarray(vector)
|
||||
|
||||
def getDataset2D(self, stock_code, VECTOR_SIZE = 381):
|
||||
result = self.getTrainData(stock_code)
|
||||
df, minmax_df = self.preprocessData(result)
|
||||
def getDataset2D(self, data, VECTOR_SIZE = 381):
|
||||
|
||||
df, minmax_df = self.preprocessData(data)
|
||||
|
||||
TOTAL_X, TOTAL_Y = [], []
|
||||
for key in minmax_df:
|
||||
@@ -262,38 +268,24 @@ class Stock2Vector(HTS):
|
||||
SIZE_WIDTH = len(TOTAL_X[0])
|
||||
SIZE_HEIGHT = len(TOTAL_X)
|
||||
X, Y = [], []
|
||||
for i in range(VECTOR_SIZE, SIZE_WIDTH):
|
||||
for i in range(VECTOR_SIZE-1, SIZE_WIDTH):
|
||||
temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
|
||||
for j in range(SIZE_HEIGHT):
|
||||
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE:i]
|
||||
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE+1:i+1]
|
||||
X.append(temp_X)
|
||||
if TOTAL_Y[0][i] == 0:
|
||||
#Y.append([1, 0, 0])
|
||||
Y.append(0)
|
||||
elif TOTAL_Y[0][i] == 0.5:
|
||||
#Y.append([0, 1, 0])
|
||||
Y.append(1)
|
||||
else:
|
||||
#Y.append([0, 0, 1])
|
||||
Y.append(2)
|
||||
|
||||
X = np.asarray(X)
|
||||
Y = np.asarray(Y)
|
||||
Y = np.asarray(Y, dtype='int64')
|
||||
return X, Y
|
||||
|
||||
def makeDataset2D(self, stock_code, outFileName=None):
|
||||
X, Y = self.getDataset2D(stock_code)
|
||||
|
||||
#reX = X.reshape(X.shape[0], (X.shape[1] * X.shape[2]))
|
||||
#df = pd.DataFrame(np.hstack((reX, Y)))
|
||||
#df.to_csv(outFileName, index=False, header=False)
|
||||
|
||||
|
||||
return X, Y
|
||||
|
||||
def getDataset3D(self, stock_code, VECTOR_SIZE = 299):
|
||||
result = self.getTrainData(stock_code)
|
||||
df, minmax_df = self.preprocessData(result)
|
||||
def getDataset3D(self, data, VECTOR_SIZE = 299):
|
||||
df, minmax_df = self.preprocessData(data)
|
||||
|
||||
TOTAL_X, TOTAL_Y = [], []
|
||||
for key in minmax_df:
|
||||
@@ -338,8 +330,8 @@ if __name__ == "__main__":
|
||||
for stock_code in stock_codes:
|
||||
stock2Vector = Stock2Vector(RESOURCE_PATH)
|
||||
|
||||
# X, Y = stock2Vector.getDataset2D(stock_code)
|
||||
stock2Vector.makeDataset2D(stock_code, outFileName=os.path.join(RESOURCE_PATH, "tmp", "stock_features.csv"))
|
||||
# data = self.stock2Vector.getTrainData(stock_code, sDate, eDate)
|
||||
# X, Y = self.stock2Vector.getDataset2D(data)
|
||||
|
||||
for given_day in stock_codes[stock_code]:
|
||||
data, minmax_data = stock2Vector.makeData(given_day, stock_code)
|
||||
|
||||
@@ -1,59 +1,117 @@
|
||||
# tensor - numpy - PILImage 변환 (https://qlsenddl-lab.tistory.com/37)
|
||||
|
||||
import os
|
||||
import keras
|
||||
|
||||
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
|
||||
import random
|
||||
import numpy as np
|
||||
from keras.applications.imagenet_utils import decode_predictions
|
||||
from classification_models.keras import Classifiers
|
||||
from datasets import Dataset, load_dataset
|
||||
import torch
|
||||
import torchvision.transforms as transforms
|
||||
|
||||
from transformers import ViTFeatureExtractor, ViTForImageClassification, TrainingArguments, Trainer
|
||||
from torchvision.transforms import (CenterCrop, Compose, Normalize, RandomHorizontalFlip, RandomResizedCrop, Resize, ToTensor)
|
||||
|
||||
from stock.util.Stock2Vector import Stock2Vector
|
||||
|
||||
|
||||
class StockPredictor:
|
||||
|
||||
RESOURCE_PATH = None
|
||||
stock2Vector = None
|
||||
model_dir = None
|
||||
predictor = None
|
||||
|
||||
def __init__(self):
|
||||
return
|
||||
def __init__(self, RESOURCE_PATH):
|
||||
self.RESOURCE_PATH = RESOURCE_PATH
|
||||
|
||||
def getDataset(self, df):
|
||||
VECTOR_SIZE = 299
|
||||
TOTAL_X, TOTAL_Y = [], []
|
||||
for key in df:
|
||||
if key == "date":
|
||||
continue
|
||||
elif key == "label":
|
||||
TOTAL_Y.append(df[key].tolist())
|
||||
else:
|
||||
TOTAL_X.append(df[key].tolist())
|
||||
self.model_dir = os.path.join(RESOURCE_PATH, "tmp")
|
||||
self.stock2Vector = Stock2Vector(RESOURCE_PATH)
|
||||
|
||||
SIZE_WIDTH = len(TOTAL_X[0])
|
||||
SIZE_HEIGHT = len(TOTAL_X)
|
||||
X = []
|
||||
for i in range(VECTOR_SIZE, SIZE_WIDTH):
|
||||
temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
|
||||
for j in range(SIZE_HEIGHT):
|
||||
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i - VECTOR_SIZE:i]
|
||||
temp_X = np.stack([temp_X, temp_X, temp_X], axis=-1)
|
||||
X.append(temp_X)
|
||||
self.set_seed(42)
|
||||
|
||||
X = np.asarray(X[len(X)-1])
|
||||
|
||||
return X
|
||||
|
||||
def predict(self, df, minmax_df, isRealTime=False):
|
||||
X = self.getDataset(df)
|
||||
|
||||
# build model
|
||||
n_classes = 3
|
||||
Inceptionresnetv2, preprocess_input = Classifiers.get('inceptionresnetv2')
|
||||
X = preprocess_input(X)
|
||||
base_model = Inceptionresnetv2(input_shape=(299, 299, 3), include_top=False)
|
||||
model = keras.models.Model(inputs=[base_model.input])
|
||||
|
||||
checkpoint_filename = os.path.join(self.RESOURCE_PATH, "model", "stock.ckpt")
|
||||
model.load_weights(checkpoint_filename)
|
||||
|
||||
y = model.predict(X)
|
||||
|
||||
# result
|
||||
print(decode_predictions(y))
|
||||
self.num_labels = 3
|
||||
self.id2label = {0: 'none', 1: 'sell', 2: 'buy'}
|
||||
self.label2id = {'none': 0, 'sell': 1, 'buy': 2}
|
||||
|
||||
self.trans = transforms.ToPILImage()
|
||||
self.predictor = self.loadModel()
|
||||
|
||||
return
|
||||
|
||||
def set_seed(self, seed=42, n_gpu=0):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Value handed to every generator.
        n_gpu: When greater than zero, the CUDA generators are seeded too.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(seed)
|
||||
|
||||
def loadModel(self):
    """Build a ``Trainer`` around the ViT classifier stored in ``self.model_dir``.

    Side effect: stores the image pipeline (resize, center-crop, to-tensor,
    normalize) in ``self._test_transforms``; ``test_transforms`` relies on it.

    Reads ``self.model_dir``, ``self.num_labels``, ``self.id2label`` and
    ``self.label2id``, so those must be set before this is called.

    Returns:
        The configured ``Trainer``. No train or eval dataset is attached.
    """
    feature_extractor = ViTFeatureExtractor.from_pretrained(self.model_dir)

    # Normalize with the statistics saved alongside the feature extractor.
    normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)

    self._test_transforms = Compose(
        [
            Resize(feature_extractor.size),
            CenterCrop(feature_extractor.size),
            ToTensor(),
            normalize,
        ]
    )

    model = ViTForImageClassification.from_pretrained(
        self.model_dir,
        num_labels=self.num_labels,
        id2label=self.id2label,
        label2id=self.label2id,
    )

    args = TrainingArguments(
        "stock_vit_predictor",
        save_strategy="epoch",
        evaluation_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=762,
        per_device_eval_batch_size=762,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        logging_dir='logs',
        # Keep the custom 'pixel_values' column produced by the transform.
        remove_unused_columns=False,
        num_train_epochs=4,
    )

    trainer = Trainer(
        model,
        args,
        data_collator=self.collate_fn,
        tokenizer=feature_extractor,
    )

    return trainer
|
||||
|
||||
def test_transforms(self, examples):
|
||||
examples['pixel_values'] = [self._test_transforms(image.convert("RGB")) for image in examples['img']]
|
||||
return examples
|
||||
|
||||
def collate_fn(self, examples):
    """Stack the per-example ``'pixel_values'`` tensors into one batch tensor.

    Labels are deliberately left out of the batch: only ``'pixel_values'``
    is returned.

    Args:
        examples: Sequence of mappings, each holding a ``'pixel_values'`` tensor.

    Returns:
        ``{"pixel_values": tensor}`` with a new leading batch dimension.
    """
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    return {"pixel_values": pixel_values}
|
||||
|
||||
def predict(self, X, Y=None):
    """Run the loaded predictor over a batch of feature arrays.

    Args:
        X: Sized collection of numeric arrays. Each one gets a leading
            dimension of size 1 and is turned into an image by ``self.trans``
            (presumably 2-D arrays from ``Stock2Vector.getDataset2D``;
            confirm against caller).
        Y: Optional labels, one per item of ``X``. When omitted, no label
            column is added to the dataset.

    Returns:
        The ``predictions`` attribute of what ``self.predictor.predict`` returns.
    """
    print("Data count: ", len(X))

    # as_tensor + unsqueeze gives the same (1, ...) tensor as torch.tensor([x])
    # without building it from a Python list of arrays.
    images = [self.trans(torch.as_tensor(x).unsqueeze(0)) for x in X]

    test_data = {'img': images}
    # A None column cannot be turned into a dataset column, so the label
    # column is only added when labels were actually supplied.
    if Y is not None:
        test_data['label'] = Y

    test_ds = Dataset.from_dict(test_data)

    # Transform lazily: 'pixel_values' is computed when examples are accessed.
    test_ds.set_transform(self.test_transforms)

    outputs = self.predictor.predict(test_ds)
    return outputs.predictions
|
||||
Reference in New Issue
Block a user