Patch summary (free text; ignored by git apply / patch):
  * VitTrainer.py: replace the CIFAR-10 download with a dataset built from
    Stock2Vector.getDataset2D("252670"), split 70/20/10 into train/val/test.
  * stock/util/Stock2Vector.py: restrict the query to 20220701-20220731,
    emit class-index labels instead of one-hot vectors, add makeDataset2D.
Review corrections relative to the recorded commit (so the post-image hash
on the first index line no longer matches): valid_Y / test_Y are sliced
from Y instead of X, val_dsta is spelled val_data, comments are in English.
Leading whitespace inside hunks was reconstructed; verify before applying.

diff --git a/VitTrainer.py b/VitTrainer.py
index 239a146..0e84940 100755
--- a/VitTrainer.py
+++ b/VitTrainer.py
@@ -1,21 +1,45 @@
+# tensor - numpy - PILImage conversion (https://qlsenddl-lab.tistory.com/37)
+
 import os
 os.environ['KMP_DUPLICATE_LIB_OK']='True'
 
-from datasets import load_dataset
+from datasets import Dataset
+import torch
+import torchvision.transforms as transforms
 
-# load cifar10 (only small portion for demonstration purposes)
-train_ds, test_ds = load_dataset('cifar10', split=['train[:5000]', 'test[:2000]'])
-# split up training into training + validation
-splits = train_ds.train_test_split(test_size=0.1)
-train_ds = splits['train']
-val_ds = splits['test']
+from stock.util.Stock2Vector import Stock2Vector
 
-id2label = {id:label for id, label in enumerate(train_ds.features['label'].names)}
-label2id = {label:id for id,label in id2label.items()}
+PROJECT_HOME = os.path.join(os.path.dirname(__file__))
+RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
+stock2Vector = Stock2Vector(RESOURCE_PATH)
+X, Y = stock2Vector.getDataset2D("252670")
+trans = transforms.ToPILImage()
+X = [trans(torch.tensor([x])) for x in X]
+
+split_point1 = int(len(X)*0.7)
+split_point2 = int(len(X)*0.9)
+train_X = X[:split_point1]
+train_Y = Y[:split_point1]
+valid_X = X[split_point1:split_point2]
+valid_Y = Y[split_point1:split_point2]
+test_X = X[split_point2:]
+test_Y = Y[split_point2:]
+
+id2label = {0: '0', 1: '1', 2: '2'}
+label2id = {'0': 0, '1': 1, '2': 2}
+
+# wrap the train / validation / test splits as datasets.Dataset objects
+train_data = {'img': train_X, 'label': train_Y}
+val_data = {'img': valid_X, 'label': valid_Y}
+test_data = {'img': test_X, 'label': test_Y}
+
+train_ds = Dataset.from_dict(train_data)
+val_ds = Dataset.from_dict(val_data)
+test_ds = Dataset.from_dict(test_data)
 
 from transformers import ViTFeatureExtractor
 
-feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
+feature_extractor = ViTFeatureExtractor()
 
 from torchvision.transforms import (CenterCrop,
                                     Compose,
@@ -67,7 +91,10 @@ def collate_fn(examples):
     return {"pixel_values": pixel_values, "labels": labels}
 
 train_dataloader = DataLoader(train_ds, collate_fn=collate_fn, batch_size=4)
-
+train_data_loader = torch.utils.data.DataLoader(train_X,
+                                                batch_size=32,
+                                                shuffle=True,
+                                                num_workers=16)
 
 batch = next(iter(train_dataloader))
 for k,v in batch.items():
diff --git a/stock/util/Stock2Vector.py b/stock/util/Stock2Vector.py
index 57df8d6..9853ad7 100644
--- a/stock/util/Stock2Vector.py
+++ b/stock/util/Stock2Vector.py
@@ -1,4 +1,5 @@
 import os
+import csv
 import copy
 import sqlite3
 import numpy as np
@@ -158,8 +159,8 @@ class Stock2Vector(HTS):
         conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
         cursor = conn.cursor()
 
-        #cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, "20220721", "20220731"))
-        cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
+        cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, "20220701", "20220731"))
+        #cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
         db_result = cursor.fetchall()
         temp_result = []
         for rows in db_result:
@@ -245,7 +246,7 @@ class Stock2Vector(HTS):
 
         return np.asarray(vector)
 
-    def getDataset2D(self, stock_code, VECTOR_SIZE = 224):
+    def getDataset2D(self, stock_code, VECTOR_SIZE = 381):
         result = self.getTrainData(stock_code)
         df, minmax_df = self.preprocessData(result)
 
@@ -266,17 +267,30 @@ class Stock2Vector(HTS):
             for j in range(SIZE_HEIGHT):
                 temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE:i]
             X.append(temp_X)
-            if int(TOTAL_Y[0][i]) == 0:
-                Y.append([1, 0, 0])
-            elif int(TOTAL_Y[0][i]) == 0.5:
-                Y.append([0, 1, 0])
+            if TOTAL_Y[0][i] == 0:
+                #Y.append([1, 0, 0])
+                Y.append([0])
+            elif TOTAL_Y[0][i] == 0.5:
+                #Y.append([0, 1, 0])
+                Y.append([1])
             else:
-                Y.append([0, 0, 1])
+                #Y.append([0, 0, 1])
+                Y.append([2])
 
         X = np.asarray(X)
         Y = np.asarray(Y)
         return X, Y
 
+    def makeDataset2D(self, stock_code, outFileName=None):
+        X, Y = self.getDataset2D(stock_code)
+
+        #reX = X.reshape(X.shape[0], (X.shape[1] * X.shape[2]))
+        #df = pd.DataFrame(np.hstack((reX, Y)))
+        #df.to_csv(outFileName, index=False, header=False)
+
+
+        return X, Y
+
     def getDataset3D(self, stock_code, VECTOR_SIZE = 299):
         result = self.getTrainData(stock_code)
         df, minmax_df = self.preprocessData(result)
@@ -324,9 +338,10 @@ if __name__ == "__main__":
 
     for stock_code in stock_codes:
         stock2Vector = Stock2Vector(RESOURCE_PATH)
-        for given_day in stock_codes[stock_code]:
-            X, Y = stock2Vector.getDataset2D(stock_code)
+        # X, Y = stock2Vector.getDataset2D(stock_code)
+        stock2Vector.makeDataset2D(stock_code, outFileName=os.path.join(RESOURCE_PATH, "tmp", "stock_features.csv"))
 
+        for given_day in stock_codes[stock_code]:
             data, minmax_data = stock2Vector.makeData(given_day, stock_code)
             vector = stock2Vector.vectorize(data)
             minmax_vector = stock2Vector.vectorize(minmax_data)