init
This commit is contained in:
@@ -1,21 +1,45 @@
|
|||||||
|
# tensor - numpy - PILImage conversion (https://qlsenddl-lab.tistory.com/37)
|
||||||
|
|
||||||
import os
|
import os
|
||||||
os.environ['KMP_DUPLICATE_LIB_OK']='True'
|
os.environ['KMP_DUPLICATE_LIB_OK']='True'
|
||||||
from datasets import load_dataset
|
from datasets import Dataset
|
||||||
|
import torch
|
||||||
|
import torchvision.transforms as transforms
|
||||||
|
|
||||||
# load cifar10 (only small portion for demonstration purposes)
|
from stock.util.Stock2Vector import Stock2Vector
|
||||||
train_ds, test_ds = load_dataset('cifar10', split=['train[:5000]', 'test[:2000]'])
|
|
||||||
# split up training into training + validation
|
|
||||||
splits = train_ds.train_test_split(test_size=0.1)
|
|
||||||
train_ds = splits['train']
|
|
||||||
val_ds = splits['test']
|
|
||||||
|
|
||||||
id2label = {id:label for id, label in enumerate(train_ds.features['label'].names)}
|
PROJECT_HOME = os.path.join(os.path.dirname(__file__))
|
||||||
label2id = {label:id for id,label in id2label.items()}
|
RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
|
||||||
|
stock2Vector = Stock2Vector(RESOURCE_PATH)
|
||||||
|
X, Y = stock2Vector.getDataset2D("252670")
|
||||||
|
|
||||||
|
trans = transforms.ToPILImage()
|
||||||
|
X = [trans(torch.tensor([x])) for x in X]
|
||||||
|
|
||||||
|
split_point1 = int(len(X)*0.7)
|
||||||
|
split_point2 = int(len(X)*0.9)
|
||||||
|
train_X = X[:split_point1]
|
||||||
|
train_Y = Y[:split_point1]
|
||||||
|
valid_X = X[split_point1:split_point2]
|
||||||
|
valid_Y = Y[split_point1:split_point2]
|
||||||
|
test_X = X[split_point2:]
|
||||||
|
test_Y = Y[split_point2:]
|
||||||
|
|
||||||
|
id2label = {0: '0', 1: '1', 2: '2'}
|
||||||
|
label2id = {'0': 0, '1': 1, '2': 2}
|
||||||
|
|
||||||
|
# build train / validation / test datasets from the stock image vectors
|
||||||
|
train_data = {'img': train_X, 'label': train_Y}
|
||||||
|
val_dsta = {'img': valid_X, 'label': valid_Y}
|
||||||
|
test_data = {'img': test_X, 'label': test_Y}
|
||||||
|
|
||||||
|
train_ds = Dataset.from_dict(train_data)
|
||||||
|
val_ds = Dataset.from_dict(val_dsta)
|
||||||
|
test_ds = Dataset.from_dict(test_data)
|
||||||
|
|
||||||
from transformers import ViTFeatureExtractor
|
from transformers import ViTFeatureExtractor
|
||||||
|
|
||||||
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
|
feature_extractor = ViTFeatureExtractor()
|
||||||
|
|
||||||
from torchvision.transforms import (CenterCrop,
|
from torchvision.transforms import (CenterCrop,
|
||||||
Compose,
|
Compose,
|
||||||
@@ -67,7 +91,10 @@ def collate_fn(examples):
|
|||||||
return {"pixel_values": pixel_values, "labels": labels}
|
return {"pixel_values": pixel_values, "labels": labels}
|
||||||
|
|
||||||
train_dataloader = DataLoader(train_ds, collate_fn=collate_fn, batch_size=4)
|
train_dataloader = DataLoader(train_ds, collate_fn=collate_fn, batch_size=4)
|
||||||
|
train_data_loader = torch.utils.data.DataLoader(train_X,
|
||||||
|
batch_size=32,
|
||||||
|
shuffle=True,
|
||||||
|
num_workers=16)
|
||||||
|
|
||||||
batch = next(iter(train_dataloader))
|
batch = next(iter(train_dataloader))
|
||||||
for k,v in batch.items():
|
for k,v in batch.items():
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import csv
|
||||||
import copy
|
import copy
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -158,8 +159,8 @@ class Stock2Vector(HTS):
|
|||||||
conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
|
conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
#cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, "20220721", "20220731"))
|
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ', (stock_code, "20220701", "20220731"))
|
||||||
cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
|
#cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
|
||||||
db_result = cursor.fetchall()
|
db_result = cursor.fetchall()
|
||||||
temp_result = []
|
temp_result = []
|
||||||
for rows in db_result:
|
for rows in db_result:
|
||||||
@@ -245,7 +246,7 @@ class Stock2Vector(HTS):
|
|||||||
|
|
||||||
return np.asarray(vector)
|
return np.asarray(vector)
|
||||||
|
|
||||||
def getDataset2D(self, stock_code, VECTOR_SIZE = 224):
|
def getDataset2D(self, stock_code, VECTOR_SIZE = 381):
|
||||||
result = self.getTrainData(stock_code)
|
result = self.getTrainData(stock_code)
|
||||||
df, minmax_df = self.preprocessData(result)
|
df, minmax_df = self.preprocessData(result)
|
||||||
|
|
||||||
@@ -266,17 +267,30 @@ class Stock2Vector(HTS):
|
|||||||
for j in range(SIZE_HEIGHT):
|
for j in range(SIZE_HEIGHT):
|
||||||
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE:i]
|
temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE:i]
|
||||||
X.append(temp_X)
|
X.append(temp_X)
|
||||||
if int(TOTAL_Y[0][i]) == 0:
|
if TOTAL_Y[0][i] == 0:
|
||||||
Y.append([1, 0, 0])
|
#Y.append([1, 0, 0])
|
||||||
elif int(TOTAL_Y[0][i]) == 0.5:
|
Y.append([0])
|
||||||
Y.append([0, 1, 0])
|
elif TOTAL_Y[0][i] == 0.5:
|
||||||
|
#Y.append([0, 1, 0])
|
||||||
|
Y.append([1])
|
||||||
else:
|
else:
|
||||||
Y.append([0, 0, 1])
|
#Y.append([0, 0, 1])
|
||||||
|
Y.append([2])
|
||||||
|
|
||||||
X = np.asarray(X)
|
X = np.asarray(X)
|
||||||
Y = np.asarray(Y)
|
Y = np.asarray(Y)
|
||||||
return X, Y
|
return X, Y
|
||||||
|
|
||||||
|
def makeDataset2D(self, stock_code, outFileName=None):
|
||||||
|
X, Y = self.getDataset2D(stock_code)
|
||||||
|
|
||||||
|
#reX = X.reshape(X.shape[0], (X.shape[1] * X.shape[2]))
|
||||||
|
#df = pd.DataFrame(np.hstack((reX, Y)))
|
||||||
|
#df.to_csv(outFileName, index=False, header=False)
|
||||||
|
|
||||||
|
|
||||||
|
return X, Y
|
||||||
|
|
||||||
def getDataset3D(self, stock_code, VECTOR_SIZE = 299):
|
def getDataset3D(self, stock_code, VECTOR_SIZE = 299):
|
||||||
result = self.getTrainData(stock_code)
|
result = self.getTrainData(stock_code)
|
||||||
df, minmax_df = self.preprocessData(result)
|
df, minmax_df = self.preprocessData(result)
|
||||||
@@ -324,9 +338,10 @@ if __name__ == "__main__":
|
|||||||
for stock_code in stock_codes:
|
for stock_code in stock_codes:
|
||||||
stock2Vector = Stock2Vector(RESOURCE_PATH)
|
stock2Vector = Stock2Vector(RESOURCE_PATH)
|
||||||
|
|
||||||
for given_day in stock_codes[stock_code]:
|
# X, Y = stock2Vector.getDataset2D(stock_code)
|
||||||
X, Y = stock2Vector.getDataset2D(stock_code)
|
stock2Vector.makeDataset2D(stock_code, outFileName=os.path.join(RESOURCE_PATH, "tmp", "stock_features.csv"))
|
||||||
|
|
||||||
|
for given_day in stock_codes[stock_code]:
|
||||||
data, minmax_data = stock2Vector.makeData(given_day, stock_code)
|
data, minmax_data = stock2Vector.makeData(given_day, stock_code)
|
||||||
vector = stock2Vector.vectorize(data)
|
vector = stock2Vector.vectorize(data)
|
||||||
minmax_vector = stock2Vector.vectorize(minmax_data)
|
minmax_vector = stock2Vector.vectorize(minmax_data)
|
||||||
|
|||||||
Reference in New Issue
Block a user