Move the sliding-window dataset builder out of StockTrainer and into
Stock2Vector (getDataset2D / getDataset3D), train on the 2D variant, and
read every stored row for a stock code instead of a fixed date range.

Notes on this revision of the patch:
- Labels are one-hot encoded by comparing the scaled float itself. The
  previous test `int(label) == 0.5` could never be true (and int(0.5) is 0),
  so the middle class [0, 1, 0] was never produced.
- getDataset2D and getDataset3D share _buildWindows/_oneHotLabel instead of
  carrying two copies of the same loop; the unused temp_Y and df locals
  are gone.
- Commented-out code is dropped; the removed query remains visible in the
  "-" lines below.
- The getDataset2D call in the __main__ block does not depend on given_day,
  so it runs once before the loop rather than on every iteration.
- Context-line whitespace was reconstructed; the index hashes are carried
  over from the original patch and do not describe the new post-image.

diff --git a/StockTrainer.py b/StockTrainer.py
index de3abe1..8426d73 100644
--- a/StockTrainer.py
+++ b/StockTrainer.py
@@ -15,42 +15,10 @@ class StockTrainer:
         self.stock2Vector = Stock2Vector(RESOURCE_PATH)
         return
 
-    def getDataset(self, stock_code):
-        VECTOR_SIZE = 299
-        result = self.stock2Vector.getTrainData(stock_code)
-        df, minmax_df = self.stock2Vector.preprocessData(result)
-
-        TOTAL_X, TOTAL_Y = [], []
-        for key in minmax_df:
-            if key == "date":
-                continue
-            elif key == "label":
-                TOTAL_Y.append(minmax_df[key].tolist())
-            else:
-                TOTAL_X.append(minmax_df[key].tolist())
-
-        SIZE_WIDTH = len(TOTAL_X[0])
-        SIZE_HEIGHT = len(TOTAL_X)
-        X, Y = [], []
-        for i in range(VECTOR_SIZE, SIZE_WIDTH):
-            temp_X, temp_Y = np.zeros((VECTOR_SIZE, VECTOR_SIZE)), np.zeros(0)
-            for j in range(SIZE_HEIGHT):
-                temp_X[j][0:VECTOR_SIZE] = TOTAL_X[j][i-VECTOR_SIZE:i]
-            temp_X = np.stack([temp_X, temp_X, temp_X], axis=-1)
-            X.append(temp_X)
-            if int(TOTAL_Y[0][i]) == 0:
-                Y.append([1, 0, 0])
-            elif int(TOTAL_Y[0][i]) == 0.5:
-                Y.append([0, 1, 0])
-            else:
-                Y.append([0, 0, 1])
-
-        X = np.asarray(X)
-        Y = np.asarray(Y)
-        return X, Y
 
     def train(self, stock_code):
-        X, Y = self.getDataset(stock_code)
+        # Swap in self.stock2Vector.getDataset3D(stock_code) for 3-channel input.
+        X, Y = self.stock2Vector.getDataset2D(stock_code)
 
         # build model
         n_classes = 3
diff --git a/stock/util/Stock2Vector.py b/stock/util/Stock2Vector.py
index 47f9058..57df8d6 100644
--- a/stock/util/Stock2Vector.py
+++ b/stock/util/Stock2Vector.py
@@ -158,14 +158,11 @@ class Stock2Vector(HTS):
         conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
         cursor = conn.cursor()
 
-        cursor.execute(
-            'SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? and (ymd >= ? and ymd <= ?) order by ymd desc, hms ',
-            (stock_code, "20220721", "20220731"))
+        cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd desc, hms ', (stock_code,))
         db_result = cursor.fetchall()
         temp_result = []
         for rows in db_result:
-            temp_result.append(
-                [rows[0], rows[1], rows[2], rows[3], rows[4], rows[5], rows[6], 0 if rows[7] is None else rows[7]])
+            temp_result.append([rows[0], rows[1], rows[2], rows[3], rows[4], rows[5], rows[6], 0 if rows[7] is None else rows[7]])
         temp_result.sort(key=lambda x: (x[0], x[1]))
 
         result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
@@ -247,6 +244,61 @@ class Stock2Vector(HTS):
 
         return np.asarray(vector)
 
+    def _oneHotLabel(self, label):
+        """Return the 3-class one-hot list for a value from the "label" column.
+
+        0 -> [1, 0, 0], 0.5 -> [0, 1, 0], anything else -> [0, 0, 1].
+        The float itself is compared: int(label) can never equal 0.5 and
+        truncates 0.5 to 0, which made the middle class unreachable.
+        """
+        if np.isclose(label, 0):
+            return [1, 0, 0]
+        if np.isclose(label, 0.5):
+            return [0, 1, 0]
+        return [0, 0, 1]
+
+    def _buildWindows(self, stock_code, VECTOR_SIZE):
+        """Build sliding windows and one-hot labels for stock_code.
+
+        Every column of minmax_df except "date" and "label" becomes one
+        row of a window; window i holds the VECTOR_SIZE samples before
+        index i and is paired with the label at index i. Rows past the
+        number of feature columns stay zero.
+
+        Returns two lists: VECTOR_SIZE x VECTOR_SIZE arrays and one-hot labels.
+        """
+        result = self.getTrainData(stock_code)
+        _, minmax_df = self.preprocessData(result)
+
+        TOTAL_X, TOTAL_Y = [], []
+        for key in minmax_df:
+            if key == "date":
+                continue
+            elif key == "label":
+                TOTAL_Y.append(minmax_df[key].tolist())
+            else:
+                TOTAL_X.append(minmax_df[key].tolist())
+
+        windows, labels = [], []
+        for i in range(VECTOR_SIZE, len(TOTAL_X[0])):
+            window = np.zeros((VECTOR_SIZE, VECTOR_SIZE))
+            for j, series in enumerate(TOTAL_X):
+                window[j][0:VECTOR_SIZE] = series[i - VECTOR_SIZE:i]
+            windows.append(window)
+            labels.append(self._oneHotLabel(TOTAL_Y[0][i]))
+        return windows, labels
+
+    def getDataset2D(self, stock_code, VECTOR_SIZE = 224):
+        """Return (X, Y) arrays where each sample is one 2-D window."""
+        windows, labels = self._buildWindows(stock_code, VECTOR_SIZE)
+        return np.asarray(windows), np.asarray(labels)
+
+    def getDataset3D(self, stock_code, VECTOR_SIZE = 299):
+        """Return (X, Y) arrays where each window is repeated on a trailing axis of size 3."""
+        windows, labels = self._buildWindows(stock_code, VECTOR_SIZE)
+        X = np.asarray([np.stack([w, w, w], axis=-1) for w in windows])
+        return X, np.asarray(labels)
+
 if __name__ == "__main__":
     PROJECT_HOME = os.path.join(os.path.dirname(os.path.join(os.path.dirname(os.path.join(os.path.dirname(__file__))))))
     RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")
@@ -262,6 +314,7 @@ if __name__ == "__main__":
     stock2Vector = Stock2Vector(RESOURCE_PATH)
+    X, Y = stock2Vector.getDataset2D(stock_code)
 
     for given_day in stock_codes[stock_code]:
         data, minmax_data = stock2Vector.makeData(given_day, stock_code)
         vector = stock2Vector.vectorize(data)
         minmax_vector = stock2Vector.vectorize(minmax_data)