Files
DeepStock/stock/util/Stock2Vector.py
dsyoon 7c12e655b4 init
2022-08-19 22:51:11 +09:00

479 lines
18 KiB
Python

import os
import csv
import copy
import sqlite3
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from hts.BuySellChecker import BuySellChecker
from hts.HTS import HTS
from stock.analysis.Common import Common
from stock.analysis.Stochastic import Stochastic
from stock.analysis.RSI import RSI
from stock.analysis.MACD import MACD
from stock.analysis.IchimokuCloud import IchimokuCloud
class Stock2Vector(HTS):
    """Convert HTS price records into indicator tables and model input arrays.

    The class extends ``HTS`` and keeps one instance of each indicator
    helper so that the methods of this class can share them.
    """

    # Directory handed to the constructor; per-instance value set in __init__.
    RESOURCE_PATH = None

    # Indicator helpers. The class-level ``None`` values are placeholders
    # that __init__ replaces with real instances.
    common = None
    stochastic = None
    rsi = None
    macd = None
    ichimokuCloud = None

    def __init__(self, RESOURCE_PATH):
        """Initialise the ``HTS`` base and create the indicator helpers.

        Args:
            RESOURCE_PATH: Resource directory; forwarded to ``HTS.__init__``
                and stored on the instance.
        """
        super().__init__(RESOURCE_PATH)
        self.RESOURCE_PATH = RESOURCE_PATH

        # Helpers are created in a fixed order, one instance per object.
        self.buySellChecker = BuySellChecker()
        self.common = Common()
        self.stochastic = Stochastic()
        self.rsi = RSI()
        self.macd = MACD()
        self.ichimokuCloud = IchimokuCloud()
def analyze(self, result):
    """Build a per-bar indicator table from raw OHLCV sequences.

    Args:
        result: Mapping holding parallel sequences under the keys
            "time", "open", "close", "high", "low" and "vol".

    Returns:
        A ``pandas.DataFrame`` indexed by ``pd.DatetimeIndex(result["time"])``
        with the columns date, open, high, low, close, volume, upper, lower,
        avg3, avg5, avg10, avg20, avg30, avg60, macd, macds, macdo, fast_k,
        slow_k, slow_d, rsi and rsis. Any NaN left in the table is replaced
        with 0.

    Raises:
        IndexError: If ``result["close"]`` is empty.
    """
    opens = result["open"]
    closes = result["close"]
    highs = result["high"]
    lows = result["low"]
    volumes = result["vol"]
    timestamps = result["time"]

    # The warm-up rows of every moving average are filled with the first
    # close, so an empty series cannot be processed.
    first_close = closes[0]
    close_series = pd.Series(closes)

    averages = {
        f"avg{window}": close_series.rolling(window=window).mean().fillna(first_close).tolist()
        for window in (3, 5, 10, 20, 30, 60)
    }

    # Bollinger bands: 20-bar mean +/- two standard deviations. The first
    # 19 rows have no full window and stay NaN here; the final fillna(0)
    # below turns them into 0.
    band_middle = close_series.rolling(window=20).mean()
    band_width = close_series.rolling(window=20).std() * 2
    upper = (band_middle + band_width).tolist()
    lower = (band_middle - band_width).tolist()

    # Row-wise records in the layout the indicator helpers are called with.
    bars = [
        {
            'volume': volumes[i], 'close': closes[i], 'open': open_price,
            'high': highs[i], 'low': lows[i],
            'avg3': averages["avg3"][i], 'avg5': averages["avg5"][i],
            'avg10': averages["avg10"][i], 'avg20': averages["avg20"][i],
            'avg30': averages["avg30"][i], 'avg60': averages["avg60"][i],
        }
        for i, open_price in enumerate(opens)
    ]

    # Stochastic oscillator; gaps in the helper output are filled with 100.
    stochastic_df = self.stochastic.apply(bars, n=30, m=5, t=5).fillna(100)
    # MACD, same gap filling.
    macd_df = self.macd.apply(bars, short=12, long=26, t=9).fillna(100)
    # RSI, same gap filling.
    rsi_df = self.rsi.apply(bars, period=30, window=5).fillna(100)
    # NOTE(review): the Ichimoku cloud helper exists on the instance but is
    # not used by this method.

    data = pd.DataFrame({
        "date": timestamps,
        "open": opens, "high": highs, "low": lows, "close": closes,
        "volume": volumes, "upper": upper, "lower": lower,
        "avg3": averages["avg3"], "avg5": averages["avg5"],
        "avg10": averages["avg10"], "avg20": averages["avg20"],
        "avg30": averages["avg30"], "avg60": averages["avg60"],
        "macd": macd_df['macd'].tolist(),
        "macds": macd_df['macds'].tolist(),
        "macdo": macd_df['macdo'].tolist(),
        "fast_k": stochastic_df['fast_k'].tolist(),
        "slow_k": stochastic_df['slow_k'].tolist(),
        "slow_d": stochastic_df['slow_d'].tolist(),
        "rsi": rsi_df['rsi'].tolist(),
        "rsis": rsi_df['rsis'].tolist(),
    })
    data.index = pd.DatetimeIndex(timestamps)
    return data.fillna(0)
def getRealTime(self, stock_code, today, LAST_DATA=None):
    """Return a price-record dict for ``stock_code`` after ``getDBData`` ran on it.

    Args:
        stock_code: Code passed through to ``self.getDBData``.
        today: Day passed through to ``self.getDBData``.
        LAST_DATA: Optional existing record dict. It is deep-copied, so the
            caller's object is never modified.

    Returns:
        The record dict (fresh or copied) that was handed to
        ``self.getDBData(stock_code, today, ...)``.
    """
    if LAST_DATA is None:
        # No history supplied: start from an empty record set.
        records = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
    else:
        records = copy.deepcopy(LAST_DATA)

    self.getDBData(stock_code, today, records)
    return records
def makeData(self, today, stock_code):
    """Build the raw and min-max scaled indicator frames for one trading day.

    The records come from ``self.getLastData`` and ``self.getRealTime`` and
    are analysed by ``self.buySellChecker.analyze``. Rows belonging to the
    first two calendar days present in the analysed frame are then removed
    from both outputs.

    Args:
        today: Day forwarded to ``getLastData`` and ``getRealTime``.
        stock_code: Stock code forwarded to the same calls.

    Returns:
        Tuple ``(df, minmax_df)``. ``df`` is the analysed frame without the
        two leading days. ``minmax_df`` holds the same rows with every
        non-date column scaled to [0, 1] and the unscaled ``date`` column
        appended last. Scaling uses the min/max of the full frame, i.e.
        before the leading days are removed. Columns with a constant value
        scale to NaN.
    """
    LAST_DATA = self.getLastData(stock_code, today)
    result = self.getRealTime(stock_code, today, LAST_DATA)

    # The analysis step derives the indicator columns (e.g. Bollinger bands).
    df = self.buySellChecker.analyze(result)

    # Scale only the feature columns; the date column is carried over as is
    # instead of being pushed through datetime arithmetic and then dropped.
    features = df.drop(["date"], axis="columns")
    lowest = features.min()
    minmax_df = ((features - lowest) / (features.max() - lowest)).join(df['date'])

    # Drop the first two days (the original note describes four days of
    # input). Slicing keeps this safe when fewer than two days are present.
    leading_days = df['date'].dt.date.unique().tolist()[:2]
    df = df[~df['date'].dt.date.isin(leading_days)]
    minmax_df = minmax_df[~minmax_df['date'].dt.date.isin(leading_days)]
    return df, minmax_df
def getTrainData(self, stock_code, sDate=None, eDate=None):
    """Load the stored bars of one stock from ``hts.db`` in chronological order.

    Args:
        stock_code: Value matched against the ``CODE`` column of table ``hts``.
        sDate: Optional inclusive lower bound for ``ymd``.
        eDate: Optional inclusive upper bound for ``ymd``. Either bound may
            be given on its own.

    Returns:
        Dict with the keys "check" (an empty set) and "time", "open",
        "close", "high", "low", "vol", "label" (parallel lists ordered by
        ``ymd`` then ``hms``). "time" holds ``datetime`` objects, the other
        lists hold ints. A NULL label is stored as 0. All lists are empty
        when no row matches.
    """
    from contextlib import closing

    tableName = 'hts'
    query = 'SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=?'
    params = [stock_code]
    # Add only the bounds that were supplied; binding None would compare
    # against NULL and silently match nothing.
    if sDate is not None:
        query += ' and ymd >= ?'
        params.append(sDate)
    if eDate is not None:
        query += ' and ymd <= ?'
        params.append(eDate)
    query += ' order by ymd, hms'

    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))) as conn:
        rows = conn.execute(query, params).fetchall()

    result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
    for ymd, hms, open_price, high, low, close, volume, label in rows:
        # hms is padded to four digits (HHMM); seconds are fixed to "00".
        stamp = datetime.strptime(str(ymd) + " " + str(hms).zfill(4) + "00", '%Y%m%d %H%M%S')
        result["time"].append(stamp)
        result["open"].append(int(open_price))
        result["close"].append(int(close))
        result["high"].append(int(high))
        result["low"].append(int(low))
        result["vol"].append(int(volume))
        # The label (buy/sell marker) may be NULL in the table.
        result["label"].append(0 if label is None else int(label))
    return result
def preprocessData(self, result):
    """Analyse raw records and return the frame plus a min-max scaled copy.

    Args:
        result: Record dict handed to ``self.buySellChecker.analyze``.

    Returns:
        Tuple ``(df, minmax_df)``. ``df`` is the analysed frame.
        ``minmax_df`` contains every non-date column scaled to [0, 1],
        followed by the unscaled ``date`` column. NaN values (for example
        from a constant column, where max equals min) are replaced with 0.
    """
    # The analysis step derives the indicator columns (e.g. Bollinger bands).
    df = self.buySellChecker.analyze(result)

    # Keep the date column out of the arithmetic: it is not a feature and
    # is re-attached unchanged afterwards.
    features = df.drop(["date"], axis="columns")
    lowest = features.min()
    scaled = (features - lowest) / (features.max() - lowest)
    minmax_df = scaled.join(df['date']).fillna(0)
    return df, minmax_df
def makeTrainData(self, stock_code):
    """Load every stored bar of a stock and return raw and scaled indicator frames.

    Args:
        stock_code: Value matched against the ``CODE`` column of table
            ``hts`` in ``hts.db`` under ``self.RESOURCE_PATH``.

    Returns:
        Tuple ``(df, minmax_df)``. ``df`` is what
        ``self.buySellChecker.analyze`` returns for the loaded records.
        ``minmax_df`` contains every non-date column scaled to [0, 1],
        followed by the unscaled ``date`` column, with NaN replaced by 0.
    """
    from contextlib import closing

    tableName = 'hts'
    query = 'SELECT ymd, hms, open, high, low, close, volume, label FROM ' + tableName + ' WHERE CODE=? order by ymd, hms'

    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))) as conn:
        rows = conn.execute(query, (stock_code,)).fetchall()

    result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
    for ymd, hms, open_price, high, low, close, volume, label in rows:
        # hms is padded to four digits (HHMM); seconds are fixed to "00".
        stamp = datetime.strptime(str(ymd) + " " + str(hms).zfill(4) + "00", '%Y%m%d %H%M%S')
        result["time"].append(stamp)
        result["open"].append(int(open_price))
        result["close"].append(int(close))
        result["high"].append(int(high))
        result["low"].append(int(low))
        result["vol"].append(int(volume))
        # The label (buy/sell marker) may be NULL in the table.
        result["label"].append(0 if label is None else int(label))

    # The analysis step derives the indicator columns (e.g. Bollinger bands).
    df = self.buySellChecker.analyze(result)

    # Scale the feature columns only; the date column is re-attached as is.
    features = df.drop(["date"], axis="columns")
    lowest = features.min()
    scaled = (features - lowest) / (features.max() - lowest)
    minmax_df = scaled.join(df['date']).fillna(0)
    return df, minmax_df
def vectorize(self, data):
    """Stack every non-date column of ``data`` into one array.

    Args:
        data: Column-keyed container (iterating it yields the keys and
            ``data[key].tolist()`` yields that column's values).

    Returns:
        ``numpy.ndarray`` with one row per column, in iteration order,
        skipping the column named "date".
    """
    rows = [data[column].tolist() for column in data if column != "date"]
    return np.asarray(rows)
def getDataset2D(self, data, VECTOR_SIZE=381):
    """Cut the scaled indicator table into square 2-D samples with class ids.

    Each sample is a ``VECTOR_SIZE x VECTOR_SIZE`` matrix: row ``j`` holds
    the last ``VECTOR_SIZE`` values of feature ``j`` up to and including
    the labelled bar; rows beyond the number of features stay zero.

    Args:
        data: Record dict passed to ``self.preprocessData``.
        VECTOR_SIZE: Window length and matrix side. Must be at least the
            number of feature columns, otherwise the row assignment raises
            ``IndexError``.

    Returns:
        Tuple ``(X, Y)``: ``X`` stacks the sample matrices, ``Y`` is an
        int64 array with 0 for a scaled label of 0, 1 for 0.5, else 2.
    """
    _, minmax_df = self.preprocessData(data)

    feature_rows, label_rows = [], []
    for key in minmax_df:
        if key == "date":
            continue
        column = minmax_df[key].tolist()
        if key == "label":
            label_rows.append(column)
        else:
            feature_rows.append(column)

    bar_count = len(feature_rows[0])
    X, Y = [], []
    for end in range(VECTOR_SIZE - 1, bar_count):
        start = end - VECTOR_SIZE + 1
        sample = np.zeros((VECTOR_SIZE, VECTOR_SIZE))
        for row, series in enumerate(feature_rows):
            sample[row] = series[start:end + 1]
        X.append(sample)

        # NOTE(review): the mapping relies on the label column having been
        # min-max scaled from three classes (0, 0.5, 1) - confirm that the
        # input always contains all three.
        scaled_label = label_rows[0][end]
        if scaled_label == 0:
            Y.append(0)
        elif scaled_label == 0.5:
            Y.append(1)
        else:
            Y.append(2)

    return np.asarray(X), np.asarray(Y, dtype='int64')
def getVectorData(self, data, VECTOR_SIZE = 32):
    """Return the model inputs and labels produced by ``getVectorData_2``.

    Args:
        data: Record dict forwarded unchanged.
        VECTOR_SIZE: Window size forwarded unchanged.
    """
    # Single switch point: this entry point currently delegates to variant 2.
    return self.getVectorData_2(data, VECTOR_SIZE)
def getVectorData_1(self, data, VECTOR_SIZE):
    """Build 4-channel square samples from a sliding window over four indicators.

    Every sample covers the ``VECTOR_SIZE * VECTOR_SIZE`` most recent bars
    up to and including the labelled bar. The window of each indicator is
    laid out row by row into a ``VECTOR_SIZE x VECTOR_SIZE`` plane.

    Channels: 0 = avg10, 1 = diff_upper_lower, 2 = abs_avg_3, 3 = rsi.

    Args:
        data: Record dict passed to ``self.buySellChecker.analyze``.
        VECTOR_SIZE: Side length of each channel plane.

    Returns:
        Tuple ``(batch_X, batch_Y)``. ``batch_X`` stacks one
        ``(4, VECTOR_SIZE, VECTOR_SIZE)`` array per sample, ``batch_Y`` is
        an int64 array with the ``label`` value of each sample's last bar.
    """
    df = self.buySellChecker.analyze(data)

    channel_columns = ('avg10', 'diff_upper_lower', 'abs_avg_3', 'rsi')
    channels = np.asarray([df[name].tolist() for name in channel_columns], dtype=float)
    label = df['label'].tolist()

    size = channels.shape[1]
    window = VECTOR_SIZE * VECTOR_SIZE
    batch_X, batch_Y = [], []
    for end in range(window - 1, size):
        start = end - window + 1
        # A new array per sample: reusing one buffer for every append would
        # leave all samples pointing at the contents of the last window.
        sample = channels[:, start:end + 1].reshape(len(channel_columns), VECTOR_SIZE, VECTOR_SIZE).copy()
        batch_X.append(sample)
        # Labels are kept as integer class ids (not one-hot encoded).
        batch_Y.append(label[end])

    batch_X = np.asarray(batch_X)
    batch_Y = np.asarray(batch_Y, dtype='int64')
    return batch_X, batch_Y
def getVectorData_2(self, data, VECTOR_SIZE=32):
    """Build 4-channel samples from consecutive time slices of 32 indicators.

    For the bar at position ``i`` the preceding
    ``VECTOR_SIZE * CHANNEL_SIZE`` bars (ending at ``i`` inclusive) are cut
    into ``CHANNEL_SIZE`` consecutive slices of ``VECTOR_SIZE`` bars.
    Channel ``c`` holds slice ``c``; within a channel, row ``r`` holds the
    values of the ``r``-th indicator listed below.

    Args:
        data: Record dict passed to ``self.buySellChecker.analyze``.
        VECTOR_SIZE: Slice length and plane side. Must be at least 32 (one
            row per indicator), otherwise the row assignment raises
            ``IndexError``; rows beyond the 32nd stay zero.

    Returns:
        Tuple ``(batch_X, batch_Y)``. ``batch_X`` stacks one
        ``(4, VECTOR_SIZE, VECTOR_SIZE)`` array per sample, ``batch_Y`` is
        an int64 array with the ``label`` value at each sample's last bar.
    """
    # Row order inside every channel plane.
    FEATURE_COLUMNS = (
        'macd', 'diff_avg27',
        'diff_avg3_avg27', 'diff_avg3_avg54',
        'diff_avg6_avg27', 'diff_avg6_avg54',
        'diff_avg9_avg27', 'diff_avg9_avg54',
        'diff_avg12_avg27', 'diff_avg12_avg54',
        'diff_change_lead1', 'diff_open_lead1', 'diff_close_lead1',
        'diff_high_lead1', 'diff_low_lead1',
        'rsi', 'rsis', 'diff_avg54',
        'diff_change_base', 'diff_base_lead1',
        'diff_open_base', 'diff_close_base', 'diff_high_base', 'diff_low_base',
        'abs_avg_1', 'abs_avg_2', 'abs_avg_3', 'abs_avg_4', 'abs_avg_5',
        'diff_upper_lower', 'diff_open_lower', 'diff_close_upper',
    )
    CHANNEL_SIZE = 4

    df = self.buySellChecker.analyze(data)
    feature_rows = [df[name].tolist() for name in FEATURE_COLUMNS]
    label = df['label'].tolist()
    size = len(label)

    window = VECTOR_SIZE * CHANNEL_SIZE
    batch_X, batch_Y = [], []
    for i in range(window - 1, size):
        X = np.zeros((CHANNEL_SIZE, VECTOR_SIZE, VECTOR_SIZE))
        first = i - window + 1
        for c in range(CHANNEL_SIZE):
            # Slice c starts where slice c-1 ended.
            s = first + c * VECTOR_SIZE
            e = s + VECTOR_SIZE
            for row, series in enumerate(feature_rows):
                X[c, row] = series[s:e]
        batch_X.append(X)
        batch_Y.append(label[i])

    batch_X = np.asarray(batch_X)
    batch_Y = np.asarray(batch_Y, dtype='int64')
    return batch_X, batch_Y
def getDataset3D(self, data, VECTOR_SIZE=299):
    """Cut the scaled indicator table into 3-channel square samples with one-hot labels.

    Each sample is built from the ``VECTOR_SIZE`` bars that precede the
    labelled bar (the labelled bar itself is not part of the window). Row
    ``j`` of the square holds feature ``j``; rows beyond the number of
    features stay zero. The square is then repeated three times along a
    trailing axis.

    Args:
        data: Record dict passed to ``self.preprocessData``.
        VECTOR_SIZE: Window length and square side. Must be at least the
            number of feature columns, otherwise the row assignment raises
            ``IndexError``.

    Returns:
        Tuple ``(X, Y)``: ``X`` stacks samples of shape
        ``(VECTOR_SIZE, VECTOR_SIZE, 3)``; ``Y`` holds ``[1, 0, 0]`` for a
        scaled label of 0, ``[0, 1, 0]`` for 0.5 and ``[0, 0, 1]`` otherwise.
    """
    _, minmax_df = self.preprocessData(data)

    feature_rows, label_rows = [], []
    for key in minmax_df:
        if key == "date":
            continue
        column = minmax_df[key].tolist()
        if key == "label":
            label_rows.append(column)
        else:
            feature_rows.append(column)

    bar_count = len(feature_rows[0])
    X, Y = [], []
    for i in range(VECTOR_SIZE, bar_count):
        plane = np.zeros((VECTOR_SIZE, VECTOR_SIZE))
        for row, series in enumerate(feature_rows):
            plane[row] = series[i - VECTOR_SIZE:i]
        X.append(np.stack([plane, plane, plane], axis=-1))

        # Compare the scaled value itself: truncating it with int() turns
        # 0.5 into 0, which made the middle class unreachable.
        scaled_label = label_rows[0][i]
        if scaled_label == 0:
            Y.append([1, 0, 0])
        elif scaled_label == 0.5:
            Y.append([0, 1, 0])
        else:
            Y.append([0, 0, 1])

    return np.asarray(X), np.asarray(Y)
if __name__ == "__main__":
    # Manual smoke run: build and vectorise the frames for a few trading days.
    # The project root is three directory levels above this file.
    PROJECT_HOME = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")

    # Stock codes mapped to the days to process.
    # Codes used so far: 252670, 122630.
    stock_codes = {
        "252670": ['20220801', '20220802', '20220803', '20220804', '20220805'],
    }

    for stock_code, given_days in stock_codes.items():
        stock2Vector = Stock2Vector(RESOURCE_PATH)
        for given_day in given_days:
            data, minmax_data = stock2Vector.makeData(given_day, stock_code)
            # The arrays are built only to exercise the code path.
            vector = stock2Vector.vectorize(data)
            minmax_vector = stock2Vector.vectorize(minmax_data)
            print(given_day)

    print("done...")