479 lines
18 KiB
Python
479 lines
18 KiB
Python
import os
|
|
import csv
|
|
import copy
|
|
import sqlite3
|
|
import numpy as np
|
|
import pandas as pd
|
|
from datetime import datetime, timedelta
|
|
|
|
from hts.BuySellChecker import BuySellChecker
|
|
from hts.HTS import HTS
|
|
|
|
from stock.analysis.Common import Common
|
|
from stock.analysis.Stochastic import Stochastic
|
|
from stock.analysis.RSI import RSI
|
|
from stock.analysis.MACD import MACD
|
|
from stock.analysis.IchimokuCloud import IchimokuCloud
|
|
|
|
class Stock2Vector(HTS):
|
|
|
|
# Directory that holds the project's resource files; assigned per instance in __init__.
RESOURCE_PATH = None

# Placeholders for the indicator helpers; __init__ replaces each one with a real object.
common = stochastic = rsi = macd = ichimokuCloud = None
|
|
|
|
def __init__(self, RESOURCE_PATH):
    """Initialise the HTS base class and create the helper objects.

    Args:
        RESOURCE_PATH: Directory passed on to the parent constructor and
            stored on the instance; the training-data readers open
            ``hts.db`` inside it.
    """
    super().__init__(RESOURCE_PATH)
    self.RESOURCE_PATH = RESOURCE_PATH

    # Produces the feature frame consumed by the dataset/vector builders.
    self.buySellChecker = BuySellChecker()

    # Technical-indicator helpers used by analyze().
    self.common = Common()
    self.stochastic = Stochastic()
    self.rsi = RSI()
    self.macd = MACD()
    self.ichimokuCloud = IchimokuCloud()
|
|
|
|
def analyze(self, result):
    """Derive moving averages, Bollinger bands and oscillators per bar.

    Args:
        result: Mapping holding equally long sequences under the keys
            ``"time"``, ``"open"``, ``"close"``, ``"high"``, ``"low"`` and
            ``"vol"``. ``result["close"]`` must not be empty because its
            first element pads the moving averages.

    Returns:
        pandas.DataFrame indexed by ``pd.DatetimeIndex(result["time"])``
        with the columns date, open, high, low, close, volume, upper,
        lower, avg3, avg5, avg10, avg20, avg30, avg60, macd, macds, macdo,
        fast_k, slow_k, slow_d, rsi and rsis. Values that are still NaN at
        the end are replaced by 0.
    """
    opens = result["open"]
    closes = result["close"]
    highs = result["high"]
    lows = result["low"]
    volumes = result["vol"]

    close_frame = pd.DataFrame(closes)

    def moving_average(window):
        # Bars that come before the first complete window take the first close.
        rolled = close_frame.rolling(window=window).mean().fillna(closes[0])
        return [row[0] for row in rolled.values.tolist()]

    averages = {}
    for window in (3, 5, 10, 20, 30, 60):
        averages["avg" + str(window)] = moving_average(window)

    # Bollinger bands: 20-bar mean plus/minus two standard deviations.
    mean20 = close_frame.rolling(window=20).mean()
    stddev20 = close_frame.rolling(window=20).std()
    upper_values = (mean20 + (stddev20 * 2)).values  # upper Bollinger band
    lower_values = (mean20 - (stddev20 * 2)).values  # lower Bollinger band

    # The first ten entries repeat row 0 of the band. Row 0 is NaN while the
    # 20-bar window is incomplete, so these entries (and rows 10-18) become 0
    # in the final fillna below.
    band_rows = range(len(upper_values))
    upper = [upper_values[0 if i < 10 else i][0] for i in band_rows]
    lower = [lower_values[0 if i < 10 else i][0] for i in band_rows]

    times = result["time"]

    # Per-bar records in the shape the indicator helpers take as input.
    bars = [
        {'volume': volumes[i], 'close': closes[i], 'open': opens[i],
         'high': highs[i], 'low': lows[i],
         'avg3': averages["avg3"][i], 'avg5': averages["avg5"][i],
         'avg10': averages["avg10"][i], 'avg20': averages["avg20"][i],
         'avg30': averages["avg30"][i], 'avg60': averages["avg60"][i]}
        for i in range(len(opens))
    ]

    # Stochastic oscillator; gaps in the helper output are filled with 100.
    stochastic_frame = self.stochastic.apply(bars, n=30, m=5, t=5).fillna(100)
    fast_k = stochastic_frame['fast_k'].values.tolist()
    slow_k = stochastic_frame['slow_k'].values.tolist()
    slow_d = stochastic_frame['slow_d'].values.tolist()

    # MACD, same gap handling.
    macd_frame = self.macd.apply(bars, short=12, long=26, t=9).fillna(100)
    macd_line = macd_frame['macd'].values.tolist()
    macd_signal = macd_frame['macds'].values.tolist()
    macd_osc = macd_frame['macdo'].values.tolist()

    # RSI, same gap handling.
    rsi_frame = self.rsi.apply(bars, period=30, window=5).fillna(100)
    rsi_line = rsi_frame['rsi'].values.tolist()
    rsi_signal = rsi_frame['rsis'].values.tolist()

    # NOTE: Ichimoku cloud values are not computed here; self.ichimokuCloud
    # is never called by this method.

    columns = {"date": times,
               "open": opens, "high": highs, "low": lows, "close": closes,
               "volume": volumes, "upper": upper, "lower": lower}
    columns.update(averages)  # avg3 ... avg60, in that order
    columns["macd"] = macd_line
    columns["macds"] = macd_signal
    columns["macdo"] = macd_osc
    columns["fast_k"] = fast_k
    columns["slow_k"] = slow_k
    columns["slow_d"] = slow_d
    columns["rsi"] = rsi_line
    columns["rsis"] = rsi_signal

    table = pd.DataFrame(columns)
    table.index = pd.DatetimeIndex(times)
    return table.fillna(0)
|
|
|
|
def getRealTime(self, stock_code, today, LAST_DATA=None):
    """Collect the bar series for one stock and day.

    Args:
        stock_code: Code handed to ``self.getDBData``.
        today: Day handed to ``self.getDBData``.
        LAST_DATA: Optional earlier series dict to continue from. It is
            deep-copied, so the caller's object is never modified.

    Returns:
        The series dict after ``self.getDBData`` has been called on it.
    """
    if LAST_DATA is None:
        result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
    else:
        result = copy.deepcopy(LAST_DATA)

    # getDBData is defined outside this section; its return value is ignored,
    # so it presumably fills `result` in place.
    self.getDBData(stock_code, today, result)
    return result
|
|
|
|
def makeData(self, today, stock_code):
    """Build the raw and min-max scaled feature frames for one day.

    The series come from ``self.getLastData`` and ``self.getRealTime``, and
    the features from ``self.buySellChecker.analyze``. Rows belonging to the
    first two distinct calendar dates are then removed from both frames
    (the original note says: of the four days loaded, the first two are
    dropped).

    Args:
        today: Day handed to ``getLastData`` / ``getRealTime``.
        stock_code: Stock code handed to the same two methods.

    Returns:
        Tuple ``(df, minmax_df)``. ``df`` is the analysed frame and
        ``minmax_df`` holds every non-date column scaled to [0, 1] over
        all loaded days, with the original ``date`` column appended. Both
        are restricted to the rows after the first two dates.
    """
    LAST_DATA = self.getLastData(stock_code, today)
    result = self.getRealTime(stock_code, today, LAST_DATA)

    df = self.buySellChecker.analyze(result)

    # The date column is kept out of the arithmetic and re-attached
    # unchanged; the scaled copy of it was discarded anyway.
    features = df.drop(columns=["date"])
    lowest = features.min()
    minmax_df = ((features - lowest) / (features.max() - lowest)).join(df["date"])

    # A constant column scales to 0/0 = NaN. Fill with 0 like
    # preprocessData() and makeTrainData() do.
    minmax_df = minmax_df.fillna(0)

    # Drop the leading dates. Slicing instead of indexing [0] and [1] keeps
    # this from raising IndexError when fewer than two days are present.
    leading_days = df["date"].dt.date.unique().tolist()[:2]
    df = df[~df["date"].dt.date.isin(leading_days)]
    minmax_df = minmax_df[~minmax_df["date"].dt.date.isin(leading_days)]

    return df, minmax_df
|
|
|
|
def getTrainData(self, stock_code, sDate=None, eDate=None):
    """Load the stored bars of one stock from ``hts.db``.

    Args:
        stock_code: Value matched against the ``CODE`` column of table
            ``hts``.
        sDate: Optional inclusive lower bound for ``ymd``. ``None`` means
            no lower bound.
        eDate: Optional inclusive upper bound for ``ymd``. ``None`` means
            no upper bound.

    Returns:
        Dict with the keys ``check`` (empty set), ``time``, ``open``,
        ``close``, ``high``, ``low``, ``vol`` and ``label``. The lists are
        ordered by ``ymd`` then ``hms``; prices, volume and label are
        converted with ``int`` and a NULL label becomes 0. All lists are
        empty when no row matches.
    """
    query = 'SELECT ymd, hms, open, high, low, close, volume, label FROM hts WHERE CODE=?'
    params = [stock_code]
    # Each bound is applied on its own. Binding a missing bound as NULL made
    # the whole range test false and silently returned no rows.
    if sDate is not None:
        query += ' and ymd >= ?'
        params.append(sDate)
    if eDate is not None:
        query += ' and ymd <= ?'
        params.append(eDate)
    # Ask SQLite for the final order instead of re-sorting in Python.
    query += ' order by ymd, hms'

    conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
    try:
        rows = conn.execute(query, params).fetchall()
    finally:
        # The connection used to be left open on every call.
        conn.close()

    result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}
    for ymd, hms, open_price, high, low, close, vol, label in rows:
        # ymd is YYYYMMDD; hms is HHMM, zero-padded to four digits here, and
        # seconds are fixed to 00.
        stamp = datetime.strptime(str(ymd) + " " + str(hms).zfill(4) + "00", '%Y%m%d %H%M%S')

        result["time"].append(stamp)
        result["open"].append(int(open_price))
        result["close"].append(int(close))
        result["high"].append(int(high))
        result["low"].append(int(low))
        result["vol"].append(int(vol))
        result["label"].append(0 if label is None else int(label))

    return result
|
|
|
|
def preprocessData(self, result):
    """Analyse the series and add a min-max scaled copy of the features.

    Args:
        result: Series dict passed unchanged to
            ``self.buySellChecker.analyze``.

    Returns:
        Tuple ``(df, minmax_df)``. ``df`` is the frame returned by
        ``analyze``. ``minmax_df`` holds every other column scaled with
        ``(x - min) / (max - min)``, followed by the original ``date``
        column, with NaN replaced by 0.
    """
    df = self.buySellChecker.analyze(result)

    lowest = df.min()
    value_range = df.max() - lowest
    scaled = (df - lowest) / value_range

    # Replace the scaled date column with the original timestamps.
    minmax_df = scaled.drop(columns=["date"]).join(df['date'])

    return df, minmax_df.fillna(0)
|
|
|
|
def makeTrainData(self, stock_code):
    """Load every stored bar of a stock and return raw and scaled features.

    Args:
        stock_code: Value matched against the ``CODE`` column of table
            ``hts`` in ``hts.db`` under ``self.RESOURCE_PATH``.

    Returns:
        Tuple ``(df, minmax_df)``. ``df`` is the frame returned by
        ``self.buySellChecker.analyze`` for the loaded series.
        ``minmax_df`` holds every non-date column scaled with
        ``(x - min) / (max - min)``, followed by the original ``date``
        column, with NaN replaced by 0.
    """
    result = {"check": set(), "time": [], "open": [], "close": [], "high": [], "low": [], "vol": [], "label": []}

    conn = sqlite3.connect(os.path.join(self.RESOURCE_PATH, "hts.db"))
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT ymd, hms, open, high, low, close, volume, label FROM hts WHERE CODE=? order by ymd, hms', (stock_code,))
        db_result = cursor.fetchall()
    finally:
        # The connection used to be left open on every call.
        conn.close()

    for ymd, hms, open_price, high, low, close, vol, label in db_result:
        # ymd is YYYYMMDD; hms is HHMM, zero-padded to four digits here, and
        # seconds are fixed to 00.
        stamp = datetime.strptime(str(ymd) + " " + str(hms).zfill(4) + "00", '%Y%m%d %H%M%S')

        result["time"].append(stamp)
        result["open"].append(int(open_price))
        result["close"].append(int(close))
        result["high"].append(int(high))
        result["low"].append(int(low))
        result["vol"].append(int(vol))
        result["label"].append(0 if label is None else int(label))

    df = self.buySellChecker.analyze(result)

    # The date column is kept out of the arithmetic and re-attached
    # unchanged; the scaled copy of it was discarded anyway.
    features = df.drop(columns=["date"])
    lowest = features.min()
    minmax_df = ((features - lowest) / (features.max() - lowest)).join(df['date'])

    # Constant columns scale to 0/0 = NaN; store them as 0.
    minmax_df = minmax_df.fillna(0)
    return df, minmax_df
|
|
|
|
def vectorize(self, data):
    """Stack every column except ``"date"`` into one array.

    Args:
        data: Mapping-like object (for example a DataFrame) whose values
            offer ``tolist()``.

    Returns:
        ``numpy.ndarray`` with one row per non-date key, in iteration
        order.
    """
    rows = [data[name].tolist() for name in data if name != "date"]
    return np.asarray(rows)
|
|
|
|
def getDataset2D(self, data, VECTOR_SIZE = 381):
    """Cut the scaled features into square sliding windows with class ids.

    Args:
        data: Series dict passed to ``self.preprocessData``.
        VECTOR_SIZE: Side length of each square sample and number of bars
            per window.

    Returns:
        Tuple ``(X, Y)``. ``X`` has one ``(VECTOR_SIZE, VECTOR_SIZE)``
        matrix per window: row ``j`` holds scaled feature ``j`` over the
        window and unused rows stay 0. ``Y`` is an ``int64`` array with the
        class of the last bar of each window: 0 for label 0, 1 for label
        1, otherwise 2.

    Raises:
        IndexError: If there are no feature columns, or more feature
            columns than ``VECTOR_SIZE`` while at least one window exists.
    """
    df, minmax_df = self.preprocessData(data)

    features = [minmax_df[name].tolist() for name in minmax_df
                if name not in ("date", "label")]

    # Classes come from the unscaled label. The scaled one only equals
    # 0 / 0.5 / 1 when all three classes occur in `data`; with a class
    # missing, the old decoding mapped labels to the wrong class.
    labels = df["label"].tolist()

    total_bars = len(features[0])
    X, Y = [], []
    for last in range(VECTOR_SIZE - 1, total_bars):
        first = last - VECTOR_SIZE + 1
        sample = np.zeros((VECTOR_SIZE, VECTOR_SIZE))
        for row, series in enumerate(features):
            sample[row][0:VECTOR_SIZE] = series[first:last + 1]
        X.append(sample)

        if labels[last] == 0:
            Y.append(0)
        elif labels[last] == 1:
            Y.append(1)
        else:
            Y.append(2)

    return np.asarray(X), np.asarray(Y, dtype='int64')
|
|
|
|
def getVectorData(self, data, VECTOR_SIZE=32):
    """Build model input tensors using the currently selected layout.

    Thin wrapper: forwards both arguments to ``getVectorData_2`` and
    returns its result unchanged.
    """
    batches = self.getVectorData_2(data, VECTOR_SIZE)
    return batches
|
|
|
|
def getVectorData_1(self, data, VECTOR_SIZE):
    """Build four-channel square samples from sliding windows.

    Each sample covers ``VECTOR_SIZE * VECTOR_SIZE`` consecutive bars laid
    out row by row. Channels, in order: ``avg10``, ``diff_upper_lower``,
    ``abs_avg_3``, ``rsi``.

    Args:
        data: Series dict passed to ``self.buySellChecker.analyze``.
        VECTOR_SIZE: Side length of each square channel.

    Returns:
        Tuple ``(batch_X, batch_Y)``. ``batch_X`` stacks one
        ``(4, VECTOR_SIZE, VECTOR_SIZE)`` array per window and ``batch_Y``
        is an ``int64`` array with the ``label`` value of each window's
        last bar.
    """
    df = self.buySellChecker.analyze(data)

    channel_names = ('avg10', 'diff_upper_lower', 'abs_avg_3', 'rsi')
    features = np.asarray([df[name].tolist() for name in channel_names], dtype=float)
    label = df['label'].tolist()

    size = len(label)
    bars_per_sample = VECTOR_SIZE * VECTOR_SIZE
    batch_X, batch_Y = [], []
    for last in range(bars_per_sample - 1, size):
        first = last - bars_per_sample + 1
        # Every window gets its own array. The old code refilled one shared
        # buffer and appended it each time, so all samples ended up equal to
        # the final window.
        sample = features[:, first:last + 1].reshape(
            len(channel_names), VECTOR_SIZE, VECTOR_SIZE).copy()
        batch_X.append(sample)
        batch_Y.append(label[last])

    batch_X = np.asarray(batch_X)
    batch_Y = np.asarray(batch_Y, dtype='int64')
    return batch_X, batch_Y
|
|
|
|
def getVectorData_2(self, data, VECTOR_SIZE=32):
    """Build three-channel samples from 32 engineered feature columns.

    Every sample spans ``3 * VECTOR_SIZE`` consecutive bars, split into
    three back-to-back segments of ``VECTOR_SIZE`` bars, one per channel.
    Inside a channel, row ``k`` holds feature ``k`` (order below) over that
    segment; rows beyond the 32 features stay 0.

    Args:
        data: Series dict passed to ``self.buySellChecker.analyze``.
        VECTOR_SIZE: Bars per channel segment and side length of each
            channel matrix.

    Returns:
        Tuple ``(batch_X, batch_Y)``. ``batch_X`` stacks one
        ``(3, VECTOR_SIZE, VECTOR_SIZE)`` array per window and ``batch_Y``
        is an ``int64`` array with the ``label`` value of each window's
        last bar.

    Raises:
        IndexError: If ``VECTOR_SIZE`` is smaller than the 32 feature rows
            and at least one window exists.
    """
    df = self.buySellChecker.analyze(data)

    # Row index inside each channel == position in this tuple.
    feature_names = (
        'macd',
        'diff_avg27',
        'diff_avg3_avg27', 'diff_avg3_avg54',
        'diff_avg6_avg27', 'diff_avg6_avg54',
        'diff_avg9_avg27', 'diff_avg9_avg54',
        'diff_avg12_avg27', 'diff_avg12_avg54',
        'diff_change_lead1',
        'diff_open_lead1', 'diff_close_lead1', 'diff_high_lead1', 'diff_low_lead1',
        'rsi', 'rsis',
        'diff_avg54',
        'diff_change_base', 'diff_base_lead1',
        'diff_open_base', 'diff_close_base', 'diff_high_base', 'diff_low_base',
        'abs_avg_1', 'abs_avg_2', 'abs_avg_3', 'abs_avg_4', 'abs_avg_5',
        'diff_upper_lower', 'diff_open_lower', 'diff_close_upper',
    )
    feature_rows = [df[name].tolist() for name in feature_names]
    label = df['label'].tolist()

    size = len(label)
    CHANNEL_SIZE = 3
    bars_per_sample = VECTOR_SIZE * CHANNEL_SIZE

    batch_X, batch_Y = [], []
    for last in range(bars_per_sample - 1, size):
        sample = np.zeros((CHANNEL_SIZE, VECTOR_SIZE, VECTOR_SIZE))
        first = last - bars_per_sample + 1
        for channel in range(CHANNEL_SIZE):
            # Segment of this channel: VECTOR_SIZE bars right after the previous one.
            seg_start = first + channel * VECTOR_SIZE
            seg_end = seg_start + VECTOR_SIZE
            for row, series in enumerate(feature_rows):
                sample[channel, row] = series[seg_start:seg_end]

        batch_X.append(sample)
        batch_Y.append(label[last])

    batch_X = np.asarray(batch_X)
    batch_Y = np.asarray(batch_Y, dtype='int64')
    return batch_X, batch_Y
|
|
|
|
def getDataset3D(self, data, VECTOR_SIZE = 299):
    """Cut the scaled features into 3-plane square windows with one-hot labels.

    Args:
        data: Series dict passed to ``self.preprocessData``.
        VECTOR_SIZE: Side length of each square sample and number of bars
            per window.

    Returns:
        Tuple ``(X, Y)``. ``X`` has one
        ``(VECTOR_SIZE, VECTOR_SIZE, 3)`` array per window: row ``j`` holds
        scaled feature ``j`` over the ``VECTOR_SIZE`` bars *before* bar
        ``i``, unused rows stay 0, and the matrix is repeated on the last
        axis. ``Y`` holds the one-hot class of bar ``i``: ``[1, 0, 0]`` for
        label 0, ``[0, 1, 0]`` for label 1, otherwise ``[0, 0, 1]``.

    Raises:
        IndexError: If there are no feature columns, or more feature
            columns than ``VECTOR_SIZE`` while at least one window exists.
    """
    df, minmax_df = self.preprocessData(data)

    features = [minmax_df[name].tolist() for name in minmax_df
                if name not in ("date", "label")]

    # Classes come from the unscaled label. The old test applied int() to the
    # scaled value, so 0.5 truncated to 0 and the `== 0.5` branch was
    # unreachable: every class-1 bar was encoded as [1, 0, 0].
    labels = df["label"].tolist()

    total_bars = len(features[0])
    X, Y = [], []
    for target in range(VECTOR_SIZE, total_bars):
        plane = np.zeros((VECTOR_SIZE, VECTOR_SIZE))
        for row, series in enumerate(features):
            plane[row][0:VECTOR_SIZE] = series[target - VECTOR_SIZE:target]
        X.append(np.stack([plane, plane, plane], axis=-1))

        if labels[target] == 0:
            Y.append([1, 0, 0])
        elif labels[target] == 1:
            Y.append([0, 1, 0])
        else:
            Y.append([0, 0, 1])

    return np.asarray(X), np.asarray(Y)
|
|
|
|
if __name__ == "__main__":
    # The project root is three directory levels above this file.
    PROJECT_HOME = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    RESOURCE_PATH = os.path.join(PROJECT_HOME, "resources")

    # Stock codes to check, each mapped to the trading days to vectorize.
    # Codes seen in earlier runs: 252670, 122630.
    stock_codes = {
        "252670": ['20220801', '20220802', '20220803', '20220804', '20220805'],
    }

    for stock_code, given_days in stock_codes.items():
        stock2Vector = Stock2Vector(RESOURCE_PATH)

        for given_day in given_days:
            data, minmax_data = stock2Vector.makeData(given_day, stock_code)
            vector = stock2Vector.vectorize(data)
            minmax_vector = stock2Vector.vectorize(minmax_data)
            print(given_day)

    print("done...")
|
|
|
|
|