# File: DeepStock/stock/analysis/JSDPattern_simulation.py
# Last commit: 8b8b89c0a1 "init" by dsyoon, 2024-03-20 19:51:17 +09:00
# Size: 366 lines, 19 KiB (Python)

# https://bibot.tistory.com/63
# https://nonmeyet.tistory.com/entry/Python-TALib%EB%A5%BC-%ED%99%9C%EC%9A%A9%ED%95%9C-%EB%B9%84%ED%8A%B8%EC%BD%94%EC%9D%B8%EC%A3%BC%EA%B0%80%EA%B8%B0%EC%88%A0%EB%B6%84%EC%84%9D-%EB%B3%B4%EC%A1%B0%EC%A7%80%ED%91%9C-%EC%B6%94%EA%B0%80
# https://lunadaddy.tistory.com/122
# https://wikidocs.net/186885
import os
from scipy.signal import savgol_filter
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
# https://www.lfd.uci.edu/~gohlke/pythonlibs/#ta-lib
# https://lunadaddy.tistory.com/122
import talib
import pandas as pd
from datetime import datetime, timedelta
from stock.analysis.IchimokuCloud import IchimokuCloud
from sklearn.preprocessing import StandardScaler
from stock.analysis.JSDPattern import JSDPattern
def _is_rising(line, i):
    """Return True when ``line[i - 1]`` and ``line[i]`` are both set and increasing."""
    prev, cur = line[i - 1], line[i]
    return prev is not None and cur is not None and prev < cur


def _new_high_flags(line, close, lookback):
    """Flag bars whose close exceeds the previous ``lookback`` closes while ``line`` rises.

    The first ``lookback`` entries are always 0.  The ``None`` test on the last
    nine closes rejects the padded tail, where ``close`` holds ``None``.
    """
    flags = [0] * lookback
    for i in range(lookback, len(close)):
        hit = (
            _is_rising(line, i)
            and None not in close[i - 8:i + 1]
            and max(close[i - lookback:i]) < close[i]
        )
        flags.append(1 if hit else 0)
    return flags


def _new_low_flags(line, close, lookback):
    """Flag bars where the close ``lookback + 1`` bars ago is below every later close.

    NOTE(review): the condition still requires ``line`` to be *rising*, exactly
    as in the original code; confirm that this is the intended "new low" rule.
    """
    flags = [0] * lookback
    for i in range(lookback, len(close)):
        hit = (
            _is_rising(line, i)
            and None not in close[i - 8:i + 1]
            and close[i - lookback - 1] < min(close[i - lookback:i + 1])
        )
        flags.append(1 if hit else 0)
    return flags


def _bollinger(close_df, window, width):
    """Return ``(upper, lower, middle)`` Bollinger band lists for ``close_df``."""
    mean = close_df.rolling(window=window).mean()
    std = close_df.rolling(window=window).std()
    upper = mean + (std * width)
    lower = mean - (std * width)
    middle = (upper + lower) / 2
    return (
        list(np.reshape(upper.values, -1)),
        list(np.reshape(lower.values, -1)),
        list(np.reshape(middle.values, -1)),
    )


class JSDPattern_simulation(JSDPattern):
    """``JSDPattern`` variant that turns raw candles into an indicator table."""

    def __init__(self, RESOURCE_PATH=None):
        """Forward ``RESOURCE_PATH`` to the ``JSDPattern`` initialiser."""
        super().__init__(RESOURCE_PATH)

    def analyze(self, result, mins=1):
        """Build the indicator DataFrame for one candle series.

        Args:
            result: mapping with the keys ``"ymd"``, ``"open"``, ``"close"``,
                ``"high"``, ``"low"`` and ``"volume"``.  ``result["volume"]``
                is rewritten in place (non-positive volumes become 1).
            mins: candle interval in minutes; ``1440`` means daily candles.
                It sets the spacing of the padded future timestamps and the
                ``duration`` passed to ``getDiff_Rate``.

        Returns:
            ``(data, current_index)`` where ``data`` is indexed by timestamp
            and is 51 rows longer than the de-duplicated input (room for the
            shifted leading spans), and ``current_index`` is the number of
            real (non-padded) rows.
        """
        # Non-positive volumes are replaced by 1.
        result["volume"] = [v if 0 < v else 1 for v in result["volume"]]

        # Basic candle data: order by timestamp and drop fully identical rows.
        fields = ["ymd", "open", "close", "high", "low", "volume"]
        candles = pd.concat([pd.DataFrame(result[f]) for f in fields], axis=1)
        candles.columns = fields
        candles.index = pd.DatetimeIndex(result["ymd"])
        candles = candles.sort_index(ascending=True).drop_duplicates()

        ymd = candles["ymd"].tolist()
        open_ = candles["open"].tolist()  # trailing underscore: do not shadow open()
        close = candles["close"].tolist()
        high = candles["high"].tolist()
        low = candles["low"].tolist()
        volume = candles["volume"].tolist()

        # --- Ichimoku cloud ---------------------------------------------------
        high_s, low_s, close_s = candles["high"], candles["low"], candles["close"]
        conversion_n, base_n, span2_n, lag_n = 9, 26, 52, 26

        # 1. Conversion line: midpoint of the 9-bar high/low range (current bar included).
        changeLine = (high_s.rolling(conversion_n).max() + low_s.rolling(conversion_n).min()) / 2
        # 2. Base line: midpoint of the 26-bar high/low range (current bar included).
        baseLine = (high_s.rolling(base_n).max() + low_s.rolling(base_n).min()) / 2
        # 3. Lagging span: the close plotted 26 bars earlier.
        laggingSpan = list(close_s.values[lag_n:]) + [None] * lag_n
        # 4. Leading span 1: mean of conversion and base line, moved forward 25 slots.
        leadingSpan1 = [None] * (base_n - 1) + list(((changeLine + baseLine) / 2).values)
        # 5. Leading span 2: midpoint of the 52-bar range, moved forward 51 slots.
        # NOTE(review): the original comments mention both a 26- and a 52-bar
        # shift for this span; the 51-slot shift of the code is kept unchanged.
        span2_mid = (high_s.rolling(span2_n).max() + low_s.rolling(span2_n).min()) / 2
        leadingSpan2 = [None] * (span2_n - 1) + list(span2_mid.values)

        baseLine = baseLine.tolist()
        changeLine = changeLine.tolist()
        current_index = len(ymd)

        # --- Pad every series to the length of leading span 2 ----------------
        target_len = len(leadingSpan2)
        # Future timestamps advance by one candle.  The original always used a
        # 1-minute step unless mins == 1440, ignoring any other interval.
        step = timedelta(days=1) if mins == 1440 else timedelta(minutes=int(mins))
        while len(ymd) < target_len:
            ymd.append(ymd[-1] + step)
        for series in (open_, close, high, low, volume, baseLine, changeLine, laggingSpan):
            series.extend([None] * (target_len - len(series)))
        # Leading span 1 is extended by repeating its last value.
        leadingSpan1.extend([leadingSpan1[-1]] * (target_len - len(leadingSpan1)))

        # --- New-high / new-low flags (9, 26, 33 and 52 bars) -----------------
        flag_sources = ((9, changeLine), (26, baseLine), (33, leadingSpan1), (52, leadingSpan2))
        new_high = {n: _new_high_flags(line, close, n - 1) for n, line in flag_sources}
        new_low = {n: _new_low_flags(line, close, n - 1) for n, line in flag_sources}

        # --- Exponentially weighted moving averages ---------------------------
        close_df = pd.DataFrame(close)
        ema_spans = (5, 10, 20, 60, 90, 120, 240, 360, 480, 720, 1440, 2880)
        ema = {n: list(np.reshape(close_df.ewm(n).mean().values, -1)) for n in ema_spans}

        # --- Stochastic oscillators -------------------------------------------
        np_high = np.array(high, dtype=np.float64)
        np_low = np.array(low, dtype=np.float64)
        np_close = np.array(close, dtype=np.float64)
        stoch = {}
        for fastk, slow in ((12, 5), (26, 16), (52, 32)):
            stoch[fastk] = talib.STOCH(
                np_high, np_low, np_close,
                fastk_period=fastk,
                slowk_period=slow, slowk_matype=0,
                slowd_period=slow, slowd_matype=0,
            )

        # --- Position of the close inside its 240-bar min/max range -----------
        loc_240 = [None] * len(close)
        for i in range(240, len(close)):
            if close[i] is None:
                continue  # padded tail
            # The window comes from the same sorted, de-duplicated series as
            # close[i].  The original sliced the raw result["close"], which is
            # misaligned when the input is unsorted or contains duplicates.
            window = close[i - 239:i + 1]
            min_v = np.min(window)
            max_v = np.max(window)
            loc_240[i] = (close[i] - min_v) / (max_v - min_v)
        # Force a float dtype so an all-None column (240 rows or fewer) is
        # numeric and the rolling means below can be computed on it.
        loc_240 = pd.DataFrame(loc_240, dtype=np.float64)
        loc_240_k = loc_240.to_numpy().reshape(-1)
        loc_240_d = loc_240.rolling(20).mean().to_numpy().reshape(-1)
        loc_240_s = loc_240.rolling(60).mean().to_numpy().reshape(-1)

        # --- Bollinger bands --------------------------------------------------
        upper_10, lower_10, middle_10 = _bollinger(close_df, 10, 2)
        upper_20, lower_20, middle_20 = _bollinger(close_df, 20, 2)

        # --- Pairwise differences and rates -----------------------------------
        duration = 360 if mins == 1440 else 1440
        pairs = [
            ('laggingSpan_close', laggingSpan, close),
            ('laggingSpan_changeLine', laggingSpan, changeLine),
            ('laggingSpan_baseLine', laggingSpan, baseLine),
            ('laggingSpan_leadingSpan1', laggingSpan, leadingSpan1),
            ('laggingSpan_leadingSpan2', laggingSpan, leadingSpan2),
            ('laggingSpan_avg60', laggingSpan, ema[60]),
            ('laggingSpan_lower10', laggingSpan, lower_10),
            ('laggingSpan_middle10', laggingSpan, middle_10),
            ('laggingSpan_upper10', laggingSpan, upper_10),
            ('laggingSpan_lower20', laggingSpan, lower_20),
            ('laggingSpan_middle20', laggingSpan, middle_20),
            ('laggingSpan_upper20', laggingSpan, upper_20),
            ('baseLine_close', baseLine, close),
            ('changeLine_close', changeLine, close),
            ('changeLine_baseLine', changeLine, baseLine),
            ('changeLine_leadingSpan1', changeLine, leadingSpan1),
            ('leadingSpan1_leadingSpan2', leadingSpan1, leadingSpan2),
        ]
        diffs = []
        rates = []
        for prefix, left, right in pairs:
            diff, rate = self.getDiff_Rate(left, right, duration=duration)
            diffs.append((prefix + '_diff', diff))
            rates.append((prefix + '_diff_rate', rate))

        # --- Assemble the output table (insertion order = column order) -------
        columns = {
            'ymd': ymd,
            'open': open_, 'close': close, 'high': high, 'low': low, 'volume': volume,
            'changeLine': changeLine, 'baseLine': baseLine, 'laggingSpan': laggingSpan,
            'leadingSpan1': leadingSpan1, 'leadingSpan2': leadingSpan2,
        }
        columns.update(diffs)
        columns.update(rates)
        columns.update([
            ('loc_240_k', loc_240_k), ('loc_240_d', loc_240_d), ('loc_240_s', loc_240_s),
        ])
        columns.update(('avg%d' % n, ema[n]) for n in ema_spans)
        columns.update([
            ('upper_10', upper_10), ('lower_10', lower_10), ('middle_10', middle_10),
            ('upper_20', upper_20), ('lower_20', lower_20), ('middle_20', middle_20),
        ])
        columns.update(('new_high_%d' % n, new_high[n]) for n, _ in flag_sources)
        columns.update(('new_low_%d' % n, new_low[n]) for n, _ in flag_sources)
        for fastk in (12, 26, 52):
            slowk, slowd = stoch[fastk]
            columns['slowk_%d' % fastk] = slowk
            columns['slowd_%d' % fastk] = slowd

        # One single-column frame per series, joined on the default integer
        # index exactly as the original did.
        data = pd.concat([pd.DataFrame(values) for values in columns.values()], axis=1)
        data.columns = list(columns)
        data.index = pd.DatetimeIndex(ymd)
        return data, current_index

    def getData(self, ticker, ymd=None, get_days=14):
        """Fetch candles for ``ticker`` via ``getCoinData`` and analyse them.

        Args:
            ticker: identifier forwarded to ``getCoinData``.
            ymd: optional value forwarded as ``ymd=``; omitted from the call
                when ``None``.
            get_days: forwarded to ``getCoinData`` as ``get_days=``.

        Returns:
            The ``(data, current_index)`` pair from ``analyze`` (called with
            its default ``mins``), or ``(None, None)`` when no rows came back.
        """
        if ymd is None:
            result = self.getCoinData(ticker, get_days=get_days)
        else:
            result = self.getCoinData(ticker, ymd=ymd, get_days=get_days)
        if len(result['ymd']) < 1:
            return None, None
        return self.analyze(result)
def min_max_normalize(data):
    """Scale the values of *data* linearly onto the interval [0, 1].

    Args:
        data: iterable of numbers.

    Returns:
        A list with ``(x - min) / (max - min)`` for every ``x``.  An empty
        input yields ``[]``; when all values are equal the range is zero, so
        every value maps to ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    values = list(data)
    if not values:
        return []
    min_val = min(values)
    span = max(values) - min_val
    if span == 0:
        return [0.0 for _ in values]
    return [(x - min_val) / span for x in values]


if __name__ == "__main__":
    # Example data
    original_data = [-4, -3, -2, -1, 0]
    normalized_data = min_max_normalize(original_data)
    print(np.asarray(normalized_data) - 1)
    original_data = [0, 2, 4, 6, 8, 10]
    normalized_data = min_max_normalize(original_data)
    print(normalized_data)