Files
DeepLottery/final_filterTest.py

113 lines
4.1 KiB
Python

# -*- coding: utf-8 -*-
"""
학습(1~800) / 검증(801~1000) / 테스트(1001~) 구간별 필터 통과(당첨번호가 필터를 통과하는지) 분석.
1_FilterTest_25.py 와 동일한 흐름이며 BallFilter 대신 final_BallFilter.BallFilter 를 사용합니다.
실행: miniconda 환경 ncue 에서 `python final_filterTest.py` (README 참고).
"""
from __future__ import annotations
import datetime
import os
import time
import pandas as pd
from final_BallFilter import BallFilter
# PROMPT.txt 기준 구간
TRAIN_NO = (1, 800)
VALID_NO = (801, 1000)
TEST_NO = (1001, 10**9)
class FilterTest:
    """Check how often real winning combinations pass every BallFilter rule.

    For each historical draw the six winning balls are handed to
    ``BallFilter.filter``; a draw "passes" when the filter reports no
    triggered rule for it.
    """

    # Row positions 0..19 are never evaluated (the loop stops at index 20).
    # NOTE(review): presumably the filter needs earlier draws as history --
    # confirm against final_BallFilter.
    _FIRST_CHECKED_ROW = 20

    def __init__(self, resources_path: str):
        """Create the filter from ``lotto_history.json`` in *resources_path*."""
        lotto_json = os.path.join(resources_path, "lotto_history.json")
        self.ballFilter = BallFilter(lotto_json)

    def find_filter_method(self, df_ball, filter_ball=None, no_min=None, no_max=None):
        """Evaluate draws whose number lies in ``no_min``..``no_max``.

        Rows are visited from the last one back to row position 20; rows
        before that position are skipped regardless of the range.

        Args:
            df_ball: Draw history.  Must have a ``"no"`` column; the winning
                balls are read from column positions 1 through 6.
            filter_ball: Unused; kept so existing callers keep working.
            no_min: Lowest draw number to evaluate, or ``None`` for no lower
                bound.
            no_max: Highest draw number to evaluate, or ``None`` for no upper
                bound.

        Returns:
            ``(win_count, no_filter_ball)`` where ``win_count`` is the number
            of evaluated draws that triggered no filter rule and
            ``no_filter_ball`` maps each such draw number to its six balls.
        """
        # Convert the frame once instead of re-scanning it for every draw.
        # The balls come from the row being iterated, which is the same row
        # the per-draw lookup found as long as draw numbers are unique.
        rows = df_ball.values.tolist()
        draw_numbers = df_ball["no"].tolist()

        win_count = 0
        no_filter_ball = {}
        for i in range(len(rows) - 1, self._FIRST_CHECKED_ROW - 1, -1):
            no = int(draw_numbers[i])
            if no_min is not None and no < no_min:
                continue
            if no_max is not None and no > no_max:
                continue

            answer = rows[i][1:7]
            triggered = list(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            if not triggered:
                win_count += 1
                no_filter_ball[no] = answer

        print("\n\t[구간 {}~{}] 필터에 걸리지 않은 회차 (당첨 조합 통과)]".format(no_min, no_max))
        print("\tcount: {:,} (통과)".format(len(no_filter_ball)))
        for no in sorted(no_filter_ball):
            print("\t\t>", no, no_filter_ball[no])
        return win_count, no_filter_ball

    def report_split(self, df_ball, name: str, lo: int, hi: int):
        """Run the filter check on draws ``lo``..``hi`` and print a summary.

        The pass rate is computed against the nominal size of the range
        (``hi - lo + 1``), not against the number of rows actually evaluated,
        so rows skipped by ``find_filter_method`` still count in the
        denominator.

        Args:
            df_ball: Draw history passed through to ``find_filter_method``.
            name: Label printed in the section header.
            lo: First draw number of the split (inclusive).
            hi: Last draw number of the split (inclusive).

        Returns:
            The number of draws in the split that passed every filter rule.
        """
        banner = "=" * 60
        print("\n" + banner)
        print(" {} | 회차 {} ~ {}".format(name, lo, hi))
        print(banner)

        # perf_counter is monotonic, so the elapsed time cannot go negative
        # if the wall clock is adjusted while the check runs.
        started = time.perf_counter()
        wc, _ = self.find_filter_method(df_ball, no_min=lo, no_max=hi)
        elapsed = datetime.timedelta(seconds=time.perf_counter() - started)

        # An inverted range (hi < lo) is an empty split, not a negative one.
        span = max(0, hi - lo + 1)
        rate = (wc / span * 100) if span else 0
        print("\t처리 시간: {}".format(elapsed))
        print("\t통과 회차 수: {} / {} ({:.2f}%)".format(wc, span, rate))

        if lo >= TRAIN_NO[0] and hi <= TRAIN_NO[1]:
            need = max(1, span // 100)
            print("\t(참고) 100회당 최소 1회 기준 대략 {}회 이상이면 충족".format(need))
        if lo >= VALID_NO[0] and hi <= VALID_NO[1]:
            print("\t(참고) 검증 200회 구간에서 최소 3회 이상이면 요구사항 예시 충족")
        return wc
if __name__ == "__main__":
    # Script entry point: load the draw history that sits next to this file
    # and print the pass-rate report for the train / valid / test ranges.
    resources_path = os.path.join(os.path.dirname(__file__), "resources")
    csv_path = os.path.join(resources_path, "lotto_history.txt")

    # The history file is read as a headerless CSV with eight columns:
    # draw number, six balls, and one further value stored as "bn".
    df_ball = pd.read_csv(csv_path, header=None)
    df_ball.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]

    # Look up the latest draw once; every split is clamped to it.
    last_no = int(df_ball["no"].max())

    ft = FilterTest(resources_path)
    splits = (
        ("학습 TRAIN", TRAIN_NO),
        ("검증 VALID", VALID_NO),
        ("테스트 TEST", TEST_NO),
    )
    for label, (first_no, final_no) in splits:
        # Skip a split the history has not reached yet, so report_split is
        # never called with an inverted (hi < lo) range.
        if last_no < first_no:
            continue
        ft.report_split(df_ball, label, first_no, min(final_no, last_no))