113 lines
4.1 KiB
Python
113 lines
4.1 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
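Per-split filter pass analysis: checks whether each round's winning numbers pass the filters
for the train (rounds 1-800), validation (rounds 801-1000) and test (rounds 1001+) ranges.

Follows the same flow as 1_FilterTest_25.py, but uses final_BallFilter.BallFilter instead of BallFilter.

Run: `python final_filterTest.py` in the miniconda environment `ncue` (see README).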
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import datetime
|
|
import os
|
|
import time
|
|
|
|
import pandas as pd
|
|
|
|
from final_BallFilter import BallFilter
|
|
|
|
# Round-number ranges for each split, following PROMPT.txt.
# Each tuple is (first round, last round).
TRAIN_NO = (1, 800)
VALID_NO = (801, 1000)
# The upper bound is just a very large placeholder: the script in this file
# only reads the lower bound and caps the range at the last round in the data.
TEST_NO = (1001, 1_000_000_000)
|
|
|
|
|
|
class FilterTest:
    """Check, per round range, whether past winning combinations pass the ball filters.

    The filter itself is a ``BallFilter`` built from ``lotto_history.json``; this
    class only drives it over a history frame and prints/returns the rounds whose
    winning combination triggered no filter at all.
    """

    # Rows at positions 0..19 are never evaluated (the scan stops at position 20).
    # NOTE(review): presumably the filter needs earlier draws as look-back
    # history -- confirm against BallFilter.
    _FIRST_EVALUATED_ROW = 20

    def __init__(self, resources_path: str):
        """Create the underlying filter from ``<resources_path>/lotto_history.json``."""
        lotto_json = os.path.join(resources_path, "lotto_history.json")
        self.ballFilter = BallFilter(lotto_json)

    def find_filter_method(self, df_ball, filter_ball=None, no_min=None, no_max=None):
        """Run the filter on each round's winning numbers and collect the rounds that pass.

        Rows are visited from the last row back to row position 20. A round
        "passes" when ``self.ballFilter.filter(...)`` yields no entries for its
        winning combination. The passing rounds are printed in ascending order.

        Args:
            df_ball: History frame with a ``"no"`` column (round number) followed
                by the six winning numbers in column positions 1..6.
            filter_ball: Unused; kept so existing callers keep working.
            no_min: Lowest round number to evaluate (inclusive), or ``None`` for
                no lower bound.
            no_max: Highest round number to evaluate (inclusive), or ``None`` for
                no upper bound.

        Returns:
            ``(win_count, no_filter_ball)`` where ``win_count`` is the number of
            evaluated rows that passed and ``no_filter_ball`` maps each passing
            round number to its six winning numbers.
        """
        win_count = 0
        no_filter_ball = {}

        # Pull the columns out once; looking a row up by position avoids
        # re-scanning the whole frame with a boolean mask on every iteration.
        round_numbers = df_ball["no"].tolist()
        winning_balls = df_ball.iloc[:, 1:7].values.tolist()

        for i in range(len(round_numbers) - 1, self._FIRST_EVALUATED_ROW - 1, -1):
            no = int(round_numbers[i])
            if no_min is not None and no < no_min:
                continue
            if no_max is not None and no > no_max:
                continue

            answer = winning_balls[i]
            triggered = list(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            # An empty result means no filter rejected the winning combination.
            if not triggered:
                win_count += 1
                no_filter_ball[no] = answer

        print("\n\t[구간 {}~{}] 필터에 걸리지 않은 회차 (당첨 조합 통과)]".format(no_min, no_max))
        print("\tcount: {:,} (통과)".format(len(no_filter_ball)))
        for no in sorted(no_filter_ball):
            print("\t\t>", no, no_filter_ball[no])

        return win_count, no_filter_ball

    def report_split(self, df_ball, name: str, lo: int, hi: int):
        """Evaluate rounds ``lo``..``hi`` and print a timing and pass-rate summary.

        The pass rate is computed against the nominal size of the range
        (``hi - lo + 1``), not against the number of rows actually evaluated,
        so rounds missing from ``df_ball`` or skipped at the start of the frame
        still count in the denominator.

        Args:
            df_ball: History frame passed through to ``find_filter_method``.
            name: Label printed in the report header.
            lo: First round number of the range (inclusive).
            hi: Last round number of the range (inclusive).

        Returns:
            Number of rounds in the range whose winning combination passed.
        """
        print("\n" + "=" * 60)
        print(" {} | 회차 {} ~ {}".format(name, lo, hi))
        print("=" * 60)

        t0 = time.time()
        wc, _ = self.find_filter_method(df_ball, no_min=lo, no_max=hi)
        elapsed = datetime.timedelta(seconds=time.time() - t0)

        # Clamp so an inverted range (hi < lo) reports an empty span instead of
        # a negative denominator and a "-0.00%" rate.
        span = max(hi - lo + 1, 0)
        rate = (wc / span * 100) if span else 0

        print("\t처리 시간: {}".format(elapsed))
        print("\t통과 회차 수: {} / {} ({:.2f}%)".format(wc, span, rate))

        # Reference hints are printed only when the range lies inside the
        # corresponding split.
        if lo >= TRAIN_NO[0] and hi <= TRAIN_NO[1]:
            need = max(1, span // 100)
            print("\t(참고) 100회당 최소 1회 기준 대략 {}회 이상이면 충족".format(need))
        if lo >= VALID_NO[0] and hi <= VALID_NO[1]:
            print("\t(참고) 검증 200회 구간에서 최소 3회 이상이면 요구사항 예시 충족")

        return wc
|
|
|
|
|
|
if __name__ == "__main__":
    # Data files are read from the "resources" directory next to this script.
    resources_path = os.path.join(os.path.dirname(__file__), "resources")
    csv_path = os.path.join(resources_path, "lotto_history.txt")

    # The history file has no header row; name the columns explicitly.
    # "no" is the round number and b1..b6 are the winning numbers
    # (bn is presumably the bonus ball -- confirm against the data file).
    df_ball = pd.read_csv(csv_path, header=None)
    df_ball.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]

    ft = FilterTest(resources_path)

    # Last round present in the data; used to cap the open-ended ranges.
    last_no = int(df_ball["no"].max())

    ft.report_split(df_ball, "학습 TRAIN", TRAIN_NO[0], TRAIN_NO[1])

    # Only report a split when the data actually reaches it; otherwise the
    # capped upper bound would fall below the lower bound.
    if last_no >= VALID_NO[0]:
        ft.report_split(df_ball, "검증 VALID", VALID_NO[0], min(VALID_NO[1], last_no))
    if last_no >= TEST_NO[0]:
        ft.report_split(df_ball, "테스트 TEST", TEST_NO[0], last_no)
|