# -*- coding: utf-8 -*- """ 학습(1~800) / 검증(801~1000) / 테스트(1001~) 구간별 필터 통과(당첨번호가 필터를 통과하는지) 분석. 1_FilterTest_25.py 와 동일한 흐름이며 BallFilter 대신 final_BallFilter.BallFilter 를 사용합니다. 실행: miniconda 환경 ncue 에서 `python final_filterTest.py` (README 참고). """ from __future__ import annotations import datetime import os import time import pandas as pd from final_BallFilter import BallFilter # PROMPT.txt 기준 구간 TRAIN_NO = (1, 800) VALID_NO = (801, 1000) TEST_NO = (1001, 10**9) class FilterTest: def __init__(self, resources_path: str): lotto_json = os.path.join(resources_path, "lotto_history.json") self.ballFilter = BallFilter(lotto_json) def find_filter_method(self, df_ball, filter_ball=None, no_min=None, no_max=None): """no_min~no_max 회차만 역순으로 검사 (None 이면 전체).""" win_count = 0 no_filter_ball = {} filter_dic = {} filter_dic_len = {} filter_dic_1 = {} filter_dic_2 = {} idx_list = list(range(len(df_ball) - 1, 19, -1)) for i in idx_list: no = int(df_ball["no"].iloc[i]) if no_min is not None and no < no_min: continue if no_max is not None and no > no_max: continue answer = df_ball[df_ball["no"] == no].values.tolist()[0] answer = answer[1:7] filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball) filter_type = list(filter_type) size = len(filter_type) if size == 0: win_count += 1 no_filter_ball[no] = answer elif size == 1: key = filter_type[0] filter_dic_1[key] = filter_dic_1.get(key, 0) + 1 elif size == 2: key = ",".join(filter_type) filter_dic_2[key] = filter_dic_2.get(key, 0) + 1 else: if size not in filter_dic_len: filter_dic_len[size] = [] filter_dic_len[size].append(filter_type) for f_t in filter_type: filter_dic[f_t] = filter_dic.get(f_t, 0) + 1 print("\n\t[구간 {}~{}] 필터에 걸리지 않은 회차 (당첨 조합 통과)]".format(no_min, no_max)) print("\tcount: {:,} (통과)".format(len(no_filter_ball))) for no in sorted(no_filter_ball.keys()): print("\t\t>", no, no_filter_ball[no]) return win_count, no_filter_ball def report_split(self, df_ball, name: str, lo: int, hi: int): print("\n" + "=" * 60) print(" {} | 회차 {} ~ {}".format(name, lo, hi)) print("=" * 60) t0 = time.time() wc, _ = self.find_filter_method(df_ball, no_min=lo, no_max=hi) elapsed = datetime.timedelta(seconds=time.time() - t0) span = hi - lo + 1 rate = (wc / span * 100) if span else 0 print("\t처리 시간: {}".format(elapsed)) print("\t통과 회차 수: {} / {} ({:.2f}%)".format(wc, span, rate)) if lo >= TRAIN_NO[0] and hi <= TRAIN_NO[1]: need = max(1, span // 100) print("\t(참고) 100회당 최소 1회 기준 대략 {}회 이상이면 충족".format(need)) if lo >= VALID_NO[0] and hi <= VALID_NO[1]: print("\t(참고) 검증 200회 구간에서 최소 3회 이상이면 요구사항 예시 충족") return wc if __name__ == "__main__": resources_path = os.path.join(os.path.dirname(__file__), "resources") csv_path = os.path.join(resources_path, "lotto_history.txt") df_ball = pd.read_csv(csv_path, header=None) df_ball.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"] ft = FilterTest(resources_path) ft.report_split(df_ball, "학습 TRAIN", TRAIN_NO[0], TRAIN_NO[1]) ft.report_split(df_ball, "검증 VALID", VALID_NO[0], min(VALID_NO[1], int(df_ball["no"].max()))) if int(df_ball["no"].max()) >= TEST_NO[0]: ft.report_split( df_ball, "테스트 TEST", TEST_NO[0], int(df_ball["no"].max()), )