DeepLottery/final_filterTest.py

# -*- coding: utf-8 -*-
"""
학습(1~800) / 검증(801~1000) / 테스트(1001~) 구간별 필터 통과(당첨번호가 필터를 통과하는지) 분석.
1_FilterTest_25.py 와 동일한 흐름이며 BallFilter 대신 final_BallFilter.BallFilter 를 사용합니다.

실행: miniconda 환경 ncue 에서 `python final_filterTest.py` (README 참고).
"""
from __future__ import annotations

import datetime
import os
import time

import pandas as pd

from final_BallFilter import BallFilter

# Draw-number ranges for each split, as specified in PROMPT.txt.
# Both ends are inclusive; each split starts right after the previous one ends.
TRAIN_NO = (1, 800)
VALID_NO = (TRAIN_NO[1] + 1, 1000)
# The upper bound is an open-ended sentinel: "everything from here on".
TEST_NO = (VALID_NO[1] + 1, 10**9)


class FilterTest:
    """Check how many historical winning draws pass ``BallFilter`` unfiltered.

    A draw counts as a pass when ``BallFilter.filter`` yields no filter names
    for that draw's winning numbers.
    """

    # Number of leading rows (positions 0..19) that are never evaluated.
    # NOTE(review): presumably the filter needs earlier draws as history;
    # confirm against final_BallFilter.
    _SKIPPED_LEADING_ROWS = 20

    def __init__(self, resources_path: str):
        """Build the filter from ``lotto_history.json`` inside *resources_path*."""
        lotto_json = os.path.join(resources_path, "lotto_history.json")
        self.ballFilter = BallFilter(lotto_json)

    def _checked_draws(self, df_ball, no_min=None, no_max=None):
        """Return ``(no, balls)`` pairs for the rows that get evaluated.

        Rows are visited from the last position backwards, the first
        ``_SKIPPED_LEADING_ROWS`` positions are left out, and only draws with
        ``no_min <= no <= no_max`` are kept (``None`` disables that bound).
        ``balls`` holds the row's values at column positions 1..6.
        """
        numbers = df_ball["no"].tolist()
        # Convert the frame once; a boolean-mask lookup per draw would rescan
        # every row on every iteration.
        rows = df_ball.values.tolist()
        draws = []
        for pos in range(len(rows) - 1, self._SKIPPED_LEADING_ROWS - 1, -1):
            no = int(numbers[pos])
            if no_min is not None and no < no_min:
                continue
            if no_max is not None and no > no_max:
                continue
            draws.append((no, rows[pos][1:7]))
        return draws

    def find_filter_method(self, df_ball, filter_ball=None, no_min=None, no_max=None):
        """Run the filter on each draw in range and list the draws that pass.

        Args:
            df_ball: Frame with a ``"no"`` column; the winning numbers are
                read from column positions 1..6 of each row.
            filter_ball: Unused; kept so existing callers keep working.
            no_min: Lowest draw number to check (inclusive), or ``None``.
            no_max: Highest draw number to check (inclusive), or ``None``.

        Returns:
            ``(win_count, no_filter_ball)`` where ``win_count`` is the number
            of draws for which the filter returned nothing and
            ``no_filter_ball`` maps each such draw number to its numbers.
        """
        win_count = 0
        no_filter_ball = {}

        for no, answer in self._checked_draws(df_ball, no_min, no_max):
            hits = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            # Materialise before testing emptiness: the result may be any
            # iterable, not necessarily a list.
            if not list(hits):
                win_count += 1
                no_filter_ball[no] = answer

        print("\n\t[구간 {}~{}] 필터에 걸리지 않은 회차 (당첨 조합 통과)]".format(no_min, no_max))
        print("\tcount: {:,} (통과)".format(len(no_filter_ball)))
        for no in sorted(no_filter_ball):
            print("\t\t>", no, no_filter_ball[no])

        return win_count, no_filter_ball

    def report_split(self, df_ball, name: str, lo: int, hi: int):
        """Print a pass-rate report for draws ``lo``..``hi`` and return the pass count.

        The rate is computed over the draws that were actually evaluated, so
        skipped leading rows and draws absent from *df_ball* do not dilute it.

        Args:
            df_ball: Draw history frame, as accepted by ``find_filter_method``.
            name: Label printed in the report header.
            lo: First draw number of the split (inclusive).
            hi: Last draw number of the split (inclusive).

        Returns:
            Number of draws in the split that passed the filter.
        """
        print("\n" + "=" * 60)
        print(" {} | 회차 {} ~ {}".format(name, lo, hi))
        print("=" * 60)
        # Monotonic clock: unaffected by system clock adjustments.
        t0 = time.perf_counter()
        wc, _ = self.find_filter_method(df_ball, no_min=lo, no_max=hi)
        elapsed = datetime.timedelta(seconds=time.perf_counter() - t0)
        # Nominal width of the requested range, clamped so that an inverted
        # range (hi < lo) cannot go negative.
        span = max(0, hi - lo + 1)
        # Denominator for the rate: draws that were really run through the filter.
        checked = len(self._checked_draws(df_ball, lo, hi))
        rate = (wc / checked * 100) if checked else 0
        print("\t처리 시간: {}".format(elapsed))
        print("\t통과 회차 수: {} / {} ({:.2f}%)".format(wc, checked, rate))
        if lo >= TRAIN_NO[0] and hi <= TRAIN_NO[1]:
            # The hint is stated against the nominal range width.
            need = max(1, span // 100)
            print("\t(참고) 100회당 최소 1회 기준 대략 {}회 이상이면 충족".format(need))
        if lo >= VALID_NO[0] and hi <= VALID_NO[1]:
            print("\t(참고) 검증 200회 구간에서 최소 3회 이상이면 요구사항 예시 충족")
        return wc


if __name__ == "__main__":
    # Resources are read from a "resources" directory next to this script.
    resources_path = os.path.join(os.path.dirname(__file__), "resources")
    csv_path = os.path.join(resources_path, "lotto_history.txt")
    # The history file is read without a header row; name the columns here.
    df_ball = pd.read_csv(csv_path, header=None)
    df_ball.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]

    ft = FilterTest(resources_path)

    ft.report_split(df_ball, "학습 TRAIN", TRAIN_NO[0], TRAIN_NO[1])

    # Highest draw number present in the data; used to clamp the later splits.
    last_no = int(df_ball["no"].max())

    # Only report a split the data has actually reached; otherwise the range
    # would be inverted (upper bound below lower bound) and the report empty.
    if last_no >= VALID_NO[0]:
        ft.report_split(df_ball, "검증 VALID", VALID_NO[0], min(VALID_NO[1], last_no))
    if last_no >= TEST_NO[0]:
        ft.report_split(df_ball, "테스트 TEST", TEST_NO[0], last_no)