init

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-25 18:32:11 +09:00
commit c611b400ae
40 changed files with 24532 additions and 0 deletions
--- a/test_2.py
+++ b/test_2.py
@@ -0,0 +1,236 @@
+import os
+import argparse
+import pandas as pd
+import itertools
+from filter_model_2 import BallFilter
+import time
+import datetime
+
+class FilterTest:
+    """Back-test helper that runs a ``BallFilter`` against historical draws.
+
+    Every method takes ``df_ball``, a DataFrame with a ``'no'`` column (draw
+    number); the winning numbers of a draw are read from positions 1..6 of the
+    row whose ``'no'`` matches.
+    """
+
+    # Class-level placeholder; __init__ replaces it with a BallFilter instance.
+    ballFilter = None
+
+    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
+        """Build the underlying ``BallFilter`` from the history file.
+
+        Args:
+            resources_path: Directory that contains the history file.
+            ruleset_path: Optional rule-set path forwarded to ``BallFilter``.
+            history_json: History file name inside ``resources_path``.
+        """
+        # The test needs previous-draw / recent-N-week window features, so the
+        # full history (json) must be used.
+        history_file = os.path.join(resources_path, history_json)
+        self.ballFilter = BallFilter(history_file, ruleset_path=ruleset_path)
+
+    @staticmethod
+    def _winning_numbers(df_ball, no):
+        """Return positions 1..6 of the first row whose 'no' equals ``no``."""
+        return df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
+
+    @staticmethod
+    def _print_ranked(counts):
+        """Print ``key -> count`` lines, highest count first."""
+        for key, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
+            print("\t\t>", key, "->", count)
+
+    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
+        """Run the filter on each winning combination and report what fired.
+
+        Rows are visited from the last to the first; only draws whose number
+        lies in ``[start_no, end_no]`` are scored. A per-draw log and several
+        summaries are printed to stdout.
+
+        Args:
+            df_ball: Draw history (may be the full history).
+            start_no: First draw number to score (inclusive).
+            end_no: Last draw number to score (inclusive).
+            filter_ball: Accepted for call compatibility; not used here.
+
+        Returns:
+            Number of scored draws for which no filter fired.
+        """
+        win_count = 0
+        unfiltered_draws = {}   # draw no -> winning numbers no filter rejected
+        hits_per_filter = {}    # filter name -> number of draws it fired on
+        single_hits = {}        # filter name -> draws where it was the only one
+        pair_hits = {}          # "a,b" -> draws where exactly these two fired
+        filters_by_size = {}    # number of fired filters (3+) -> fired lists
+
+        # df_ball may be the full history; scoring covers [start_no, end_no] only.
+        for i in range(len(df_ball) - 1, -1, -1):
+            no = int(df_ball['no'].iloc[i])
+            if no < start_no or end_no < no:
+                continue
+            answer = self._winning_numbers(df_ball, no)
+
+            fired = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
+            size = len(fired)
+
+            if size == 0:
+                win_count += 1
+                unfiltered_draws[no] = answer
+                print("\t", no)
+            else:
+                print("\t", no, fired)
+                if size == 1:
+                    single_hits[fired[0]] = single_hits.get(fired[0], 0) + 1
+                elif size == 2:
+                    pair_key = ','.join(fired)
+                    pair_hits[pair_key] = pair_hits.get(pair_key, 0) + 1
+                else:
+                    # Grouped by size so draws with few fired filters list first.
+                    filters_by_size.setdefault(size, []).append(fired)
+
+            for name in fired:
+                hits_per_filter[name] = hits_per_filter.get(name, 0) + 1
+
+        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
+        for filter_count in sorted(filters_by_size):
+            for fired in filters_by_size[filter_count]:
+                print("\t\t>{} > {}".format(filter_count, fired))
+
+        print("\n\t[걸러진 유일 필터]")
+        self._print_ranked(single_hits)
+
+        print("\n\t[2개 필터에 걸린 경우]")
+        self._print_ranked(pair_hits)
+
+        print("\n\t[Filter 유형 별 걸린 개수]")
+        self._print_ranked(hits_per_filter)
+
+        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
+        # The denominator is the width of the requested range, not the number
+        # of rows actually found in df_ball.
+        total = max(0, end_no - start_no + 1)
+        rate = (100 * len(unfiltered_draws) / total) if total else 0.0
+        # '.2f' = two decimals; a bare '.2' would mean two significant digits
+        # and render e.g. 33.33 as '3.3e+01'.
+        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(unfiltered_draws), total, rate)
+        print(summary)
+        for no, answer in unfiltered_draws.items():
+            print("\t\t>", no, answer)
+        print(summary)
+
+        return win_count
+
+    def find_final_candidates(self, no, df_ball, filter_ball=None):
+        """Return every 6-number combination of 1..45 that no filter rejects.
+
+        Args:
+            no: Draw number forwarded to the filter.
+            df_ball: Draw history forwarded to the filter.
+            filter_ball: Optional numbers to exclude; any combination that
+                contains one of them is skipped before filtering.
+
+        Returns:
+            List of 6-tuples in ``itertools.combinations`` order.
+        """
+        excluded = set(filter_ball) if filter_ball is not None else set()
+        final_candidates = []
+
+        # Stream the combinations instead of materialising all 8,145,060 tuples.
+        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
+            if idx % 1000000 == 0:
+                print("   - {} processed...".format(idx))
+
+            if not excluded.isdisjoint(ball):
+                continue
+
+            if len(self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)):
+                continue
+
+            final_candidates.append(ball)
+
+        return final_candidates
+
+    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
+        """Print the draws whose winning numbers pass ``extract_final_candidates``.
+
+        Args:
+            df_ball: Draw history.
+            p_win_count: Reference win count, only echoed in the summary line.
+            filter_ball: Optional numbers; draws whose winning numbers contain
+                any of them are skipped.
+        """
+        excluded = set(filter_ball) if filter_ball is not None else set()
+
+        win_count = 0
+        # Row 0 is not visited; the total printed below (len - 1) matches that.
+        for i in range(len(df_ball) - 1, 0, -1):
+            no = df_ball['no'].iloc[i]
+            answer = self._winning_numbers(df_ball, no)
+
+            if not excluded.isdisjoint(answer):
+                continue
+
+            rejected_by = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
+
+            if len(rejected_by) == 0:
+                win_count += 1
+                print("\t\t>{}. {}".format(no, answer))
+
+        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
+
+    def validate(self, df_ball, nos=None):
+        """Check, per draw, whether the winning combination survives filtering.
+
+        For each draw number the combinations of 1..45 are enumerated in order
+        and passed to ``extract_final_candidates``; the search stops at the
+        first combination that is both unrejected and equal to the answer.
+
+        Args:
+            df_ball: Draw history.
+            nos: Iterable of draw numbers to check. ``None`` means no draws.
+
+        Returns:
+            Dict mapping each draw number that "won" to its winning numbers.
+        """
+        win_history = {}
+
+        # The default nos=None used to crash on iteration; treat it as empty.
+        for no in (nos if nos is not None else ()):
+            print(no, "processing...")
+            answer = self._winning_numbers(df_ball, no)
+            answer_set = set(answer)
+
+            # Stream the combinations instead of rebuilding a full list per draw.
+            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
+                if idx % 1000000 == 0:
+                    print("   - {} processed...".format(idx))
+                ball = list(combo)
+                rejected_by = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
+                if 0 == len(rejected_by) and len(answer_set & set(ball)) == 6:
+                    win_history[no] = answer
+                    print("win.. no: {}, answer: {}".format(no, str(answer)))
+                    break
+
+        return win_history
+
+
+if __name__ == '__main__':
+    # Script entry point: load the draw history, run STEP #1
+    # (find_filter_method) over [--start-no, --end-no] and print the elapsed time.
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--resources", default="resources")
+    # NOTE(review): the help text names filter_model_1.py although this file
+    # imports BallFilter from filter_model_2 - confirm which built-in ruleset
+    # is the real fallback.
+    parser.add_argument(
+        "--ruleset",
+        default=None,
+        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
+    )
+    parser.add_argument("--start-no", type=int, default=1001)
+    parser.add_argument("--end-no", type=int, default=1204)
+    args = parser.parse_args()
+
+    resources_path = args.resources
+
+    # Use the full-history txt so previous/window features are computed
+    # correctly, while scoring only the test range.
+    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
+    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
+    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
+
+    filter_ball=[]
+    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')
+
+    print("STEP #1. 필터 방법 추출")
+    # perf_counter is a monotonic clock, so the elapsed time cannot be skewed
+    # by wall-clock adjustments.
+    start = time.perf_counter()
+    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
+    process_time = datetime.timedelta(seconds=time.perf_counter() - start)
+    print("process_time: ", process_time)
+
+    # Disabled STEP #0 (final candidate selection), kept as an inert string.
+    # Fixed inside the snippet: each candidate `ball` is written out, not the
+    # draw's `answer` repeated once per candidate.
+    """
+    print("\n\n")
+    no = df_ball['no'].values[-1]
+    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
+    answer = ball[1:7]
+
+    print("STEP #0. 최종 후보 선정")
+    start = time.perf_counter()
+    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
+    process_time = datetime.timedelta(seconds=time.perf_counter() - start)
+    print("process_time: ", process_time)
+
+    print("   > size: {}".format(len(final_candidates)))
+    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
+    with open(file_name, 'w+') as outFp:
+        for ball in final_candidates:
+            ball_str = [str(b) for b in ball]
+            outFp.write("{}\n".format(','.join(ball_str)))
+
+    print('{}회, 정답: {}\n'.format(no, str(answer)))
+    """
+
+    # Disabled STEP #2 (win-count check):
+    #print("\n\n")
+    #print("STEP #2. 당첨 회수 확인")
+    #filterTest.check_filter_method(df_ball, win_count)
+
+    # Original version (pinned to the feature file): 22 wins