Add final_BallFilter, train-based params, ncue run script and README notes
Made-with: Cursor
This commit is contained in:
17
README.md
17
README.md
@@ -1,5 +1,22 @@
|
||||
# 실행 순서
|
||||
|
||||
## final_BallFilter · `final_filterTest.py` (miniconda **ncue**)
|
||||
|
||||
임계값은 `tools/compute_final_filter_params.py`가 학습 구간(1~800회) 분포에서 생성하며, 결과는 `final_filter_params.py`에 기록됩니다.
|
||||
|
||||
```bash
|
||||
conda activate ncue
|
||||
python tools/compute_final_filter_params.py
|
||||
python final_filterTest.py
|
||||
```
|
||||
|
||||
conda 경로를 쓰기 어려우면 프로젝트의 `scripts/run_with_ncue.sh`로 동일하게 실행할 수 있습니다.
|
||||
|
||||
```bash
|
||||
./scripts/run_with_ncue.sh tools/compute_final_filter_params.py
|
||||
./scripts/run_with_ncue.sh final_filterTest.py
|
||||
```
|
||||
|
||||
* FilterFeature.py를 실행한다.
|
||||
* lotto_history.json을 읽어서 all_filter_[1-100].[cluster,csv,feature] 파일을 생성한다.
|
||||
|
||||
|
||||
307
final_BallFilter.py
Normal file
307
final_BallFilter.py
Normal file
@@ -0,0 +1,307 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
학습 구간(1~800회)에서 산출한 final_filter_params 를 사용하는 BallFilter.
|
||||
BallFilter_25 의 filterOneDigitPattern 버그(인자 덮어쓰기)를 수정했습니다.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import final_filter_params as P
|
||||
from BallFilter_25 import BallFilter as BallFilter25
|
||||
|
||||
# Largest value listed in ALLOW_CONTINUS_MAX; extract_final_candidates rejects
# a candidate whose getContinusNumber() result exceeds it.
_MAX_CONT = max(P.ALLOW_CONTINUS_MAX)
# Block-listed triples as frozensets so each can be subset-tested against a
# candidate's ball set (see BallFilter.filterTriplePairBall).
_TRIPLE_FS = tuple(frozenset(t) for t in P.TRIPLE_BLOCKLIST)
|
||||
|
||||
|
||||
class BallFilter(BallFilter25):
    """Final filter that checks a candidate against training-derived allow-sets.

    Each check takes one feature of the candidate (or the absolute difference
    of that feature from the previous draw) and tests membership in a
    frozenset from ``final_filter_params``. A feature outside its set adds a
    human-readable label to the result set.
    """

    def filterOneDigitPattern(self, ball):
        """Return how many distinct last digits (``b % 10``) ``ball`` contains."""
        return len({b % 10 for b in ball})

    def filterTriplePairBall(self, ball):
        """Return 1 when ``ball`` contains a block-listed triple, else None."""
        picked = set(ball)
        if any(triple <= picked for triple in _TRIPLE_FS):
            return 1
        return None

    @staticmethod
    def _prev_draw_balls(no, df):
        """Return the six balls (columns 1..6) of draw ``no - 1`` from ``df``.

        Raises:
            TypeError: if ``no`` or ``df`` is None.
            IndexError: if ``df`` has no row whose "no" equals ``no - 1``.
        """
        if no is None or df is None:
            raise TypeError(
                "extract_final_candidates() needs both 'no' and 'df' "
                "to look up the previous draw"
            )
        rows = df[df["no"] == no - 1].values.tolist()
        if not rows:
            raise IndexError("previous draw {} not found in df".format(no - 1))
        return rows[0][1:7]

    def extract_final_candidates(self, ball, no=None, until_end=False, df=None):
        """Return the set of filter labels that reject ``ball`` for draw ``no``.

        Args:
            ball: the six candidate numbers; indexed 0..5 by the positional
                checks (assumes ascending order - TODO confirm with callers).
            no: draw number being predicted. Required despite the ``None``
                default, because every "previous draw" check needs draw
                ``no - 1``.
            until_end: when False, return as soon as the first check fails;
                when True, run every check and collect all labels.
            df: history frame with a "no" column followed by the six ball
                columns. Required despite the ``None`` default.

        Returns:
            A set of labels; empty when the candidate passes every check.

        Raises:
            TypeError: if ``no`` or ``df`` is None.
            IndexError: if draw ``no - 1`` is not present in ``df``.
        """
        p_ball = self._prev_draw_balls(no, df)
        filter_set = set()

        def hit(label):
            """Record ``label``; return True when the caller must stop now."""
            filter_set.add(label)
            return not until_end

        if self.hasWon(ball, no) and hit("이전 당첨 번호"):
            return filter_set

        # --- sum / average of all six balls ---
        acc = sum(ball)
        if acc not in P._F_SUM6 and hit("6개 합: {}".format(acc)):
            return filter_set
        p_acc = sum(p_ball)
        if abs(acc - p_acc) not in P._F_SUM6_DIFF and hit(
            "6개 합 전주차: {}".format(abs(acc - p_acc))
        ):
            return filter_set

        avg = acc // 6
        if avg not in P._F_AVG6 and hit("6개 평균: {}".format(avg)):
            return filter_set
        p_avg = sum(p_ball) // 6
        if abs(avg - p_avg) not in P._F_AVG6_DIFF and hit(
            "6개 평균 전주차: {}".format(abs(avg - p_avg))
        ):
            return filter_set

        # --- sums of the first three / last three balls ---
        s3f = ball[0] + ball[1] + ball[2]
        if s3f not in P._F_SUM3F and hit("b1+b2+b3: {}".format(s3f)):
            return filter_set
        ps3f = p_ball[0] + p_ball[1] + p_ball[2]
        if abs(s3f - ps3f) not in P._F_SUM3F_DIFF and hit(
            "b1+b2+b3 전주차: {}".format(abs(s3f - ps3f))
        ):
            return filter_set

        s3b = ball[3] + ball[4] + ball[5]
        if s3b not in P._F_SUM3B and hit("b4+b5+b6: {}".format(s3b)):
            return filter_set
        ps3b = p_ball[3] + p_ball[4] + p_ball[5]
        if abs(s3b - ps3b) not in P._F_SUM3B_DIFF and hit(
            "b4+b5+b6 전주차: {}".format(abs(s3b - ps3b))
        ):
            return filter_set

        # --- high/low split: only extreme splits (a side of 0 or 1) that
        # were never seen in training are rejected ---
        l, h = self.getHigLowRate(ball)
        if (
            (l in (0, 1) or h in (0, 1))
            and (l, h) not in P._F_HL_SEEN
            and hit("high/low: {}".format((l, h)))
        ):
            return filter_set

        # --- first ball + last ball ---
        gh = ball[0] + ball[5]
        if gh not in P._F_GO_SUM and hit("고저합: {}".format(gh)):
            return filter_set
        pgh = p_ball[0] + p_ball[5]
        if abs(gh - pgh) not in P._F_GO_SUM_DIFF and hit(
            "고저합 전주차: {}".format(abs(gh - pgh))
        ):
            return filter_set

        # --- interval sum ---
        interval_sum = self.get_ball_interval(ball)
        if interval_sum not in P._F_INTERVAL and hit(
            "Interval_sum: {}".format(interval_sum)
        ):
            return filter_set
        p_interval_sum = self.get_ball_interval(p_ball)
        if abs(interval_sum - p_interval_sum) not in P._F_INTERVAL_DIFF and hit(
            "Interval_sum 전주차: {}".format(abs(interval_sum - p_interval_sum))
        ):
            return filter_set

        # --- sum of leading digits / sum of trailing digits ---
        firstLetterSum = self.getFirstLetterSumBall(ball)
        if firstLetterSum not in P._F_FIRST_LETTER and hit(
            "첫수합: {}".format(firstLetterSum)
        ):
            return filter_set
        p_firstLetterSum = self.getFirstLetterSumBall(p_ball)
        if abs(firstLetterSum - p_firstLetterSum) not in P._F_FIRST_LETTER_DIFF and hit(
            "첫수합 전주차: {}".format(abs(firstLetterSum - p_firstLetterSum))
        ):
            return filter_set

        lastLetterSum = self.getLastLetterSumBall(ball)
        if lastLetterSum not in P._F_LAST_LETTER and hit(
            "끝수합: {}".format(lastLetterSum)
        ):
            return filter_set
        p_lastLetterSum = self.getLastLetterSumBall(p_ball)
        if abs(lastLetterSum - p_lastLetterSum) not in P._F_LAST_LETTER_DIFF and hit(
            "끝수합 전주차: {}".format(abs(lastLetterSum - p_lastLetterSum))
        ):
            return filter_set

        # --- first ball and last ball on their own ---
        if ball[0] not in P._F_B0 and hit("첫수: {}".format(ball[0])):
            return filter_set
        if abs(ball[0] - p_ball[0]) not in P._F_B0_DIFF and hit(
            "전주와 첫수 차: {}".format(abs(ball[0] - p_ball[0]))
        ):
            return filter_set

        if ball[5] not in P._F_B5 and hit("마지막 공: {}".format(ball[5])):
            return filter_set
        if abs(ball[5] - p_ball[5]) not in P._F_B5_DIFF and hit(
            "마지막 공 전주차: {}".format(abs(ball[5] - p_ball[5]))
        ):
            return filter_set

        # --- number of distinct last digits ---
        uniq_last_count = self.filterOneDigitPattern(ball)
        if uniq_last_count not in P._F_UNIQ_END and hit(
            "Unique 끝수 개수: {}".format(uniq_last_count)
        ):
            return filter_set
        p_uniq = self.filterOneDigitPattern(p_ball)
        if abs(uniq_last_count - p_uniq) not in P._F_UNIQ_END_DIFF and hit(
            "Unique 끝수 전주차: {}".format(abs(uniq_last_count - p_uniq))
        ):
            return filter_set

        # --- AC value ---
        ac_value = self.getACValue(ball)
        if ac_value not in P._F_AC and hit("ac: {}".format(ac_value)):
            return filter_set
        p_ac_value = self.getACValue(p_ball)
        if abs(ac_value - p_ac_value) not in P._F_AC_DIFF and hit(
            "ac 전주: {}".format(abs(ac_value - p_ac_value))
        ):
            return filter_set

        # --- count of multiples of n, for each n below ---
        _pairs = (
            (3, P._F_MUL3, P._F_MUL3_DIFF),
            (4, P._F_MUL4, P._F_MUL4_DIFF),
            (5, P._F_MUL5, P._F_MUL5_DIFF),
            (6, P._F_MUL6, P._F_MUL6_DIFF),
            (7, P._F_MUL7, P._F_MUL7_DIFF),
            (8, P._F_MUL8, P._F_MUL8_DIFF),
            (9, P._F_MUL9, P._F_MUL9_DIFF),
            (10, P._F_MUL10, P._F_MUL10_DIFF),
            (11, P._F_MUL11, P._F_MUL11_DIFF),
            (13, P._F_MUL13, P._F_MUL13_DIFF),
            (17, P._F_MUL17, P._F_MUL17_DIFF),
            (19, P._F_MUL19, P._F_MUL19_DIFF),
            (23, P._F_MUL23, P._F_MUL23_DIFF),
        )
        for n_mul, allow, allow_diff in _pairs:
            bn = sum(1 for b in ball if b % n_mul == 0)
            if bn not in allow and hit("{}의배수: {}".format(n_mul, bn)):
                return filter_set
            pbn = sum(1 for b in p_ball if b % n_mul == 0)
            if abs(bn - pbn) not in allow_diff and hit(
                "{}의배수 전주차: {}".format(n_mul, abs(bn - pbn))
            ):
                return filter_set

        # --- prime / composite counts ---
        pn_acc = len(set(ball) & set(self.primeNumber))
        if pn_acc not in P._F_PRIME_N and hit("소수: {}".format(pn_acc)):
            return filter_set

        # NOTE(review): the label text below literally reads "complex number"
        # although the value is the composite-number count; it is left
        # unchanged because callers may key on the emitted labels.
        cn_acc = len(set(ball) & set(self.compositeNumber))
        if cn_acc not in P._F_COMPOSITE_N and hit("복소수: {}".format(cn_acc)):
            return filter_set
        diff = abs(cn_acc - len(set(p_ball) & set(self.compositeNumber)))
        if diff not in P._F_COMPOSITE_DIFF and hit("복소수 전주차: {}".format(diff)):
            return filter_set

        # --- even count ---
        even_count = sum(1 for b in ball if b % 2 == 0)
        if even_count not in P._F_EVEN_N and hit("짝수: {}".format(even_count)):
            return filter_set
        p_even_count = sum(1 for b in p_ball if b % 2 == 0)
        if abs(even_count - p_even_count) not in P._F_EVEN_DIFF and hit(
            "짝수 전주차: {}".format(abs(even_count - p_even_count))
        ):
            return filter_set

        # --- inherited pattern checks; each returns a label or None ---
        for fn in (
            self.filterPatternInPaper1,
            self.filterPatternInPaper2,
            self.filterPatternInPaper3,
            self.filterPatternInPaper4,
            self.filterPatternInPaper5,
            self.filterPatternInPaper6,
        ):
            v = fn(ball)
            if v is not None and hit(v):
                return filter_set

        if not P.DISABLE_FILTER_PREVIOUS_NUMBER:
            if self.filterPreviousNumber(ball, no) and hit("이전회차 수/좌우수"):
                return filter_set

        # --- tens-section count ---
        count_section10 = self.getNumberOfAppearancesInSection10(ball)
        if count_section10 not in P._F_SEC10 and hit(
            "같은 10구간대만 출현: {}".format(count_section10)
        ):
            return filter_set
        p_count_section10 = self.getNumberOfAppearancesInSection10(p_ball)
        if abs(count_section10 - p_count_section10) not in P._F_SEC10_DIFF and hit(
            "같은 10구간대만 출현 전주차: {}".format(
                abs(count_section10 - p_count_section10)
            )
        ):
            return filter_set

        # --- appearances within the last N weeks ---
        # NOTE(review): the previous draw's frequency is requested with the
        # same `no` as the candidate, so the window presumably contains the
        # previous draw itself - confirm against getWeeksFrequency. Changing
        # it here would also require regenerating final_filter_params.
        for wk, fw, fwd in (
            (8, P._F_W8, P._F_W8_DIFF),
            (12, P._F_W12, P._F_W12_DIFF),
            (16, P._F_W16, P._F_W16_DIFF),
            (20, P._F_W20, P._F_W20_DIFF),
        ):
            exist_ball = self.getWeeksFrequency(ball, df, no, week=wk)
            if exist_ball not in fw and hit("{} weeks: {}".format(wk, exist_ball)):
                return filter_set
            p_exist_ball = self.getWeeksFrequency(p_ball, df, no, week=wk)
            if abs(exist_ball - p_exist_ball) not in fwd and hit(
                "{} weeks 전주차: {}".format(wk, abs(exist_ball - p_exist_ball))
            ):
                return filter_set

        type3 = self.filterTriplePairBall(ball)
        if type3 is not None and hit("직관 3개 볼을 제거: {}".format(type3)):
            return filter_set

        if not P.DISABLE_FILTER_ALL_PREVIOUS_7:
            if self.filterAllPreivous7(ball, no) and hit("이전 7회차 전부 포함"):
                return filter_set

        continous_ball = self.getContinusNumber(ball)
        if continous_ball > _MAX_CONT and hit("연속볼"):
            return filter_set

        return filter_set

    def filter(self, ball, no, until_end=False, df=None, filter_ball=None):
        """Delegate to ``extract_final_candidates``; ``filter_ball`` is ignored."""
        return self.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)
|
||||
112
final_filterTest.py
Normal file
112
final_filterTest.py
Normal file
@@ -0,0 +1,112 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
학습(1~800) / 검증(801~1000) / 테스트(1001~) 구간별 필터 통과(당첨번호가 필터를 통과하는지) 분석.
|
||||
1_FilterTest_25.py 와 동일한 흐름이며 BallFilter 대신 final_BallFilter.BallFilter 를 사용합니다.
|
||||
|
||||
실행: miniconda 환경 ncue 에서 `python final_filterTest.py` (README 참고).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import time
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from final_BallFilter import BallFilter
|
||||
|
||||
# Draw-number ranges (inclusive) for each split, as specified in PROMPT.txt.
TRAIN_NO = (1, 800)
VALID_NO = (801, 1000)
# Open-ended: the upper bound is a sentinel; the script clamps it to the
# latest draw number found in the data.
TEST_NO = (1001, 10**9)
|
||||
|
||||
|
||||
class FilterTest:
    """Replay historical winning draws through the filter and report pass counts."""

    # Rows at positional index below this value are never evaluated.
    # Presumably a warm-up for the filter's longest (20-week) look-back -
    # TODO confirm.
    _SKIP_ROWS = 20

    def __init__(self, resources_path: str):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lotto_json = os.path.join(resources_path, "lotto_history.json")
        self.ballFilter = BallFilter(lotto_json)

    def _evaluated_rows(self, df_ball, no_min=None, no_max=None):
        """Return ``[(row_index, draw_no), ...]`` for the draws that get checked.

        Rows are visited from the last row down to index ``_SKIP_ROWS``; a row
        is kept when its draw number lies in ``[no_min, no_max]`` (a bound of
        None is open).
        """
        selected = []
        for i in range(len(df_ball) - 1, self._SKIP_ROWS - 1, -1):
            no = int(df_ball["no"].iloc[i])
            if no_min is not None and no < no_min:
                continue
            if no_max is not None and no > no_max:
                continue
            selected.append((i, no))
        return selected

    def find_filter_method(self, df_ball, filter_ball=None, no_min=None, no_max=None):
        """Run the filter on each selected winning draw and list the ones that pass.

        Args:
            df_ball: history frame with a "no" column followed by six ball columns.
            filter_ball: unused; kept for signature compatibility.
            no_min: lowest draw number to check (None = no lower bound).
            no_max: highest draw number to check (None = no upper bound).

        Returns:
            ``(win_count, no_filter_ball)``: the number of draws that produced
            no filter label, and a dict mapping each such draw number to its
            six balls.
        """
        win_count = 0
        no_filter_ball = {}

        for _, no in self._evaluated_rows(df_ball, no_min, no_max):
            answer = df_ball[df_ball["no"] == no].values.tolist()[0][1:7]
            hits = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            if len(hits) == 0:
                win_count += 1
                no_filter_ball[no] = answer

        print("\n\t[구간 {}~{}] 필터에 걸리지 않은 회차 (당첨 조합 통과)]".format(no_min, no_max))
        print("\tcount: {:,} (통과)".format(len(no_filter_ball)))
        for no in sorted(no_filter_ball.keys()):
            print("\t\t>", no, no_filter_ball[no])

        return win_count, no_filter_ball

    def report_split(self, df_ball, name: str, lo: int, hi: int):
        """Print a pass-rate report for draws ``lo..hi`` and return the pass count.

        The rate is computed over the draws that were actually evaluated, so
        skipped warm-up rows and draws missing from ``df_ball`` no longer
        deflate it.
        """
        print("\n" + "=" * 60)
        print(" {} | 회차 {} ~ {}".format(name, lo, hi))
        print("=" * 60)
        t0 = time.time()
        wc, _ = self.find_filter_method(df_ball, no_min=lo, no_max=hi)
        elapsed = datetime.timedelta(seconds=time.time() - t0)
        span = hi - lo + 1  # nominal size of the requested range
        checked = len(self._evaluated_rows(df_ball, lo, hi))
        rate = (wc / checked * 100) if checked else 0
        print("\t처리 시간: {}".format(elapsed))
        print("\t통과 회차 수: {} / {} ({:.2f}%)".format(wc, checked, rate))
        if lo >= TRAIN_NO[0] and hi <= TRAIN_NO[1]:
            # The "at least one pass per 100 draws" hint uses the nominal range.
            need = max(1, span // 100)
            print("\t(참고) 100회당 최소 1회 기준 대략 {}회 이상이면 충족".format(need))
        if lo >= VALID_NO[0] and hi <= VALID_NO[1]:
            print("\t(참고) 검증 200회 구간에서 최소 3회 이상이면 요구사항 예시 충족")
        return wc
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Load the draw history that sits next to this script and report the
    # filter pass rate for the train / validation / test splits in turn.
    resources_path = os.path.join(os.path.dirname(__file__), "resources")
    df_ball = pd.read_csv(os.path.join(resources_path, "lotto_history.txt"), header=None)
    df_ball.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]

    ft = FilterTest(resources_path)

    train_lo, train_hi = TRAIN_NO[0], TRAIN_NO[1]
    ft.report_split(df_ball, "학습 TRAIN", train_lo, train_hi)

    # Clamp the later splits to the newest draw that exists in the data.
    latest_no = int(df_ball["no"].max())
    ft.report_split(df_ball, "검증 VALID", VALID_NO[0], min(VALID_NO[1], latest_no))
    if latest_no >= TEST_NO[0]:
        ft.report_split(df_ball, "테스트 TEST", TEST_NO[0], latest_no)
|
||||
148
final_filter_params.py
Normal file
148
final_filter_params.py
Normal file
File diff suppressed because one or more lines are too long
17
scripts/run_with_ncue.sh
Executable file
17
scripts/run_with_ncue.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
# Run a Python script inside the miniconda "ncue" environment.
# Usage: ./scripts/run_with_ncue.sh final_filterTest.py [args...]
set -euo pipefail

# Always run from the project root so relative script paths resolve.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"

# Replace this shell with `conda run` for the given conda binary.
# --no-capture-output makes conda pass the script's stdout/stderr through
# instead of capturing them.
run_in_ncue() {
  local conda_bin="$1"
  shift
  exec "$conda_bin" run --no-capture-output -n ncue -- python "$@"
}

# Try an explicit MINICONDA_HOME first, then the usual install locations.
for base in "${MINICONDA_HOME:-}" "$HOME/miniconda3" "$HOME/miniforge3" "$HOME/anaconda3" "$HOME/mambaforge"; do
  [ -n "$base" ] || continue
  c="$base/bin/conda"
  if [ -x "$c" ]; then
    run_in_ncue "$c" "$@"
  fi
done

# Fall back to the conda binary exported by an already-initialised shell.
if [ -n "${CONDA_EXE:-}" ] && [ -x "$CONDA_EXE" ]; then
  run_in_ncue "$CONDA_EXE" "$@"
fi

echo "conda ncue 환경을 찾지 못했습니다. 터미널에서: conda activate ncue && python \"\$@\"" >&2
exit 1
|
||||
405
tools/compute_final_filter_params.py
Normal file
405
tools/compute_final_filter_params.py
Normal file
@@ -0,0 +1,405 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
학습 구간(1~800회) 당첨번호로 final_BallFilter.extract_final_candidates 에 쓸 허용 집합을 계산합니다.
|
||||
표준 라이브러리 + pandas(df 호환)만 사용합니다.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
# Project root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Draw history CSV: draw number in column 0, the six balls in columns 1..6.
HISTORY = ROOT / "resources" / "lotto_history.txt"
# Source file scanned for hard-coded pair/triple rules.
BALLFILTER_SRC = ROOT / "BallFilter_25.py"
# Generated parameter module that this script overwrites.
OUT = ROOT / "final_filter_params.py"

# Inclusive draw-number range used as the training split.
TRAIN_LO = 1
TRAIN_HI = 800

# To avoid an overly wide union of training values, each numeric feature set
# is cut down to a percentile band taken over its sorted *unique* values.
# NOTE(review): an earlier version of this comment said values outside the
# band are added back so that every training draw stays covered; main() does
# not do that - out-of-band values are simply dropped.
# A narrower band makes the filter stricter; tune it against the
# train/validation results printed by final_filterTest.py.
PCT_LO = 8
PCT_HI = 92

# Primes and composites in 1..45 (1 belongs to neither set).
PRIME = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43}
COMPOSITE = {4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45}
|
||||
|
||||
|
||||
def load_draws():
    """Read HISTORY and return ``{draw_no: six balls sorted ascending}``.

    Blank CSV rows are skipped. The returned dict is ordered by draw number;
    if a draw number occurs more than once, the last row in the file wins.
    """
    by_no = {}
    with open(HISTORY, newline="", encoding="utf-8") as f:
        for record in csv.reader(f):
            if record:
                draw_no = int(record[0])
                by_no[draw_no] = sorted(map(int, record[1:7]))
    return dict(sorted(by_no.items(), key=lambda item: item[0]))
|
||||
|
||||
|
||||
def get_ac(ball):
    """Return the AC value: distinct pairwise differences among six balls, minus 5."""
    gaps = {ball[hi] - ball[lo] for hi in range(1, 6) for lo in range(hi)}
    return len(gaps) - 5
|
||||
|
||||
|
||||
def interval_sum(ball):
    """Return the sum of the five gaps between consecutive balls (indices 0..5)."""
    total = 0
    for idx in range(1, 6):
        total += ball[idx] - ball[idx - 1]
    return total
|
||||
|
||||
|
||||
def first_letter_sum(ball):
    """Return the sum of the leading digits of the balls whose text is two characters long."""
    total = 0
    for b in ball:
        text = str(b)
        if len(text) == 2:
            total += int(text[0])
    return total
|
||||
|
||||
|
||||
def last_letter_sum(ball):
    """Return the sum of the final digits of all one- and two-character balls."""
    total = 0
    for b in ball:
        text = str(b)
        if len(text) == 2:
            total += int(text[1])
        elif len(text) == 1:
            total += int(text)
    return total
|
||||
|
||||
|
||||
def uniq_end_digits(ball):
    """Return how many distinct last digits (``b % 10``) appear in ``ball``."""
    seen = set()
    for b in ball:
        seen.add(b % 10)
    return len(seen)
|
||||
|
||||
|
||||
def high_low(ball):
    """Return ``(low, high)``: counts of balls below 23 and above 23.

    A ball equal to 23 is counted on neither side.
    NOTE(review): presumably this mirrors BallFilter_25.getHigLowRate - confirm.
    """
    low = 0
    high = 0
    for b in ball:
        if b < 23:
            low += 1
        elif 23 < b:
            high += 1
    return low, high
|
||||
|
||||
|
||||
def section10_count(ball):
    """Return how many distinct tens-sections (``int(b / 10)``) the balls fall into."""
    return len({int(b / 10) for b in ball})
|
||||
|
||||
|
||||
def count_mult(ball, m):
    """Return how many balls are exact multiples of ``m``."""
    return len([b for b in ball if b % m == 0])
|
||||
|
||||
|
||||
def continus_max(ball):
    """Return the length of the longest run of consecutive numbers (minimum 1).

    Compares neighbouring entries at indices 0..5, so ``ball`` is expected to
    be in ascending order.
    """
    longest = 1
    streak = 1
    for idx in range(1, 6):
        if ball[idx] == ball[idx - 1] + 1:
            streak += 1
        else:
            streak = 1
        if streak > longest:
            longest = streak
    return longest
|
||||
|
||||
|
||||
def weeks_freq(draws_map, answer, no, week):
    """Count the balls in ``answer`` that appeared in draws ``no-1 .. no-week``.

    Draw numbers missing from ``draws_map`` are ignored.
    """
    recent = set()
    for prev_no in range(no - 1, no - week - 1, -1):
        if prev_no in draws_map:
            recent.update(draws_map[prev_no])
    return len([b for b in answer if b in recent])
|
||||
|
||||
|
||||
def pct_band_unique(values, lo=PCT_LO, hi=PCT_HI):
    """Return the unique values that lie inside a percentile band.

    The band is taken over the sorted *unique* values: its edges are the
    elements at positions ``lo``% and ``hi``% of the last index (truncated to
    an int). Inputs with six or fewer unique values are returned whole, and an
    empty input yields an empty set.
    """
    if not values:
        return set()
    uniq = sorted(set(values))
    count = len(uniq)
    if count <= 6:
        return set(uniq)
    lo_idx = int((lo / 100.0) * (count - 1))
    hi_idx = int((hi / 100.0) * (count - 1))
    floor_value = uniq[lo_idx]
    ceil_value = uniq[hi_idx]
    return {x for x in uniq if floor_value <= x <= ceil_value}
|
||||
|
||||
|
||||
def parse_pair_triple_rules():
    """Extract the hard-coded pair and triple rules from the BallFilter_25 source.

    Scans the file text for ``len(set_ball & {...}) == 2`` and ``== 3``
    expressions and returns ``(pairs, triples)`` as lists of frozensets. A
    match is kept only when its literal holds exactly 2 (or 3) numbers.
    """
    source = BALLFILTER_SRC.read_text(encoding="utf-8")

    def collect(pattern, size):
        """Return the frozensets of ``size`` members matched by ``pattern``."""
        found = []
        for match in re.finditer(pattern, source):
            members = [int(tok.strip()) for tok in match.group(1).split(",")]
            if len(members) == size:
                found.append(frozenset(members))
        return found

    pairs = collect(r"len\(set_ball & \{([^}]+)\}\) == 2", 2)
    triples = collect(r"len\(set_ball & \{([^}]+)\}\) == 3", 3)
    return pairs, triples
|
||||
|
||||
|
||||
def main():
    """Compute the allow-sets from the training draws and write them to OUT.

    Steps: load all draws, derive the pair/triple block lists, collect every
    feature value (and its absolute difference from the previous draw) seen
    in the training range, narrow each numeric set with pct_band_unique, then
    emit the result as Python source and print a short summary.
    """
    draws = load_draws()
    pair_rules, triple_rules = parse_pair_triple_rules()

    train_draws = {n: draws[n] for n in range(TRAIN_LO, TRAIN_HI + 1) if n in draws}

    # Block lists: drop any pair/triple rule whose numbers appeared together
    # in a training draw, so the rule cannot reject a training winner.
    train_pairs_seen = set()
    train_triples_seen = set()
    for b in train_draws.values():
        for i in range(6):
            for j in range(i + 1, 6):
                train_pairs_seen.add(frozenset((b[i], b[j])))
        for i in range(6):
            for j in range(i + 1, 6):
                for k in range(j + 1, 6):
                    train_triples_seen.add(frozenset((b[i], b[j], b[k])))

    pair_block = [p for p in pair_rules if p not in train_pairs_seen]
    triple_block = [t for t in triple_rules if t not in train_triples_seen]

    # Feature name -> set of values observed in training.
    sets = defaultdict(set)
    # Set to True when some training draw would be rejected by the
    # corresponding rule; emitted below as the DISABLE_* switches.
    flags_prev = {"need_relax_previous": False, "need_relax_prev7": False}

    # Starts at 2 because every feature is paired with the previous draw.
    for no in range(2, TRAIN_HI + 1):
        if no not in draws or (no - 1) not in draws:
            continue
        ball = draws[no]
        p_ball = draws[no - 1]

        s = sum(ball)
        sets["sum6"].add(s)
        sets["sum6_diff"].add(abs(s - sum(p_ball)))

        avg = s // 6
        pavg = sum(p_ball) // 6
        sets["avg6"].add(avg)
        sets["avg6_diff"].add(abs(avg - pavg))

        s3f = ball[0] + ball[1] + ball[2]
        ps3f = p_ball[0] + p_ball[1] + p_ball[2]
        sets["sum3f"].add(s3f)
        sets["sum3f_diff"].add(abs(s3f - ps3f))

        s3b = ball[3] + ball[4] + ball[5]
        ps3b = p_ball[3] + p_ball[4] + p_ball[5]
        sets["sum3b"].add(s3b)
        sets["sum3b_diff"].add(abs(s3b - ps3b))

        l, h = high_low(ball)
        sets["hl_allowed"].add((l, h))

        gh = ball[0] + ball[5]
        pgh = p_ball[0] + p_ball[5]
        sets["go_sum"].add(gh)
        sets["go_sum_diff"].add(abs(gh - pgh))

        iv = interval_sum(ball)
        piv = interval_sum(p_ball)
        sets["interval"].add(iv)
        sets["interval_diff"].add(abs(iv - piv))

        fl = first_letter_sum(ball)
        pfl = first_letter_sum(p_ball)
        sets["first_letter"].add(fl)
        sets["first_letter_diff"].add(abs(fl - pfl))

        ll = last_letter_sum(ball)
        pll = last_letter_sum(p_ball)
        sets["last_letter"].add(ll)
        sets["last_letter_diff"].add(abs(ll - pll))

        sets["b0"].add(ball[0])
        sets["b0_diff"].add(abs(ball[0] - p_ball[0]))
        sets["b5"].add(ball[5])
        sets["b5_diff"].add(abs(ball[5] - p_ball[5]))

        sets["uniq_end"].add(uniq_end_digits(ball))
        sets["uniq_end_diff"].add(abs(uniq_end_digits(ball) - uniq_end_digits(p_ball)))

        ac = get_ac(ball)
        pac = get_ac(p_ball)
        sets["ac"].add(ac)
        sets["ac_diff"].add(abs(ac - pac))

        for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
            sets[f"mul{m}"].add(count_mult(ball, m))
            sets[f"mul{m}_diff"].add(abs(count_mult(ball, m) - count_mult(p_ball, m)))

        pn = len(set(ball) & PRIME)
        sets["prime_n"].add(pn)

        cn = len(set(ball) & COMPOSITE)
        sets["composite_n"].add(cn)
        sets["composite_diff"].add(abs(cn - len(set(p_ball) & COMPOSITE)))

        ev = sum(1 for b in ball if b % 2 == 0)
        pev = sum(1 for b in p_ball if b % 2 == 0)
        sets["even_n"].add(ev)
        sets["even_diff"].add(abs(ev - pev))

        sc = section10_count(ball)
        psc = section10_count(p_ball)
        sets["sec10"].add(sc)
        sets["sec10_diff"].add(abs(sc - psc))

        for wk in (8, 12, 16, 20):
            ex = weeks_freq(draws, ball, no, wk)
            # NOTE(review): the window for `no` starts at draw no-1, which is
            # p_ball itself, so pex is always 6 and the "diff" is just
            # 6 - ex. The runtime filter passes the same `no`, so the two
            # stay consistent; change both together if this is unintended.
            pex = weeks_freq(draws, p_ball, no, wk)
            sets[f"w{wk}"].add(ex)
            sets[f"w{wk}_diff"].add(abs(ex - pex))

        sets["continus_max"].add(continus_max(ball))

        # filterPreviousNumber (said to match the original): the draw is
        # "bad" when none of its balls equals, or is adjacent to, a ball of
        # the previous draw.
        pb_set = set(p_ball)
        bad_prev = True
        for i in range(6):
            bi = ball[i]
            if bi in pb_set or bi - 1 in pb_set or bi + 1 in pb_set:
                bad_prev = False
                break
        if bad_prev:
            flags_prev["need_relax_previous"] = True

        # filterAllPreivous7: all six balls already appeared somewhere in the
        # seven preceding draws.
        pb7 = set()
        for i in range(no - 1, no - 8, -1):
            if i in draws:
                for x in draws[i]:
                    pb7.add(x)
        if len(set(ball) & pb7) == 6:
            flags_prev["need_relax_prev7"] = True

    # Tighten every numeric set to its percentile band.
    # NOTE(review): the original comment also promised to add each training
    # draw's own values back afterwards; no such step exists below.
    keys_numeric = [
        "sum6",
        "sum6_diff",
        "avg6",
        "avg6_diff",
        "sum3f",
        "sum3f_diff",
        "sum3b",
        "sum3b_diff",
        "go_sum",
        "go_sum_diff",
        "interval",
        "interval_diff",
        "first_letter",
        "first_letter_diff",
        "last_letter",
        "last_letter_diff",
        "b0",
        "b0_diff",
        "b5",
        "b5_diff",
        "uniq_end",
        "uniq_end_diff",
        "ac",
        "ac_diff",
        "prime_n",
        "composite_n",
        "composite_diff",
        "even_n",
        "even_diff",
        "sec10",
        "sec10_diff",
    ]
    for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
        keys_numeric.extend([f"mul{m}", f"mul{m}_diff"])
    for wk in (8, 12, 16, 20):
        keys_numeric.extend([f"w{wk}", f"w{wk}_diff"])
    keys_numeric.append("continus_max")

    for k in keys_numeric:
        sets[k] = pct_band_unique(sets[k])

    # High/low: the original comment says only (0,1)/(1,0) are excluded and
    # every (l, h) seen in training is always allowed. Note the set built
    # here holds all four combinations of 0 and 1.
    hl_skip = {(l, h) for l in (0, 1) for h in (0, 1)}

    def emit():
        """Return the full source text of the generated parameter module."""
        lines = [
            "# -*- coding: utf-8 -*-",
            '"""학습 구간 {}~{}회 기준 자동 생성 — tools/compute_final_filter_params.py"""'.format(
                TRAIN_LO, TRAIN_HI
            ),
            "",
            "TRAIN_RANGE = ({}, {})".format(TRAIN_LO, TRAIN_HI),
            "DISABLE_FILTER_PREVIOUS_NUMBER = {}".format(
                str(flags_prev["need_relax_previous"])
            ),
            "DISABLE_FILTER_ALL_PREVIOUS_7 = {}".format(str(flags_prev["need_relax_prev7"])),
            "",
        ]

        def sset(name, key):
            """Append ``name = <sorted list of sets[key]>`` to the output."""
            v = sets[key]
            lines.append("{} = {}".format(name, repr(sorted(v))))

        sset("ALLOW_SUM6", "sum6")
        sset("ALLOW_SUM6_DIFF", "sum6_diff")
        sset("ALLOW_AVG6", "avg6")
        sset("ALLOW_AVG6_DIFF", "avg6_diff")
        sset("ALLOW_SUM3F", "sum3f")
        sset("ALLOW_SUM3F_DIFF", "sum3f_diff")
        sset("ALLOW_SUM3B", "sum3b")
        sset("ALLOW_SUM3B_DIFF", "sum3b_diff")
        lines.append("HL_SKIP = {}".format(repr(sorted(hl_skip))))
        lines.append("HL_SEEN = {}".format(repr(sorted(sets['hl_allowed']))))
        sset("ALLOW_GO_SUM", "go_sum")
        sset("ALLOW_GO_SUM_DIFF", "go_sum_diff")
        sset("ALLOW_INTERVAL", "interval")
        sset("ALLOW_INTERVAL_DIFF", "interval_diff")
        sset("ALLOW_FIRST_LETTER", "first_letter")
        sset("ALLOW_FIRST_LETTER_DIFF", "first_letter_diff")
        sset("ALLOW_LAST_LETTER", "last_letter")
        sset("ALLOW_LAST_LETTER_DIFF", "last_letter_diff")
        sset("ALLOW_B0", "b0")
        sset("ALLOW_B0_DIFF", "b0_diff")
        sset("ALLOW_B5", "b5")
        sset("ALLOW_B5_DIFF", "b5_diff")
        sset("ALLOW_UNIQ_END", "uniq_end")
        sset("ALLOW_UNIQ_END_DIFF", "uniq_end_diff")
        sset("ALLOW_AC", "ac")
        sset("ALLOW_AC_DIFF", "ac_diff")
        for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
            sset("ALLOW_MUL{}".format(m), "mul{}".format(m))
            sset("ALLOW_MUL{}_DIFF".format(m), "mul{}_diff".format(m))
        sset("ALLOW_PRIME_N", "prime_n")
        sset("ALLOW_COMPOSITE_N", "composite_n")
        sset("ALLOW_COMPOSITE_DIFF", "composite_diff")
        sset("ALLOW_EVEN_N", "even_n")
        sset("ALLOW_EVEN_DIFF", "even_diff")
        sset("ALLOW_SEC10", "sec10")
        sset("ALLOW_SEC10_DIFF", "sec10_diff")
        for wk in (8, 12, 16, 20):
            sset("ALLOW_W{}".format(wk), "w{}".format(wk))
            sset("ALLOW_W{}_DIFF".format(wk), "w{}_diff".format(wk))
        sset("ALLOW_CONTINUS_MAX", "continus_max")

        lines.append("PAIR_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in pair_block])))
        lines.append("TRIPLE_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in triple_block])))
        lines.extend(["", "# frozenset 캐시", ""])
        # For every ALLOW_<X> list written above, also emit a frozenset
        # alias _F_<X>.
        allow_names = []
        for line in list(lines):
            if line.startswith("ALLOW_") and " = " in line:
                name = line.split(" = ")[0]
                allow_names.append(name)
        for name in allow_names:
            short = name.replace("ALLOW_", "", 1)
            lines.append("_F_{} = frozenset({})".format(short, name))
        lines.append("_F_HL_SEEN = frozenset(HL_SEEN)")
        lines.append("")
        return "\n".join(lines) + "\n"

    OUT.write_text(emit(), encoding="utf-8")
    print("Wrote", OUT)
    print("pair rules:", len(pair_rules), "-> block", len(pair_block))
    print("triple rules:", len(triple_rules), "-> block", len(triple_block))
    print("DISABLE_FILTER_PREVIOUS_NUMBER", flags_prev["need_relax_previous"])
    print("DISABLE_FILTER_ALL_PREVIOUS_7", flags_prev["need_relax_prev7"])
|
||||
|
||||
|
||||
# Regenerate final_filter_params.py only when this file is run as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user