Add final_BallFilter, train-based params, ncue run script and README notes

Made-with: Cursor
This commit is contained in:
2026-04-08 19:18:31 +09:00
parent 2bd4ad8fcb
commit 013206ef67
6 changed files with 1006 additions and 0 deletions

View File

@@ -1,5 +1,22 @@
# 실행 순서
## final_BallFilter · `final_filterTest.py` (miniconda **ncue**)
임계값은 `tools/compute_final_filter_params.py`가 학습 구간(1~800회) 분포에서 생성하며, 결과는 `final_filter_params.py`에 기록됩니다.
```bash
conda activate ncue
python tools/compute_final_filter_params.py
python final_filterTest.py
```
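셸에서 `conda activate`를 쓸 수 없는 환경이라면 프로젝트의 `scripts/run_with_ncue.sh`를 사용하세요. 이 스크립트는 일반적인 설치 경로(또는 `MINICONDA_HOME`, `CONDA_EXE`)에서 conda를 찾아 `conda run -n ncue`로 동일하게 실행합니다.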
```bash
./scripts/run_with_ncue.sh tools/compute_final_filter_params.py
./scripts/run_with_ncue.sh final_filterTest.py
```
* FilterFeature.py를 실행한다.
* lotto_history.json을 읽어서 all_filter_[1-100].[cluster,csv,feature] 파일을 생성한다.

307
final_BallFilter.py Normal file
View File

@@ -0,0 +1,307 @@
# -*- coding: utf-8 -*-
"""
학습 구간(1~800회)에서 산출한 final_filter_params 를 사용하는 BallFilter.
BallFilter_25 의 filterOneDigitPattern 버그(인자 덮어쓰기)를 수정했습니다.
"""
from __future__ import annotations
import final_filter_params as P
from BallFilter_25 import BallFilter as BallFilter25
# Largest value listed in P.ALLOW_CONTINUS_MAX; extract_final_candidates rejects a
# combination whose getContinusNumber() result exceeds it.
_MAX_CONT = max(P.ALLOW_CONTINUS_MAX)
# Blocklisted triples pre-converted to frozensets so filterTriplePairBall can use
# plain subset tests.
_TRIPLE_FS = tuple(frozenset(t) for t in P.TRIPLE_BLOCKLIST)
class BallFilter(BallFilter25):
    """Final filter that checks a combination against training-derived allow-sets.

    All thresholds are read from ``final_filter_params`` (imported as ``P``).
    Helper methods not defined here (``hasWon``, ``getHigLowRate``,
    ``getACValue``, ...) are inherited from ``BallFilter_25.BallFilter``.
    """

    def filterOneDigitPattern(self, ball):
        """Return how many distinct last digits (``b % 10``) appear in ``ball``."""
        return len({b % 10 for b in ball})

    def filterTriplePairBall(self, ball):
        """Return 1 if ``ball`` contains all numbers of any blocklisted triple, else None."""
        chosen = set(ball)
        if any(triple <= chosen for triple in _TRIPLE_FS):
            return 1
        return None

    def extract_final_candidates(self, ball, no=None, until_end=False, df=None):
        """Run every filter check on ``ball`` and return the labels of failed checks.

        Args:
            ball: Six numbers, indexed 0..5.
                NOTE(review): presumably sorted ascending (ball[0] + ball[5] is
                labelled as the low/high sum) -- confirm against callers.
            no: Draw number being evaluated. Required despite the ``None``
                default, because the previous draw (``no - 1``) is looked up.
            until_end: When False, return as soon as the first check fails.
                When True, run all checks and collect every failure label.
            df: DataFrame with a ``"no"`` column and the six balls in
                columns 1..6. Required despite the ``None`` default.

        Returns:
            A set of failure labels; an empty set means ``ball`` passed.

        Raises:
            TypeError: If ``no`` or ``df`` is None.
            IndexError: If draw ``no - 1`` is not present in ``df``.
        """
        # The previous draw is needed by most checks, so the None defaults can
        # never work; fail with a clear message instead of an opaque one.
        if no is None or df is None:
            raise TypeError(
                "extract_final_candidates() requires both 'no' and 'df'"
            )
        prev_rows = df[df["no"] == no - 1].values.tolist()
        if not prev_rows:
            raise IndexError(
                "previous draw {} not found in df".format(no - 1)
            )
        p_ball = prev_rows[0][1:7]
        filter_set = set()

        def reject(label):
            """Record ``label``; return True when the caller must stop now."""
            filter_set.add(label)
            return not until_end

        def fails(value, allowed, label):
            """Reject ``value`` if it is outside ``allowed``; True means stop."""
            return value not in allowed and reject(label.format(value))

        if self.hasWon(ball, no) and reject("이전 당첨 번호"):
            return filter_set

        # --- sums and averages -------------------------------------------
        acc = sum(ball)
        if fails(acc, P._F_SUM6, "6개 합: {}"):
            return filter_set
        p_acc = sum(p_ball)
        if fails(abs(acc - p_acc), P._F_SUM6_DIFF, "6개 합 전주차: {}"):
            return filter_set

        avg = acc // 6
        if fails(avg, P._F_AVG6, "6개 평균: {}"):
            return filter_set
        p_avg = p_acc // 6
        if fails(abs(avg - p_avg), P._F_AVG6_DIFF, "6개 평균 전주차: {}"):
            return filter_set

        s3f = ball[0] + ball[1] + ball[2]
        if fails(s3f, P._F_SUM3F, "b1+b2+b3: {}"):
            return filter_set
        ps3f = p_ball[0] + p_ball[1] + p_ball[2]
        if fails(abs(s3f - ps3f), P._F_SUM3F_DIFF, "b1+b2+b3 전주차: {}"):
            return filter_set

        s3b = ball[3] + ball[4] + ball[5]
        if fails(s3b, P._F_SUM3B, "b4+b5+b6: {}"):
            return filter_set
        ps3b = p_ball[3] + p_ball[4] + p_ball[5]
        if fails(abs(s3b - ps3b), P._F_SUM3B_DIFF, "b4+b5+b6 전주차: {}"):
            return filter_set

        # --- high/low split: only extreme splits unseen in training fail --
        low, high = self.getHigLowRate(ball)
        if (low in (0, 1) or high in (0, 1)) and (low, high) not in P._F_HL_SEEN:
            if reject("high/low: {}".format((low, high))):
                return filter_set

        # --- first + last ball -------------------------------------------
        gh = ball[0] + ball[5]
        if fails(gh, P._F_GO_SUM, "고저합: {}"):
            return filter_set
        pgh = p_ball[0] + p_ball[5]
        if fails(abs(gh - pgh), P._F_GO_SUM_DIFF, "고저합 전주차: {}"):
            return filter_set

        # --- interval / digit sums ---------------------------------------
        interval_sum = self.get_ball_interval(ball)
        if fails(interval_sum, P._F_INTERVAL, "Interval_sum: {}"):
            return filter_set
        p_interval_sum = self.get_ball_interval(p_ball)
        if fails(abs(interval_sum - p_interval_sum), P._F_INTERVAL_DIFF,
                 "Interval_sum 전주차: {}"):
            return filter_set

        first_sum = self.getFirstLetterSumBall(ball)
        if fails(first_sum, P._F_FIRST_LETTER, "첫수합: {}"):
            return filter_set
        p_first_sum = self.getFirstLetterSumBall(p_ball)
        if fails(abs(first_sum - p_first_sum), P._F_FIRST_LETTER_DIFF,
                 "첫수합 전주차: {}"):
            return filter_set

        last_sum = self.getLastLetterSumBall(ball)
        if fails(last_sum, P._F_LAST_LETTER, "끝수합: {}"):
            return filter_set
        p_last_sum = self.getLastLetterSumBall(p_ball)
        if fails(abs(last_sum - p_last_sum), P._F_LAST_LETTER_DIFF,
                 "끝수합 전주차: {}"):
            return filter_set

        # --- first and last ball values ----------------------------------
        if fails(ball[0], P._F_B0, "첫수: {}"):
            return filter_set
        if fails(abs(ball[0] - p_ball[0]), P._F_B0_DIFF, "전주와 첫수 차: {}"):
            return filter_set
        if fails(ball[5], P._F_B5, "마지막 공: {}"):
            return filter_set
        if fails(abs(ball[5] - p_ball[5]), P._F_B5_DIFF, "마지막 공 전주차: {}"):
            return filter_set

        # --- distinct last digits and AC value ---------------------------
        uniq_last_count = self.filterOneDigitPattern(ball)
        if fails(uniq_last_count, P._F_UNIQ_END, "Unique 끝수 개수: {}"):
            return filter_set
        p_uniq = self.filterOneDigitPattern(p_ball)
        if fails(abs(uniq_last_count - p_uniq), P._F_UNIQ_END_DIFF,
                 "Unique 끝수 전주차: {}"):
            return filter_set

        ac_value = self.getACValue(ball)
        if fails(ac_value, P._F_AC, "ac: {}"):
            return filter_set
        p_ac_value = self.getACValue(p_ball)
        if fails(abs(ac_value - p_ac_value), P._F_AC_DIFF, "ac 전주: {}"):
            return filter_set

        # --- count of multiples of n, and its change vs. previous draw ---
        # Built eagerly so a missing parameter surfaces before any check runs.
        mul_rules = tuple(
            (
                n_mul,
                getattr(P, "_F_MUL{}".format(n_mul)),
                getattr(P, "_F_MUL{}_DIFF".format(n_mul)),
            )
            for n_mul in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23)
        )
        for n_mul, allow, allow_diff in mul_rules:
            bn = sum(1 for b in ball if b % n_mul == 0)
            if bn not in allow and reject("{}의배수: {}".format(n_mul, bn)):
                return filter_set
            pbn = sum(1 for b in p_ball if b % n_mul == 0)
            if abs(bn - pbn) not in allow_diff and reject(
                "{}의배수 전주차: {}".format(n_mul, abs(bn - pbn))
            ):
                return filter_set

        # --- prime / composite / even counts -----------------------------
        ball_set = set(ball)
        pn_acc = len(ball_set & set(self.primeNumber))
        if fails(pn_acc, P._F_PRIME_N, "소수: {}"):
            return filter_set

        composites = set(self.compositeNumber)
        cn_acc = len(ball_set & composites)
        if fails(cn_acc, P._F_COMPOSITE_N, "복소수: {}"):
            return filter_set
        diff = abs(cn_acc - len(set(p_ball) & composites))
        if fails(diff, P._F_COMPOSITE_DIFF, "복소수 전주차: {}"):
            return filter_set

        even_count = sum(1 for b in ball if b % 2 == 0)
        if fails(even_count, P._F_EVEN_N, "짝수: {}"):
            return filter_set
        p_even_count = sum(1 for b in p_ball if b % 2 == 0)
        if fails(abs(even_count - p_even_count), P._F_EVEN_DIFF, "짝수 전주차: {}"):
            return filter_set

        # --- inherited paper-pattern filters: a non-None result is the label
        for fn in (
            self.filterPatternInPaper1,
            self.filterPatternInPaper2,
            self.filterPatternInPaper3,
            self.filterPatternInPaper4,
            self.filterPatternInPaper5,
            self.filterPatternInPaper6,
        ):
            v = fn(ball)
            if v is not None and reject(v):
                return filter_set

        if not P.DISABLE_FILTER_PREVIOUS_NUMBER:
            if self.filterPreviousNumber(ball, no) and reject("이전회차 수/좌우수"):
                return filter_set

        # --- number of distinct tens-sections ----------------------------
        count_section10 = self.getNumberOfAppearancesInSection10(ball)
        if fails(count_section10, P._F_SEC10, "같은 10구간대만 출현: {}"):
            return filter_set
        p_count_section10 = self.getNumberOfAppearancesInSection10(p_ball)
        if fails(abs(count_section10 - p_count_section10), P._F_SEC10_DIFF,
                 "같은 10구간대만 출현 전주차: {}"):
            return filter_set

        # --- overlap with numbers drawn in the last N weeks --------------
        for wk, fw, fwd in (
            (8, P._F_W8, P._F_W8_DIFF),
            (12, P._F_W12, P._F_W12_DIFF),
            (16, P._F_W16, P._F_W16_DIFF),
            (20, P._F_W20, P._F_W20_DIFF),
        ):
            exist_ball = self.getWeeksFrequency(ball, df, no, week=wk)
            if exist_ball not in fw and reject(
                "{} weeks: {}".format(wk, exist_ball)
            ):
                return filter_set
            # The previous draw is scored against the same window (same ``no``).
            p_exist_ball = self.getWeeksFrequency(p_ball, df, no, week=wk)
            if abs(exist_ball - p_exist_ball) not in fwd and reject(
                "{} weeks 전주차: {}".format(wk, abs(exist_ball - p_exist_ball))
            ):
                return filter_set

        type3 = self.filterTriplePairBall(ball)
        if type3 is not None and reject("직관 3개 볼을 제거: {}".format(type3)):
            return filter_set

        if not P.DISABLE_FILTER_ALL_PREVIOUS_7:
            if self.filterAllPreivous7(ball, no) and reject("이전 7회차 전부 포함"):
                return filter_set

        continous_ball = self.getContinusNumber(ball)
        if continous_ball > _MAX_CONT and reject("연속볼"):
            return filter_set

        return filter_set

    def filter(self, ball, no, until_end=False, df=None, filter_ball=None):
        """Entry point kept for compatibility; delegates to extract_final_candidates.

        ``filter_ball`` is accepted but not used.
        """
        return self.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)

112
final_filterTest.py Normal file
View File

@@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
"""
학습(1~800) / 검증(801~1000) / 테스트(1001~) 구간별 필터 통과(당첨번호가 필터를 통과하는지) 분석.
1_FilterTest_25.py 와 동일한 흐름이며 BallFilter 대신 final_BallFilter.BallFilter 를 사용합니다.
실행: miniconda 환경 ncue 에서 `python final_filterTest.py` (README 참고).
"""
from __future__ import annotations
import datetime
import os
import time
import pandas as pd
from final_BallFilter import BallFilter
# Draw-number ranges for each split, as specified in PROMPT.txt (inclusive bounds).
TRAIN_NO = (1, 800)
VALID_NO = (801, 1000)
# Only TEST_NO[0] is used by the script below; the upper bound is a large sentinel.
TEST_NO = (1001, 10**9)
class FilterTest:
    """Replay historical winning draws through the final filter and report pass counts."""

    def __init__(self, resources_path: str):
        """Create the filter from ``lotto_history.json`` located in ``resources_path``."""
        lotto_json = os.path.join(resources_path, "lotto_history.json")
        self.ballFilter = BallFilter(lotto_json)
        # Number of draws actually evaluated by the latest find_filter_method call.
        self.last_checked_count = 0

    def find_filter_method(self, df_ball, filter_ball=None, no_min=None, no_max=None):
        """Check draws whose number lies in ``no_min``..``no_max`` (None = unbounded).

        Rows are visited from the last row of ``df_ball`` down to row index 20;
        rows 0..19 are never evaluated. The number of draws that were evaluated
        is stored in ``self.last_checked_count`` so callers can compute a pass
        rate over the draws that were really tested.

        Args:
            df_ball: DataFrame with a ``"no"`` column and the six balls in
                columns 1..6.
            filter_ball: Unused; kept for interface compatibility.
            no_min: Lowest draw number to include, or None.
            no_max: Highest draw number to include, or None.

        Returns:
            ``(win_count, no_filter_ball)`` where ``no_filter_ball`` maps each
            draw number that triggered no filter to its six balls.
        """
        win_count = 0
        checked = 0
        no_filter_ball = {}
        for i in range(len(df_ball) - 1, 19, -1):
            no = int(df_ball["no"].iloc[i])
            if no_min is not None and no < no_min:
                continue
            if no_max is not None and no > no_max:
                continue
            answer = df_ball[df_ball["no"] == no].values.tolist()[0][1:7]
            checked += 1
            rejected = self.ballFilter.filter(
                ball=answer, no=no, until_end=True, df=df_ball
            )
            if not rejected:
                win_count += 1
                no_filter_ball[no] = answer
        self.last_checked_count = checked
        print("\n\t[구간 {}~{}] 필터에 걸리지 않은 회차 (당첨 조합 통과)]".format(no_min, no_max))
        print("\tcount: {:,} (통과)".format(len(no_filter_ball)))
        for no in sorted(no_filter_ball):
            print("\t\t>", no, no_filter_ball[no])
        return win_count, no_filter_ball

    def report_split(self, df_ball, name: str, lo: int, hi: int):
        """Print a pass-rate report for draws ``lo``..``hi`` and return the pass count.

        The rate is computed over the draws that were actually evaluated, not
        over ``hi - lo + 1``, because find_filter_method skips the first 20 rows
        and any draw number missing from ``df_ball``.
        """
        print("\n" + "=" * 60)
        print(" {} | 회차 {} ~ {}".format(name, lo, hi))
        print("=" * 60)
        t0 = time.time()
        wc, _ = self.find_filter_method(df_ball, no_min=lo, no_max=hi)
        elapsed = datetime.timedelta(seconds=time.time() - t0)
        span = hi - lo + 1
        checked = self.last_checked_count
        rate = (wc / checked * 100) if checked else 0
        print("\t처리 시간: {}".format(elapsed))
        print("\t통과 회차 수: {} / {} ({:.2f}%)".format(wc, checked, rate))
        if lo >= TRAIN_NO[0] and hi <= TRAIN_NO[1]:
            # The requirement is stated per 100 draws of the range itself.
            need = max(1, span // 100)
            print("\t(참고) 100회당 최소 1회 기준 대략 {}회 이상이면 충족".format(need))
        if lo >= VALID_NO[0] and hi <= VALID_NO[1]:
            print("\t(참고) 검증 200회 구간에서 최소 3회 이상이면 요구사항 예시 충족")
        return wc
if __name__ == "__main__":
    # Load the draw history that sits next to this script, then report each split.
    resources_path = os.path.join(os.path.dirname(__file__), "resources")
    df_ball = pd.read_csv(
        os.path.join(resources_path, "lotto_history.txt"), header=None
    )
    df_ball.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]

    ft = FilterTest(resources_path)
    train_lo, train_hi = TRAIN_NO
    ft.report_split(df_ball, "학습 TRAIN", train_lo, train_hi)

    # Clamp the validation range to the newest draw available in the data.
    latest_no = int(df_ball["no"].max())
    ft.report_split(df_ball, "검증 VALID", VALID_NO[0], min(VALID_NO[1], latest_no))

    # The test split only exists once the history reaches its first draw.
    if latest_no >= TEST_NO[0]:
        ft.report_split(df_ball, "테스트 TEST", TEST_NO[0], latest_no)

148
final_filter_params.py Normal file

File diff suppressed because one or more lines are too long

17
scripts/run_with_ncue.sh Executable file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Run a Python script inside the miniconda "ncue" environment.
# Usage: ./scripts/run_with_ncue.sh final_filterTest.py
set -euo pipefail
# Resolve the repository root (parent of this script's directory) and run from
# there, so relative script paths such as tools/... resolve.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"
# Try an explicit MINICONDA_HOME first, then common per-user install locations.
for base in "${MINICONDA_HOME:-}" "$HOME/miniconda3" "$HOME/miniforge3" "$HOME/anaconda3" "$HOME/mambaforge"; do
# Skip the empty entry produced when MINICONDA_HOME is unset.
[ -n "$base" ] || continue
c="$base/bin/conda"
if [ -x "$c" ]; then
# exec replaces this shell, so the first usable conda wins.
# NOTE(review): `conda run` may buffer output unless --no-capture-output is
# passed -- confirm whether live output is wanted for long runs.
exec "$c" run -n ncue -- python "$@"
fi
done
# Fall back to the conda executable named by CONDA_EXE, if it is set and executable.
if [ -n "${CONDA_EXE:-}" ] && [ -x "$CONDA_EXE" ]; then
exec "$CONDA_EXE" run -n ncue -- python "$@"
fi
# No conda found: print manual instructions to stderr and fail.
echo "conda ncue 환경을 찾지 못했습니다. 터미널에서: conda activate ncue && python \"\$@\"" >&2
exit 1

View File

@@ -0,0 +1,405 @@
#!/usr/bin/env python3
"""
학습 구간(1~800회) 당첨번호로 final_BallFilter.extract_final_candidates 에 쓸 허용 집합을 계산합니다.
표준 라이브러리 + pandas(df 호환)만 사용합니다.
"""
from __future__ import annotations
import csv
import re
from collections import defaultdict
from pathlib import Path
# Directory two levels above this file (i.e. the parent of the directory holding it).
ROOT = Path(__file__).resolve().parents[1]
# Draw history read by load_draws().
HISTORY = ROOT / "resources" / "lotto_history.txt"
# Source file scanned by parse_pair_triple_rules().
BALLFILTER_SRC = ROOT / "BallFilter_25.py"
# Generated parameter module written by main().
OUT = ROOT / "final_filter_params.py"
# Inclusive draw-number range treated as training data.
TRAIN_LO = 1
TRAIN_HI = 800
# Percentile band, taken over the sorted *unique* values of each feature, that
# pct_band_unique() keeps. A narrower band gives a stricter filter; tune it
# together with the results printed by final_filterTest.py.
# NOTE(review): the original comment said values outside the band are added
# back so every training winner passes, but main() only applies
# pct_band_unique() and no such step is visible -- confirm the intent.
PCT_LO = 8
PCT_HI = 92
# Primes and composites in 1..45 (1 belongs to neither set).
PRIME = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43}
COMPOSITE = {4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45}
def load_draws():
    """Read the history CSV and return ``{draw_no: sorted six balls}``.

    Column 0 is the draw number and columns 1..6 are the balls; any further
    columns are ignored. Empty CSV rows are skipped.
    """
    parsed = []
    with open(HISTORY, newline="", encoding="utf-8") as handle:
        for record in csv.reader(handle):
            if record:
                draw_no = int(record[0])
                parsed.append((draw_no, sorted(int(cell) for cell in record[1:7])))
    # Ascending draw order; for duplicate numbers the later entry wins.
    parsed.sort(key=lambda item: item[0])
    return dict(parsed)
def get_ac(ball):
    """Return the AC value: distinct pairwise differences of the six balls, minus 5."""
    gaps = {
        ball[hi] - ball[lo]
        for hi in range(5, -1, -1)
        for lo in range(hi - 1, -1, -1)
    }
    return len(gaps) - (6 - 1)
def interval_sum(ball):
    """Return the sum of the five gaps between consecutive balls."""
    total = 0
    for idx in range(1, 6):
        total += ball[idx] - ball[idx - 1]
    return total
def first_letter_sum(ball):
    """Return the sum of the leading digits of the two-character numbers in ``ball``."""
    total = 0
    for number in ball:
        text = str(number)
        # Numbers whose text is not exactly two characters contribute nothing.
        if len(text) == 2:
            total += int(text[0])
    return total
def last_letter_sum(ball):
    """Return the sum of the last digits of the one- and two-character numbers in ``ball``."""
    texts = [str(number) for number in ball]
    two_char = [text[1] for text in texts if len(text) == 2]
    one_char = [text for text in texts if len(text) == 1]
    return sum(int(digit) for digit in two_char + one_char)
def uniq_end_digits(ball):
    """Return how many distinct last digits (``b % 10``) occur in ``ball``."""
    endings = set()
    for number in ball:
        endings.add(number % 10)
    return len(endings)
def high_low(ball):
    """Return ``(low, high)``: counts of balls below 23 and above 23.

    A ball equal to 23 is counted in neither group.
    """
    low = 0
    high = 0
    for number in ball:
        if number < 23:
            low += 1
        elif 23 < number:
            high += 1
    return low, high
def section10_count(ball):
    """Return how many distinct tens-groups (``int(b / 10)``) the balls fall into."""
    return len({int(number / 10) for number in ball})
def count_mult(ball, m):
    """Return how many numbers in ``ball`` are divisible by ``m``."""
    multiples = [number for number in ball if number % m == 0]
    return len(multiples)
def continus_max(ball):
    """Return the length of the longest run of consecutive numbers in the six balls.

    The result is at least 1 (a single ball is a run of length 1).
    """
    longest = 1
    streak = 1
    idx = 1
    while idx < 6:
        if ball[idx] == ball[idx - 1] + 1:
            streak += 1
            if streak > longest:
                longest = streak
        else:
            streak = 1
        idx += 1
    return longest
def weeks_freq(draws_map, answer, no, week):
    """Count the numbers in ``answer`` that appeared in the ``week`` draws before ``no``.

    Draw numbers missing from ``draws_map`` are skipped.
    """
    recent = set()
    for back in range(1, week + 1):
        recent.update(draws_map.get(no - back, ()))
    return len([number for number in answer if number in recent])
def pct_band_unique(values, lo=PCT_LO, hi=PCT_HI):
    """Keep only the unique values that fall inside a percentile band.

    The percentiles index into the sorted list of *unique* values, so
    frequency is not taken into account. When there are six or fewer unique
    values, all of them are kept. Empty input yields an empty set.
    """
    if not values:
        return set()
    distinct = sorted(set(values))
    count = len(distinct)
    if count <= 6:
        return set(distinct)
    last = count - 1
    floor_value = distinct[int((lo / 100.0) * last)]
    ceil_value = distinct[int((hi / 100.0) * last)]
    return {value for value in distinct if floor_value <= value <= ceil_value}
def parse_pair_triple_rules():
    """Extract pair and triple rules from the BallFilter_25 source text.

    Scans for ``len(set_ball & {...}) == 2`` / ``== 3`` expressions and returns
    ``(pairs, triples)`` as lists of frozensets. A match is kept only when the
    braces hold exactly as many numbers as the compared count.
    """
    source = BALLFILTER_SRC.read_text(encoding="utf-8")

    def collect(pattern, size):
        found = []
        for match in re.finditer(pattern, source):
            numbers = [int(token.strip()) for token in match.group(1).split(",")]
            if len(numbers) == size:
                found.append(frozenset(numbers))
        return found

    pairs = collect(r"len\(set_ball & \{([^}]+)\}\) == 2", 2)
    triples = collect(r"len\(set_ball & \{([^}]+)\}\) == 3", 3)
    return pairs, triples
def main():
    """Compute the allow-sets from the training draws and write the parameter module.

    Steps: load the draws, derive the pair/triple blocklists, collect every
    observed feature value per training draw (and its absolute difference to
    the previous draw), trim each numeric set with pct_band_unique(), then
    write the generated Python source to OUT and print a short summary.
    """
    draws = load_draws()
    pair_rules, triple_rules = parse_pair_triple_rules()
    train_draws = {n: draws[n] for n in range(TRAIN_LO, TRAIN_HI + 1) if n in draws}
    # Blocklist: drop any pair/triple rule whose numbers appeared together in a
    # training winner, so the rule cannot reject a training winner.
    train_pairs_seen = set()
    train_triples_seen = set()
    for b in train_draws.values():
        for i in range(6):
            for j in range(i + 1, 6):
                train_pairs_seen.add(frozenset((b[i], b[j])))
        for i in range(6):
            for j in range(i + 1, 6):
                for k in range(j + 1, 6):
                    train_triples_seen.add(frozenset((b[i], b[j], b[k])))
    pair_block = [p for p in pair_rules if p not in train_pairs_seen]
    triple_block = [t for t in triple_rules if t not in train_triples_seen]
    # Feature name -> set of values observed in training.
    sets = defaultdict(set)
    # Set to True when a training winner would be rejected by the matching filter.
    flags_prev = {"need_relax_previous": False, "need_relax_prev7": False}
    # Starts at 2 because every feature is paired with the previous draw.
    for no in range(2, TRAIN_HI + 1):
        if no not in draws or (no - 1) not in draws:
            continue
        ball = draws[no]
        p_ball = draws[no - 1]
        s = sum(ball)
        sets["sum6"].add(s)
        sets["sum6_diff"].add(abs(s - sum(p_ball)))
        avg = s // 6
        pavg = sum(p_ball) // 6
        sets["avg6"].add(avg)
        sets["avg6_diff"].add(abs(avg - pavg))
        s3f = ball[0] + ball[1] + ball[2]
        ps3f = p_ball[0] + p_ball[1] + p_ball[2]
        sets["sum3f"].add(s3f)
        sets["sum3f_diff"].add(abs(s3f - ps3f))
        s3b = ball[3] + ball[4] + ball[5]
        ps3b = p_ball[3] + p_ball[4] + p_ball[5]
        sets["sum3b"].add(s3b)
        sets["sum3b_diff"].add(abs(s3b - ps3b))
        l, h = high_low(ball)
        sets["hl_allowed"].add((l, h))
        gh = ball[0] + ball[5]
        pgh = p_ball[0] + p_ball[5]
        sets["go_sum"].add(gh)
        sets["go_sum_diff"].add(abs(gh - pgh))
        iv = interval_sum(ball)
        piv = interval_sum(p_ball)
        sets["interval"].add(iv)
        sets["interval_diff"].add(abs(iv - piv))
        fl = first_letter_sum(ball)
        pfl = first_letter_sum(p_ball)
        sets["first_letter"].add(fl)
        sets["first_letter_diff"].add(abs(fl - pfl))
        ll = last_letter_sum(ball)
        pll = last_letter_sum(p_ball)
        sets["last_letter"].add(ll)
        sets["last_letter_diff"].add(abs(ll - pll))
        sets["b0"].add(ball[0])
        sets["b0_diff"].add(abs(ball[0] - p_ball[0]))
        sets["b5"].add(ball[5])
        sets["b5_diff"].add(abs(ball[5] - p_ball[5]))
        sets["uniq_end"].add(uniq_end_digits(ball))
        sets["uniq_end_diff"].add(abs(uniq_end_digits(ball) - uniq_end_digits(p_ball)))
        ac = get_ac(ball)
        pac = get_ac(p_ball)
        sets["ac"].add(ac)
        sets["ac_diff"].add(abs(ac - pac))
        for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
            sets[f"mul{m}"].add(count_mult(ball, m))
            sets[f"mul{m}_diff"].add(abs(count_mult(ball, m) - count_mult(p_ball, m)))
        pn = len(set(ball) & PRIME)
        sets["prime_n"].add(pn)
        cn = len(set(ball) & COMPOSITE)
        sets["composite_n"].add(cn)
        sets["composite_diff"].add(abs(cn - len(set(p_ball) & COMPOSITE)))
        ev = sum(1 for b in ball if b % 2 == 0)
        pev = sum(1 for b in p_ball if b % 2 == 0)
        sets["even_n"].add(ev)
        sets["even_diff"].add(abs(ev - pev))
        sc = section10_count(ball)
        psc = section10_count(p_ball)
        sets["sec10"].add(sc)
        sets["sec10_diff"].add(abs(sc - psc))
        for wk in (8, 12, 16, 20):
            ex = weeks_freq(draws, ball, no, wk)
            # The previous draw is scored against the same window (same ``no``).
            pex = weeks_freq(draws, p_ball, no, wk)
            sets[f"w{wk}"].add(ex)
            sets[f"w{wk}_diff"].add(abs(ex - pex))
        sets["continus_max"].add(continus_max(ball))
        # filterPreviousNumber check: flag when no ball equals, or is adjacent
        # (+/-1) to, a ball of the previous draw.
        # NOTE(review): the original comment says this matches BallFilter_25 -- not verifiable here.
        pb_set = set(p_ball)
        bad_prev = True
        for i in range(6):
            bi = ball[i]
            if bi in pb_set or bi - 1 in pb_set or bi + 1 in pb_set:
                bad_prev = False
                break
        if bad_prev:
            flags_prev["need_relax_previous"] = True
        # filterAllPreivous7 check: flag when all six balls appeared in the
        # seven draws before this one.
        pb7 = set()
        for i in range(no - 1, no - 8, -1):
            if i in draws:
                for x in draws[i]:
                    pb7.add(x)
        if len(set(ball) & pb7) == 6:
            flags_prev["need_relax_prev7"] = True
    # Tighten every numeric set to its percentile band.
    # NOTE(review): the original comment also mentions re-adding each training
    # draw's values afterwards; no such step exists below -- confirm the intent.
    keys_numeric = [
        "sum6",
        "sum6_diff",
        "avg6",
        "avg6_diff",
        "sum3f",
        "sum3f_diff",
        "sum3b",
        "sum3b_diff",
        "go_sum",
        "go_sum_diff",
        "interval",
        "interval_diff",
        "first_letter",
        "first_letter_diff",
        "last_letter",
        "last_letter_diff",
        "b0",
        "b0_diff",
        "b5",
        "b5_diff",
        "uniq_end",
        "uniq_end_diff",
        "ac",
        "ac_diff",
        "prime_n",
        "composite_n",
        "composite_diff",
        "even_n",
        "even_diff",
        "sec10",
        "sec10_diff",
    ]
    for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
        keys_numeric.extend([f"mul{m}", f"mul{m}_diff"])
    for wk in (8, 12, 16, 20):
        keys_numeric.extend([f"w{wk}", f"w{wk}_diff"])
    keys_numeric.append("continus_max")
    for k in keys_numeric:
        sets[k] = pct_band_unique(sets[k])
    # High/low: emitted as HL_SKIP next to HL_SEEN (the (l, h) pairs seen in
    # training, left untrimmed). This set holds all four combinations of 0 and 1.
    hl_skip = {(l, h) for l in (0, 1) for h in (0, 1)}

    def emit():
        """Build and return the full text of the generated parameter module."""
        lines = [
            "# -*- coding: utf-8 -*-",
            '"""학습 구간 {}~{}회 기준 자동 생성 — tools/compute_final_filter_params.py"""'.format(
                TRAIN_LO, TRAIN_HI
            ),
            "",
            "TRAIN_RANGE = ({}, {})".format(TRAIN_LO, TRAIN_HI),
            "DISABLE_FILTER_PREVIOUS_NUMBER = {}".format(
                str(flags_prev["need_relax_previous"])
            ),
            "DISABLE_FILTER_ALL_PREVIOUS_7 = {}".format(str(flags_prev["need_relax_prev7"])),
            "",
        ]

        def sset(name, key):
            """Append ``name = <sorted list of sets[key]>`` to the output lines."""
            v = sets[key]
            lines.append("{} = {}".format(name, repr(sorted(v))))

        sset("ALLOW_SUM6", "sum6")
        sset("ALLOW_SUM6_DIFF", "sum6_diff")
        sset("ALLOW_AVG6", "avg6")
        sset("ALLOW_AVG6_DIFF", "avg6_diff")
        sset("ALLOW_SUM3F", "sum3f")
        sset("ALLOW_SUM3F_DIFF", "sum3f_diff")
        sset("ALLOW_SUM3B", "sum3b")
        sset("ALLOW_SUM3B_DIFF", "sum3b_diff")
        lines.append("HL_SKIP = {}".format(repr(sorted(hl_skip))))
        lines.append("HL_SEEN = {}".format(repr(sorted(sets['hl_allowed']))))
        sset("ALLOW_GO_SUM", "go_sum")
        sset("ALLOW_GO_SUM_DIFF", "go_sum_diff")
        sset("ALLOW_INTERVAL", "interval")
        sset("ALLOW_INTERVAL_DIFF", "interval_diff")
        sset("ALLOW_FIRST_LETTER", "first_letter")
        sset("ALLOW_FIRST_LETTER_DIFF", "first_letter_diff")
        sset("ALLOW_LAST_LETTER", "last_letter")
        sset("ALLOW_LAST_LETTER_DIFF", "last_letter_diff")
        sset("ALLOW_B0", "b0")
        sset("ALLOW_B0_DIFF", "b0_diff")
        sset("ALLOW_B5", "b5")
        sset("ALLOW_B5_DIFF", "b5_diff")
        sset("ALLOW_UNIQ_END", "uniq_end")
        sset("ALLOW_UNIQ_END_DIFF", "uniq_end_diff")
        sset("ALLOW_AC", "ac")
        sset("ALLOW_AC_DIFF", "ac_diff")
        for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
            sset("ALLOW_MUL{}".format(m), "mul{}".format(m))
            sset("ALLOW_MUL{}_DIFF".format(m), "mul{}_diff".format(m))
        sset("ALLOW_PRIME_N", "prime_n")
        sset("ALLOW_COMPOSITE_N", "composite_n")
        sset("ALLOW_COMPOSITE_DIFF", "composite_diff")
        sset("ALLOW_EVEN_N", "even_n")
        sset("ALLOW_EVEN_DIFF", "even_diff")
        sset("ALLOW_SEC10", "sec10")
        sset("ALLOW_SEC10_DIFF", "sec10_diff")
        for wk in (8, 12, 16, 20):
            sset("ALLOW_W{}".format(wk), "w{}".format(wk))
            sset("ALLOW_W{}_DIFF".format(wk), "w{}_diff".format(wk))
        sset("ALLOW_CONTINUS_MAX", "continus_max")
        lines.append("PAIR_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in pair_block])))
        lines.append("TRIPLE_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in triple_block])))
        lines.extend(["", "# frozenset 캐시", ""])
        # Re-scan the lines emitted so far: every ALLOW_<X> list gets a
        # matching _F_<X> frozenset for fast membership tests.
        allow_names = []
        for line in list(lines):
            if line.startswith("ALLOW_") and " = " in line:
                name = line.split(" = ")[0]
                allow_names.append(name)
        for name in allow_names:
            short = name.replace("ALLOW_", "", 1)
            lines.append("_F_{} = frozenset({})".format(short, name))
        lines.append("_F_HL_SEEN = frozenset(HL_SEEN)")
        lines.append("")
        return "\n".join(lines) + "\n"

    OUT.write_text(emit(), encoding="utf-8")
    print("Wrote", OUT)
    print("pair rules:", len(pair_rules), "-> block", len(pair_block))
    print("triple rules:", len(triple_rules), "-> block", len(triple_block))
    print("DISABLE_FILTER_PREVIOUS_NUMBER", flags_prev["need_relax_previous"])
    print("DISABLE_FILTER_ALL_PREVIOUS_7", flags_prev["need_relax_prev7"])
# Regenerate the parameter module only when run as a script, not on import.
if __name__ == "__main__":
    main()