451
fixed10.py
Normal file
451
fixed10.py
Normal file
@@ -0,0 +1,451 @@
|
||||
#!/usr/bin/env python3
"""
fixed10.py

Requirements
- Recommend only combinations that have never won (i.e. never appeared as a
  past 1st-prize combination).
- Print exactly 10 combinations so they can be purchased consistently going
  forward.
- A new optimization approach unrelated to filter_model_1/2/3.

Important fact
- Lotto draws are assumed statistically independent and uniform (random), so
  future 1st-prize numbers cannot be 'predicted'.
- Instead, this code searches for combinations "close" to the typical feature
  distribution of past 1st-prize draws (sum / odd-even / decade buckets /
  consecutive runs / last digits, etc.) and optimizes to reduce overlap among
  the 10 picks.

How it works
1) Load the set of past 1st-prize combinations from resources/lotto_history.txt
2) Build feature distributions of the past winners (Laplace smoothing)
3) Generate a random sample pool with a fixed seed and score it by
   distribution fit + constraints (overlap / recent-draw similarity, etc.)
4) Greedily select 10 from the top candidates subject to diversity constraints

Usage:
  python fixed10.py
  python fixed10.py --history resources/lotto_history.txt --count 10 --seed 42 --pool 300000
"""

from __future__ import annotations

import argparse
import csv
import math
import os
import random
from collections import Counter, defaultdict
from dataclasses import dataclass
from typing import Dict, Iterable, List, Sequence, Set, Tuple, Optional


# A lotto ticket: the six drawn numbers (1..45), stored sorted ascending.
Ball = Tuple[int, int, int, int, int, int]
|
||||
|
||||
|
||||
def parse_history_txt(path: str) -> List[Ball]:
    """
    Parse lotto_history.txt rows of the form: no,b1,b2,b3,b4,b5,b6,bn

    Returns the list of sorted 6-number tuples (Ball) in file order.
    Blank rows and rows with fewer than 7 fields are skipped; surrounding
    whitespace in each cell is tolerated.
    """
    draws: List[Ball] = []
    with open(path, "r", encoding="utf-8") as fh:
        for record in csv.reader(fh):
            if not record:
                continue
            fields = [cell.strip() for cell in record]
            if len(fields) < 7:
                continue
            # column 0 is the draw number; columns 1..6 are the six main balls
            draws.append(tuple(sorted(int(v) for v in fields[1:7])))  # type: ignore[arg-type]
    return draws
|
||||
|
||||
|
||||
def max_consecutive_len(nums: Sequence[int]) -> int:
    """
    Return the length of the longest run of consecutive integers in *nums*.

    *nums* must be sorted ascending (Ball tuples always are).
    An empty sequence contains no run, so 0 is returned.
    (Bug fix: the previous version returned 1 for an empty sequence,
    over-reporting a run that does not exist. Callers in this file always
    pass six numbers, so the change is backward compatible for them.)
    """
    if not nums:
        return 0
    best = 1
    run = 1
    for prev, cur in zip(nums, nums[1:]):
        if cur == prev + 1:
            run += 1
            if run > best:
                best = run
        else:
            # run broken; restart counting at the current element
            run = 1
    return best
|
||||
|
||||
|
||||
def decade_bucket(n: int) -> int:
    """Map 1..45 to bucket index 0..4 (1-10, 11-20, 21-30, 31-40, 41-45)."""
    for bucket, (lo, hi) in enumerate(((1, 10), (11, 20), (21, 30), (31, 40))):
        if lo <= n <= hi:
            return bucket
    # anything not matched above (including 41-45) lands in the last bucket
    return 4
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Features:
    """Summary features of a 6-number combination, used to compare a
    candidate against the feature distributions of historical winners."""
    sum6: int  # sum of the six numbers
    odd: int  # count of odd numbers (0..6)
    low: int  # count of numbers <= 22 (the "low half")
    max_run: int  # length of the longest consecutive run
    uniq_last_digit: int  # number of distinct last digits (1..6)
    decade_sig: Tuple[int, int, int, int, int]  # counts per bucket (1-10, 11-20, 21-30, 31-40, 41-45)
|
||||
|
||||
|
||||
def features_of(ball: Ball) -> Features:
    """Compute the Features summary of a sorted 6-number combination."""
    bucket_counts = [0, 0, 0, 0, 0]
    for value in ball:
        bucket_counts[decade_bucket(value)] += 1
    return Features(
        sum6=sum(ball),
        odd=sum(1 for value in ball if value % 2 == 1),
        low=sum(1 for value in ball if value <= 22),
        max_run=max_consecutive_len(ball),
        uniq_last_digit=len({value % 10 for value in ball}),
        decade_sig=tuple(bucket_counts),  # type: ignore[arg-type]
    )
|
||||
|
||||
|
||||
class SmoothedDist:
    """
    Discrete distribution with Laplace smoothing:
        P(v) = (count(v) + alpha) / (N + alpha*|V|)
    where V is the observed support. Unseen values get a small but
    non-zero probability, so logp never takes log(0).
    """

    def __init__(self, counts: Counter, alpha: float = 1.0):
        self.counts = counts
        self.alpha = float(alpha)
        # N = total observations; k = support size, clamped to >= 1 so the
        # denominator never degenerates on an empty counter.
        self.n = sum(counts.values())
        self.k = max(1, len(counts))

    def logp(self, v) -> float:
        """Log of the smoothed probability of value *v*."""
        numerator = self.counts.get(v, 0) + self.alpha
        denominator = self.n + self.alpha * self.k
        return math.log(numerator / denominator)
|
||||
|
||||
|
||||
def build_feature_dists(history: Sequence[Ball]) -> Dict[str, SmoothedDist]:
    """Build one Laplace-smoothed distribution per feature from the winners."""
    feats = [features_of(b) for b in history]

    def dist_for(attr: str) -> SmoothedDist:
        # distribution over the observed values of a single Features attribute
        return SmoothedDist(Counter(getattr(f, attr) for f in feats), alpha=1.0)

    feature_names = ("sum6", "odd", "low", "max_run", "uniq_last_digit", "decade_sig")
    return {name: dist_for(name) for name in feature_names}
|
||||
|
||||
|
||||
def overlap(a: Ball, b: Ball) -> int:
    """Number of shared numbers between two combinations."""
    return len(set(a).intersection(b))
|
||||
|
||||
|
||||
def recent_overlap_penalty(ball: Ball, recent: Sequence[Ball]) -> float:
    """
    Penalize candidates that look too similar to very recent winning draws.

    This does NOT mean such candidates can't win; it's just a diversification
    heuristic. Overlapping >= 4 numbers with any recent draw earns a strong
    penalty (6.0); overlapping exactly 3 a mild one (1.0); otherwise 0.0.
    """
    numbers = set(ball)
    worst = 0
    for past in recent:
        worst = max(worst, len(numbers.intersection(past)))
        if worst >= 4:
            # already in the strongest penalty band; no need to keep scanning
            return 6.0
    if worst == 3:
        return 1.0
    return 0.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Tuning:
    """Knobs controlling candidate sampling, scoring penalties and
    diversification. See PRESETS for ready-made profiles."""
    # sampling / search
    pool: int  # number of random candidate combinations to draw
    top_k: int  # how many top-scored candidates survive into greedy selection
    # diversification
    recent_window: int  # how many most-recent draws to compare against
    max_pair_overlap: int  # max shared numbers allowed between two chosen picks
    # penalty weights
    recent_penalty_3: float  # penalty when a candidate shares 3 numbers with a recent draw
    recent_penalty_4plus: float  # penalty when it shares 4 or more
    max_run_penalty: float  # penalty for a consecutive run of length >= 4
    decade_concentration_penalty: float  # penalty when >= 5 numbers fall in one decade bucket
|
||||
|
||||
|
||||
PRESETS: Dict[str, Tuning] = {
    # balanced: the defaults (used so far) - distribution fit + moderate diversity
    "balanced": Tuning(
        pool=250_000,
        top_k=5_000,
        recent_window=52,
        max_pair_overlap=2,
        recent_penalty_3=1.0,
        recent_penalty_4plus=6.0,
        max_run_penalty=1.5,
        decade_concentration_penalty=2.0,
    ),
    # aggressive: fit candidates more tightly to the distribution, avoid
    # recent-draw similarity harder, and enforce a stricter mutual-overlap limit
    "aggressive": Tuning(
        pool=500_000,
        top_k=7_500,
        recent_window=80,
        max_pair_overlap=1,
        recent_penalty_3=2.0,
        recent_penalty_4plus=10.0,
        max_run_penalty=2.5,
        decade_concentration_penalty=3.0,
    ),
    # conservative: widen the candidate set (less harsh scoring) and relax the
    # diversity constraints
    "conservative": Tuning(
        pool=150_000,
        top_k=5_000,
        recent_window=26,
        max_pair_overlap=3,
        recent_penalty_3=0.3,
        recent_penalty_4plus=2.0,
        max_run_penalty=0.8,
        decade_concentration_penalty=1.0,
    ),
}
|
||||
|
||||
|
||||
def max_recent_overlap(ball: Ball, recent: Sequence[Ball]) -> int:
    """Largest count of shared numbers between *ball* and any draw in *recent*."""
    numbers = set(ball)
    best = 0
    for past in recent:
        shared = len(numbers.intersection(past))
        if shared > best:
            best = shared
            if best >= 6:
                # 6 shared numbers is the theoretical maximum; stop early
                break
    return best
|
||||
|
||||
|
||||
def score_ball(
    ball: Ball,
    dists: Dict[str, SmoothedDist],
    history_set: Set[Ball],
    recent: Sequence[Ball],
    tuning: Tuning,
) -> float:
    """
    Score a candidate combination (higher is better).

    Returns -inf for any combination that has already won (hard reject);
    otherwise the sum of per-feature smoothed log-probabilities minus soft
    penalties for implausible shapes and similarity to recent draws.
    """
    # hard reject: already won in history
    if ball in history_set:
        return float("-inf")

    feats = features_of(ball)

    # distribution-fit score: sum of log-probs over all tracked features
    total = (
        dists["sum6"].logp(feats.sum6)
        + dists["odd"].logp(feats.odd)
        + dists["low"].logp(feats.low)
        + dists["max_run"].logp(feats.max_run)
        + dists["uniq_last_digit"].logp(feats.uniq_last_digit)
        + dists["decade_sig"].logp(feats.decade_sig)
    )

    # mild, human-sensible soft constraints:
    # avoid very long consecutive runs (>= 4)
    if feats.max_run >= 4:
        total -= tuning.max_run_penalty
    # avoid extremely concentrated decades (5+ numbers in the same bucket)
    if max(feats.decade_sig) >= 5:
        total -= tuning.decade_concentration_penalty

    # diversify away from recent draws (soft)
    worst = max_recent_overlap(ball, recent)
    if worst >= 4:
        total -= tuning.recent_penalty_4plus
    elif worst == 3:
        total -= tuning.recent_penalty_3

    return total
|
||||
|
||||
|
||||
def select_diverse(
    candidates: Sequence[Ball],
    scores: Dict[Ball, float],
    count: int,
    max_pair_overlap: int,
) -> List[Ball]:
    """
    Greedy selection: walk *candidates* (already sorted by descending score)
    and keep each one that shares at most *max_pair_overlap* numbers with
    every previously kept pick, stopping once *count* picks are collected.

    NOTE(review): *scores* is currently unused -- the ordering comes from the
    caller -- but the parameter is kept for interface compatibility.
    """
    picks: List[Ball] = []
    for candidate in candidates:
        if len(picks) >= count:
            break
        candidate_set = set(candidate)
        if all(len(candidate_set.intersection(kept)) <= max_pair_overlap for kept in picks):
            picks.append(candidate)
    return picks
|
||||
|
||||
|
||||
def generate_fixed10(
    history: Sequence[Ball],
    count: int = 10,
    seed: int = 42,
    pool: int = 250_000,
    top_k: int = 5_000,
    recent_window: int = 52,
    max_pair_overlap: int = 2,
    recent_penalty_3: float = 1.0,
    recent_penalty_4plus: float = 6.0,
    max_run_penalty: float = 1.5,
    decade_concentration_penalty: float = 2.0,
) -> List[Ball]:
    """
    Produce *count* never-winning combinations from a seeded random pool.

    Pipeline: sample `pool` random tickets (deterministic via `seed`),
    score each against the historical feature distributions (past winners
    are rejected outright), keep the `top_k` best, then greedily pick a
    diverse subset. If the pairwise-overlap constraint is too strict to
    yield enough picks, it is relaxed progressively (3, 4, 5).
    """
    rng = random.Random(seed)
    history_set = set(history)
    dists = build_feature_dists(history)
    recent = list(history[-recent_window:]) if len(history) >= recent_window else list(history)

    tuning = Tuning(
        pool=pool,
        top_k=top_k,
        recent_window=recent_window,
        max_pair_overlap=max_pair_overlap,
        recent_penalty_3=recent_penalty_3,
        recent_penalty_4plus=recent_penalty_4plus,
        max_run_penalty=max_run_penalty,
        decade_concentration_penalty=decade_concentration_penalty,
    )

    seen: Set[Ball] = set()
    scored: List[Tuple[float, Ball]] = []
    rejected = float("-inf")

    # sample the candidate pool; duplicates are skipped, so the effective
    # pool may be slightly smaller than `pool`
    for _ in range(pool):
        candidate = tuple(sorted(rng.sample(range(1, 46), 6)))  # type: ignore[assignment]
        if candidate in seen:
            continue
        seen.add(candidate)
        value = score_ball(candidate, dists, history_set, recent, tuning)
        if value != rejected:
            scored.append((value, candidate))

    scored.sort(key=lambda pair: pair[0], reverse=True)
    shortlist = [ball for _, ball in scored[:top_k]]
    shortlist_scores = {ball: value for value, ball in scored[:top_k]}

    chosen = select_diverse(shortlist, shortlist_scores, count=count, max_pair_overlap=max_pair_overlap)

    # If we couldn't pick enough due to overlap constraints, relax progressively.
    if len(chosen) < count:
        for relaxed in (3, 4, 5):
            chosen = select_diverse(shortlist, shortlist_scores, count=count, max_pair_overlap=relaxed)
            if len(chosen) >= count:
                chosen = chosen[:count]
                break

    return chosen
|
||||
|
||||
|
||||
def summarize(picks: Sequence[Ball], recent: Sequence[Ball]) -> Dict[str, object]:
    """
    Report overlap diagnostics for a set of picks: a histogram of pairwise
    overlaps among the picks, and a histogram of each pick's worst overlap
    with the recent winning draws.
    """
    # pairwise overlap stats among the picks
    pair_hist: Counter = Counter()
    worst_pair = 0
    for i, first in enumerate(picks):
        first_set = set(first)
        for second in picks[i + 1:]:
            shared = len(first_set.intersection(second))
            worst_pair = max(worst_pair, shared)
            pair_hist[shared] += 1

    # worst overlap of each pick against the recent draws
    recent_hist: Counter = Counter()
    worst_recent = 0
    for pick in picks:
        pick_set = set(pick)
        shared = max((len(pick_set.intersection(r)) for r in recent), default=0)
        worst_recent = max(worst_recent, shared)
        recent_hist[shared] += 1

    return {
        "max_pair_overlap": worst_pair,
        "pair_overlap_hist": dict(sorted(pair_hist.items())),
        "max_recent_overlap": worst_recent,
        "recent_overlap_hist": dict(sorted(recent_hist.items())),
    }
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, generate picks, print them and an
    optional overlap summary."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--history", default=os.path.join("resources", "lotto_history.txt"))
    parser.add_argument("--count", type=int, default=10)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument(
        "--profile",
        choices=sorted(PRESETS.keys()),
        default="balanced",
        help="Tuning preset. You can still override any individual knob below.",
    )
    parser.add_argument("--pool", type=int, default=None, help="Number of random candidates to sample.")
    parser.add_argument("--top-k", type=int, default=None, help="Keep top-K scored candidates before diversification.")
    parser.add_argument("--recent-window", type=int, default=None, help="Recent draw window size for overlap penalty.")
    parser.add_argument("--max-pair-overlap", type=int, default=None, help="Max allowed overlap between chosen picks (greedy).")
    parser.add_argument("--recent-penalty-3", type=float, default=None, help="Penalty if overlaps 3 with any recent draw.")
    parser.add_argument("--recent-penalty-4plus", type=float, default=None, help="Penalty if overlaps >=4 with any recent draw.")
    parser.add_argument("--max-run-penalty", type=float, default=None, help="Penalty if max consecutive run >=4.")
    parser.add_argument("--decade-concentration-penalty", type=float, default=None, help="Penalty if >=5 numbers in a decade bucket.")
    parser.add_argument("--no-report", action="store_true", help="Do not print overlap summary.")
    args = parser.parse_args()

    history = parse_history_txt(args.history)
    if not history:
        raise SystemExit(f"History is empty or not readable: {args.history}")

    preset = PRESETS[args.profile]

    def knob(cli_value, preset_value, cast):
        # an explicit CLI flag wins; otherwise fall back to the preset's value
        return cast(cli_value) if cli_value is not None else preset_value

    pool = knob(args.pool, preset.pool, int)
    top_k = knob(args.top_k, preset.top_k, int)
    recent_window = knob(args.recent_window, preset.recent_window, int)
    max_pair_overlap = knob(args.max_pair_overlap, preset.max_pair_overlap, int)
    recent_penalty_3 = knob(args.recent_penalty_3, preset.recent_penalty_3, float)
    recent_penalty_4plus = knob(args.recent_penalty_4plus, preset.recent_penalty_4plus, float)
    max_run_penalty = knob(args.max_run_penalty, preset.max_run_penalty, float)
    decade_concentration_penalty = knob(
        args.decade_concentration_penalty, preset.decade_concentration_penalty, float
    )

    picks = generate_fixed10(
        history=history,
        count=args.count,
        seed=args.seed,
        pool=pool,
        top_k=top_k,
        recent_window=recent_window,
        max_pair_overlap=max_pair_overlap,
        recent_penalty_3=recent_penalty_3,
        recent_penalty_4plus=recent_penalty_4plus,
        max_run_penalty=max_run_penalty,
        decade_concentration_penalty=decade_concentration_penalty,
    )

    print(f"history draws: {len(history)}")
    print(
        "fixed picks "
        f"(profile={args.profile}, count={len(picks)}, seed={args.seed}, "
        f"pool={pool}, top_k={top_k}, recent_window={recent_window}, max_pair_overlap={max_pair_overlap}):"
    )
    for i, b in enumerate(picks, start=1):
        print(f"{i:2d}. {list(b)}")

    if not args.no_report:
        recent = list(history[-recent_window:]) if len(history) >= recent_window else list(history)
        rep = summarize(picks, recent)
        print("\nsummary:")
        print(f"- max_pair_overlap: {rep['max_pair_overlap']}")
        print(f"- pair_overlap_hist: {rep['pair_overlap_hist']}")
        print(f"- max_recent_overlap: {rep['max_recent_overlap']}")
        print(f"- recent_overlap_hist: {rep['recent_overlap_hist']}")
||||
|
||||
|
||||
# Script entry-point guard: run the CLI only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user