451
fixed10.py
Normal file
451
fixed10.py
Normal file
@@ -0,0 +1,451 @@
|
||||
#!/usr/bin/env python3
"""
fixed10.py

Requirements
- Recommend only combinations that have never won (i.e. never appeared as a
  past 1st-prize combination).
- Print exactly 10 combinations so they can be purchased consistently going
  forward.
- A new optimization approach unrelated to filter_model_1/2/3.

Important fact
- Lotto draws are assumed statistically independent and uniform (random), so
  future 1st-prize numbers cannot be 'predicted'.
- Instead, this code searches for combinations "close" to the typical feature
  distribution of past 1st-prize draws (sum / odd-even / decade buckets /
  consecutive runs / last digits, etc.) and optimizes to reduce overlap among
  the 10 picks.

How it works
1) Load the set of past 1st-prize combinations from resources/lotto_history.txt
2) Build feature distributions of the past winners (Laplace smoothing)
3) Generate a random sample pool with a fixed seed and score it by
   distribution fit + constraints (overlap / recent-draw similarity, etc.)
4) Greedily select 10 from the top candidates subject to diversity constraints

Usage:
  python fixed10.py
  python fixed10.py --history resources/lotto_history.txt --count 10 --seed 42 --pool 300000
"""

from __future__ import annotations

import argparse
import csv
import math
import os
import random
from collections import Counter, defaultdict
from dataclasses import dataclass
from typing import Dict, Iterable, List, Sequence, Set, Tuple, Optional


# A lotto ticket: the six drawn numbers (1..45), stored sorted ascending.
Ball = Tuple[int, int, int, int, int, int]
|
||||
|
||||
|
||||
def parse_history_txt(path: str) -> List[Ball]:
    """
    Parse lotto_history.txt rows of the form: no,b1,b2,b3,b4,b5,b6,bn

    Returns the list of sorted 6-number tuples (Ball) in file order.
    Blank rows and rows with fewer than 7 fields are skipped; surrounding
    whitespace in each cell is tolerated.
    """
    draws: List[Ball] = []
    with open(path, "r", encoding="utf-8") as fh:
        for record in csv.reader(fh):
            if not record:
                continue
            fields = [cell.strip() for cell in record]
            if len(fields) < 7:
                continue
            # column 0 is the draw number; columns 1..6 are the six main balls
            draws.append(tuple(sorted(int(v) for v in fields[1:7])))  # type: ignore[arg-type]
    return draws
|
||||
|
||||
|
||||
def max_consecutive_len(nums: Sequence[int]) -> int:
    """
    Return the length of the longest run of consecutive integers in *nums*.

    *nums* must be sorted ascending (Ball tuples always are).
    An empty sequence contains no run, so 0 is returned.
    (Bug fix: the previous version returned 1 for an empty sequence,
    over-reporting a run that does not exist. Callers in this file always
    pass six numbers, so the change is backward compatible for them.)
    """
    if not nums:
        return 0
    best = 1
    run = 1
    for prev, cur in zip(nums, nums[1:]):
        if cur == prev + 1:
            run += 1
            if run > best:
                best = run
        else:
            # run broken; restart counting at the current element
            run = 1
    return best
|
||||
|
||||
|
||||
def decade_bucket(n: int) -> int:
    """Map 1..45 to bucket index 0..4 (1-10, 11-20, 21-30, 31-40, 41-45)."""
    for bucket, (lo, hi) in enumerate(((1, 10), (11, 20), (21, 30), (31, 40))):
        if lo <= n <= hi:
            return bucket
    # anything not matched above (including 41-45) lands in the last bucket
    return 4
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Features:
    """Summary features of a 6-number combination, used to compare a
    candidate against the feature distributions of historical winners."""
    sum6: int  # sum of the six numbers
    odd: int  # count of odd numbers (0..6)
    low: int  # count of numbers <= 22 (the "low half")
    max_run: int  # length of the longest consecutive run
    uniq_last_digit: int  # number of distinct last digits (1..6)
    decade_sig: Tuple[int, int, int, int, int]  # counts per bucket (1-10, 11-20, 21-30, 31-40, 41-45)
|
||||
|
||||
|
||||
def features_of(ball: Ball) -> Features:
    """Compute the Features summary of a sorted 6-number combination."""
    bucket_counts = [0, 0, 0, 0, 0]
    for value in ball:
        bucket_counts[decade_bucket(value)] += 1
    return Features(
        sum6=sum(ball),
        odd=sum(1 for value in ball if value % 2 == 1),
        low=sum(1 for value in ball if value <= 22),
        max_run=max_consecutive_len(ball),
        uniq_last_digit=len({value % 10 for value in ball}),
        decade_sig=tuple(bucket_counts),  # type: ignore[arg-type]
    )
|
||||
|
||||
|
||||
class SmoothedDist:
    """
    Discrete distribution with Laplace smoothing:
        P(v) = (count(v) + alpha) / (N + alpha*|V|)
    where V is the observed support. Unseen values get a small but
    non-zero probability, so logp never takes log(0).
    """

    def __init__(self, counts: Counter, alpha: float = 1.0):
        self.counts = counts
        self.alpha = float(alpha)
        # N = total observations; k = support size, clamped to >= 1 so the
        # denominator never degenerates on an empty counter.
        self.n = sum(counts.values())
        self.k = max(1, len(counts))

    def logp(self, v) -> float:
        """Log of the smoothed probability of value *v*."""
        numerator = self.counts.get(v, 0) + self.alpha
        denominator = self.n + self.alpha * self.k
        return math.log(numerator / denominator)
|
||||
|
||||
|
||||
def build_feature_dists(history: Sequence[Ball]) -> Dict[str, SmoothedDist]:
    """Build one Laplace-smoothed distribution per feature from the winners."""
    feats = [features_of(b) for b in history]

    def dist_for(attr: str) -> SmoothedDist:
        # distribution over the observed values of a single Features attribute
        return SmoothedDist(Counter(getattr(f, attr) for f in feats), alpha=1.0)

    feature_names = ("sum6", "odd", "low", "max_run", "uniq_last_digit", "decade_sig")
    return {name: dist_for(name) for name in feature_names}
|
||||
|
||||
|
||||
def overlap(a: Ball, b: Ball) -> int:
    """Number of shared numbers between two combinations."""
    return len(set(a).intersection(b))
|
||||
|
||||
|
||||
def recent_overlap_penalty(ball: Ball, recent: Sequence[Ball]) -> float:
    """
    Penalize candidates that look too similar to very recent winning draws.

    This does NOT mean such candidates can't win; it's just a diversification
    heuristic. Overlapping >= 4 numbers with any recent draw earns a strong
    penalty (6.0); overlapping exactly 3 a mild one (1.0); otherwise 0.0.
    """
    numbers = set(ball)
    worst = 0
    for past in recent:
        worst = max(worst, len(numbers.intersection(past)))
        if worst >= 4:
            # already in the strongest penalty band; no need to keep scanning
            return 6.0
    if worst == 3:
        return 1.0
    return 0.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Tuning:
    """Knobs controlling candidate sampling, scoring penalties and
    diversification. See PRESETS for ready-made profiles."""
    # sampling / search
    pool: int  # number of random candidate combinations to draw
    top_k: int  # how many top-scored candidates survive into greedy selection
    # diversification
    recent_window: int  # how many most-recent draws to compare against
    max_pair_overlap: int  # max shared numbers allowed between two chosen picks
    # penalty weights
    recent_penalty_3: float  # penalty when a candidate shares 3 numbers with a recent draw
    recent_penalty_4plus: float  # penalty when it shares 4 or more
    max_run_penalty: float  # penalty for a consecutive run of length >= 4
    decade_concentration_penalty: float  # penalty when >= 5 numbers fall in one decade bucket
|
||||
|
||||
|
||||
PRESETS: Dict[str, Tuning] = {
    # balanced: the defaults (used so far) - distribution fit + moderate diversity
    "balanced": Tuning(
        pool=250_000,
        top_k=5_000,
        recent_window=52,
        max_pair_overlap=2,
        recent_penalty_3=1.0,
        recent_penalty_4plus=6.0,
        max_run_penalty=1.5,
        decade_concentration_penalty=2.0,
    ),
    # aggressive: fit candidates more tightly to the distribution, avoid
    # recent-draw similarity harder, and enforce a stricter mutual-overlap limit
    "aggressive": Tuning(
        pool=500_000,
        top_k=7_500,
        recent_window=80,
        max_pair_overlap=1,
        recent_penalty_3=2.0,
        recent_penalty_4plus=10.0,
        max_run_penalty=2.5,
        decade_concentration_penalty=3.0,
    ),
    # conservative: widen the candidate set (less harsh scoring) and relax the
    # diversity constraints
    "conservative": Tuning(
        pool=150_000,
        top_k=5_000,
        recent_window=26,
        max_pair_overlap=3,
        recent_penalty_3=0.3,
        recent_penalty_4plus=2.0,
        max_run_penalty=0.8,
        decade_concentration_penalty=1.0,
    ),
}
|
||||
|
||||
|
||||
def max_recent_overlap(ball: Ball, recent: Sequence[Ball]) -> int:
    """Largest count of shared numbers between *ball* and any draw in *recent*."""
    numbers = set(ball)
    best = 0
    for past in recent:
        shared = len(numbers.intersection(past))
        if shared > best:
            best = shared
            if best >= 6:
                # 6 shared numbers is the theoretical maximum; stop early
                break
    return best
|
||||
|
||||
|
||||
def score_ball(
    ball: Ball,
    dists: Dict[str, SmoothedDist],
    history_set: Set[Ball],
    recent: Sequence[Ball],
    tuning: Tuning,
) -> float:
    """
    Score a candidate combination (higher is better).

    Returns -inf for any combination that has already won (hard reject);
    otherwise the sum of per-feature smoothed log-probabilities minus soft
    penalties for implausible shapes and similarity to recent draws.
    """
    # hard reject: already won in history
    if ball in history_set:
        return float("-inf")

    feats = features_of(ball)

    # distribution-fit score: sum of log-probs over all tracked features
    total = (
        dists["sum6"].logp(feats.sum6)
        + dists["odd"].logp(feats.odd)
        + dists["low"].logp(feats.low)
        + dists["max_run"].logp(feats.max_run)
        + dists["uniq_last_digit"].logp(feats.uniq_last_digit)
        + dists["decade_sig"].logp(feats.decade_sig)
    )

    # mild, human-sensible soft constraints:
    # avoid very long consecutive runs (>= 4)
    if feats.max_run >= 4:
        total -= tuning.max_run_penalty
    # avoid extremely concentrated decades (5+ numbers in the same bucket)
    if max(feats.decade_sig) >= 5:
        total -= tuning.decade_concentration_penalty

    # diversify away from recent draws (soft)
    worst = max_recent_overlap(ball, recent)
    if worst >= 4:
        total -= tuning.recent_penalty_4plus
    elif worst == 3:
        total -= tuning.recent_penalty_3

    return total
|
||||
|
||||
|
||||
def select_diverse(
    candidates: Sequence[Ball],
    scores: Dict[Ball, float],
    count: int,
    max_pair_overlap: int,
) -> List[Ball]:
    """
    Greedy selection: walk *candidates* (already sorted by descending score)
    and keep each one that shares at most *max_pair_overlap* numbers with
    every previously kept pick, stopping once *count* picks are collected.

    NOTE(review): *scores* is currently unused -- the ordering comes from the
    caller -- but the parameter is kept for interface compatibility.
    """
    picks: List[Ball] = []
    for candidate in candidates:
        if len(picks) >= count:
            break
        candidate_set = set(candidate)
        if all(len(candidate_set.intersection(kept)) <= max_pair_overlap for kept in picks):
            picks.append(candidate)
    return picks
|
||||
|
||||
|
||||
def generate_fixed10(
    history: Sequence[Ball],
    count: int = 10,
    seed: int = 42,
    pool: int = 250_000,
    top_k: int = 5_000,
    recent_window: int = 52,
    max_pair_overlap: int = 2,
    recent_penalty_3: float = 1.0,
    recent_penalty_4plus: float = 6.0,
    max_run_penalty: float = 1.5,
    decade_concentration_penalty: float = 2.0,
) -> List[Ball]:
    """
    Produce *count* never-winning combinations from a seeded random pool.

    Pipeline: sample `pool` random tickets (deterministic via `seed`),
    score each against the historical feature distributions (past winners
    are rejected outright), keep the `top_k` best, then greedily pick a
    diverse subset. If the pairwise-overlap constraint is too strict to
    yield enough picks, it is relaxed progressively (3, 4, 5).
    """
    rng = random.Random(seed)
    history_set = set(history)
    dists = build_feature_dists(history)
    recent = list(history[-recent_window:]) if len(history) >= recent_window else list(history)

    tuning = Tuning(
        pool=pool,
        top_k=top_k,
        recent_window=recent_window,
        max_pair_overlap=max_pair_overlap,
        recent_penalty_3=recent_penalty_3,
        recent_penalty_4plus=recent_penalty_4plus,
        max_run_penalty=max_run_penalty,
        decade_concentration_penalty=decade_concentration_penalty,
    )

    seen: Set[Ball] = set()
    scored: List[Tuple[float, Ball]] = []
    rejected = float("-inf")

    # sample the candidate pool; duplicates are skipped, so the effective
    # pool may be slightly smaller than `pool`
    for _ in range(pool):
        candidate = tuple(sorted(rng.sample(range(1, 46), 6)))  # type: ignore[assignment]
        if candidate in seen:
            continue
        seen.add(candidate)
        value = score_ball(candidate, dists, history_set, recent, tuning)
        if value != rejected:
            scored.append((value, candidate))

    scored.sort(key=lambda pair: pair[0], reverse=True)
    shortlist = [ball for _, ball in scored[:top_k]]
    shortlist_scores = {ball: value for value, ball in scored[:top_k]}

    chosen = select_diverse(shortlist, shortlist_scores, count=count, max_pair_overlap=max_pair_overlap)

    # If we couldn't pick enough due to overlap constraints, relax progressively.
    if len(chosen) < count:
        for relaxed in (3, 4, 5):
            chosen = select_diverse(shortlist, shortlist_scores, count=count, max_pair_overlap=relaxed)
            if len(chosen) >= count:
                chosen = chosen[:count]
                break

    return chosen
|
||||
|
||||
|
||||
def summarize(picks: Sequence[Ball], recent: Sequence[Ball]) -> Dict[str, object]:
    """
    Report overlap diagnostics for a set of picks: a histogram of pairwise
    overlaps among the picks, and a histogram of each pick's worst overlap
    with the recent winning draws.
    """
    # pairwise overlap stats among the picks
    pair_hist: Counter = Counter()
    worst_pair = 0
    for i, first in enumerate(picks):
        first_set = set(first)
        for second in picks[i + 1:]:
            shared = len(first_set.intersection(second))
            worst_pair = max(worst_pair, shared)
            pair_hist[shared] += 1

    # worst overlap of each pick against the recent draws
    recent_hist: Counter = Counter()
    worst_recent = 0
    for pick in picks:
        pick_set = set(pick)
        shared = max((len(pick_set.intersection(r)) for r in recent), default=0)
        worst_recent = max(worst_recent, shared)
        recent_hist[shared] += 1

    return {
        "max_pair_overlap": worst_pair,
        "pair_overlap_hist": dict(sorted(pair_hist.items())),
        "max_recent_overlap": worst_recent,
        "recent_overlap_hist": dict(sorted(recent_hist.items())),
    }
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, generate picks, print them and an
    optional overlap summary."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--history", default=os.path.join("resources", "lotto_history.txt"))
    parser.add_argument("--count", type=int, default=10)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument(
        "--profile",
        choices=sorted(PRESETS.keys()),
        default="balanced",
        help="Tuning preset. You can still override any individual knob below.",
    )
    parser.add_argument("--pool", type=int, default=None, help="Number of random candidates to sample.")
    parser.add_argument("--top-k", type=int, default=None, help="Keep top-K scored candidates before diversification.")
    parser.add_argument("--recent-window", type=int, default=None, help="Recent draw window size for overlap penalty.")
    parser.add_argument("--max-pair-overlap", type=int, default=None, help="Max allowed overlap between chosen picks (greedy).")
    parser.add_argument("--recent-penalty-3", type=float, default=None, help="Penalty if overlaps 3 with any recent draw.")
    parser.add_argument("--recent-penalty-4plus", type=float, default=None, help="Penalty if overlaps >=4 with any recent draw.")
    parser.add_argument("--max-run-penalty", type=float, default=None, help="Penalty if max consecutive run >=4.")
    parser.add_argument("--decade-concentration-penalty", type=float, default=None, help="Penalty if >=5 numbers in a decade bucket.")
    parser.add_argument("--no-report", action="store_true", help="Do not print overlap summary.")
    args = parser.parse_args()

    history = parse_history_txt(args.history)
    if not history:
        raise SystemExit(f"History is empty or not readable: {args.history}")

    preset = PRESETS[args.profile]

    def knob(cli_value, preset_value, cast):
        # an explicit CLI flag wins; otherwise fall back to the preset's value
        return cast(cli_value) if cli_value is not None else preset_value

    pool = knob(args.pool, preset.pool, int)
    top_k = knob(args.top_k, preset.top_k, int)
    recent_window = knob(args.recent_window, preset.recent_window, int)
    max_pair_overlap = knob(args.max_pair_overlap, preset.max_pair_overlap, int)
    recent_penalty_3 = knob(args.recent_penalty_3, preset.recent_penalty_3, float)
    recent_penalty_4plus = knob(args.recent_penalty_4plus, preset.recent_penalty_4plus, float)
    max_run_penalty = knob(args.max_run_penalty, preset.max_run_penalty, float)
    decade_concentration_penalty = knob(
        args.decade_concentration_penalty, preset.decade_concentration_penalty, float
    )

    picks = generate_fixed10(
        history=history,
        count=args.count,
        seed=args.seed,
        pool=pool,
        top_k=top_k,
        recent_window=recent_window,
        max_pair_overlap=max_pair_overlap,
        recent_penalty_3=recent_penalty_3,
        recent_penalty_4plus=recent_penalty_4plus,
        max_run_penalty=max_run_penalty,
        decade_concentration_penalty=decade_concentration_penalty,
    )

    print(f"history draws: {len(history)}")
    print(
        "fixed picks "
        f"(profile={args.profile}, count={len(picks)}, seed={args.seed}, "
        f"pool={pool}, top_k={top_k}, recent_window={recent_window}, max_pair_overlap={max_pair_overlap}):"
    )
    for i, b in enumerate(picks, start=1):
        print(f"{i:2d}. {list(b)}")

    if not args.no_report:
        recent = list(history[-recent_window:]) if len(history) >= recent_window else list(history)
        rep = summarize(picks, recent)
        print("\nsummary:")
        print(f"- max_pair_overlap: {rep['max_pair_overlap']}")
        print(f"- pair_overlap_hist: {rep['pair_overlap_hist']}")
        print(f"- max_recent_overlap: {rep['max_recent_overlap']}")
        print(f"- recent_overlap_hist: {rep['recent_overlap_hist']}")
||||
|
||||
|
||||
# Script entry-point guard: run the CLI only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user