refactor: apply portfolio cap and align project docs

Keep the fixed 11-number set intact while adding a second-stage portfolio selection that caps final recommendations to the 70,000 KRW budget, and update docs/data/scripts to match the current project structure and runtime flow.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-05-08 10:37:03 +09:00
parent bd9eea2aee
commit 919f2e19bb
11 changed files with 492 additions and 343 deletions

View File

@@ -4,6 +4,9 @@ import time
import requests import requests
# JSON 포맷을 다루기 위한 라이브러리를 호출합니다. # JSON 포맷을 다루기 위한 라이브러리를 호출합니다.
import json import json
from datetime import datetime, timedelta
import random
import socket
import urllib3 import urllib3
@@ -19,8 +22,15 @@ except ModuleNotFoundError:
def sendMsg(self, msg): def sendMsg(self, msg):
pass pass
_LOTTO_URL = "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}" _LOTTO_URLS = (
_REQUEST_TIMEOUT = 15 "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}",
"https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}",
)
_REQUEST_TIMEOUT = float(os.environ.get("LOTTO_REQUEST_TIMEOUT", "12"))
_FETCH_RETRIES_PER_DRAW = int(os.environ.get("LOTTO_FETCH_RETRIES", "3"))
_BACKOFF_BASE_SECONDS = float(os.environ.get("LOTTO_BACKOFF_BASE", "0.7"))
_MAX_CONSECUTIVE_FETCH_FAILURES = int(os.environ.get("LOTTO_MAX_CONSEC_FAIL", "8"))
_CONNECTION_PROBE_TIMEOUT = float(os.environ.get("LOTTO_PROBE_TIMEOUT", "3"))
_BROWSER_HEADERS = { _BROWSER_HEADERS = {
"User-Agent": ( "User-Agent": (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 " "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
@@ -51,11 +61,30 @@ class DataCrawler:
self.bot = TelegramBot() self.bot = TelegramBot()
self._session = requests.Session() self._session = requests.Session()
self._session.headers.update(_BROWSER_HEADERS) self._session.headers.update(_BROWSER_HEADERS)
self._last_fetch_error = ""
def _can_reach_lottery_host(self):
    """Return True if a TCP connection to port 443 of a lottery host succeeds.

    The hosts ``www.dhlottery.co.kr`` and ``dhlottery.co.kr`` are tried in
    that order, each with a timeout of ``_CONNECTION_PROBE_TIMEOUT``
    seconds. This is not a full guarantee that the API works, but it
    detects a completely blocked network early and avoids pointless
    waiting in the fetch/retry loop.
    """

    def _probe(hostname):
        # Any OSError (DNS failure, refusal, timeout, ...) counts as unreachable.
        try:
            with socket.create_connection(
                (hostname, 443), timeout=_CONNECTION_PROBE_TIMEOUT
            ):
                return True
        except OSError:
            return False

    # any() short-circuits, so the second host is only probed if the first fails.
    return any(_probe(name) for name in ("www.dhlottery.co.kr", "dhlottery.co.kr"))
def _fetch_draw(self, drw_no): def _fetch_draw(self, drw_no):
"""동행복권 API에서 단일 회차 결과를 가져옵니다. 실패 시 None.""" """동행복권 API에서 단일 회차 결과를 가져옵니다. 실패 시 None."""
url = _LOTTO_URL.format(int(drw_no)) self._last_fetch_error = ""
for verify in (_ssl_verify_arg(), False): verify_options = (_ssl_verify_arg(), False)
last_error = "unknown"
for attempt in range(1, _FETCH_RETRIES_PER_DRAW + 1):
for raw_url in _LOTTO_URLS:
url = raw_url.format(int(drw_no))
for verify in verify_options:
for method in ("POST", "GET"): for method in ("POST", "GET"):
try: try:
res = self._session.request( res = self._session.request(
@@ -65,19 +94,29 @@ class DataCrawler:
verify=verify, verify=verify,
) )
if res.status_code != 200: if res.status_code != 200:
last_error = "http {}".format(res.status_code)
continue continue
text = res.text.strip() text = res.text.strip()
if not text.startswith("{"): if not text.startswith("{"):
last_error = "non-json response"
continue continue
result = json.loads(text) result = json.loads(text)
except ( except (
requests.RequestException, requests.RequestException,
ValueError, ValueError,
json.JSONDecodeError, json.JSONDecodeError,
): ) as ex:
last_error = str(ex)
continue continue
if isinstance(result, dict) and result.get("returnValue") == "success": if isinstance(result, dict) and result.get("returnValue") == "success":
return result return result
rv = result.get("returnValue") if isinstance(result, dict) else "unknown"
last_error = "api returnValue={}".format(rv)
if attempt < _FETCH_RETRIES_PER_DRAW:
# 지수 백오프 + 지터로 일시적 네트워크 혼잡 완화
delay = _BACKOFF_BASE_SECONDS * (2 ** (attempt - 1)) + random.uniform(0, 0.25)
time.sleep(delay)
self._last_fetch_error = last_error
return None return None
def _append_draw_files(self, lottoHistoryFile, result): def _append_draw_files(self, lottoHistoryFile, result):
@@ -120,6 +159,99 @@ class DataCrawler:
return None return None
return last_json.get("drwNo") return last_json.get("drwNo")
def _read_draw_map_from_json(self, json_path):
"""
JSONL 전체를 읽어 drwNo -> record 맵으로 반환합니다.
잘못된 라인/중복 라인은 정리 대상이며, 마지막 유효값을 유지합니다.
"""
draw_map = {}
if not os.path.isfile(json_path) or os.path.getsize(json_path) == 0:
return draw_map
with open(json_path, "r", encoding="utf-8") as fp:
for line in fp:
line = line.strip()
if not line:
continue
try:
data = json.loads(line)
except json.JSONDecodeError:
continue
if (
isinstance(data, dict)
and data.get("returnValue") == "success"
and isinstance(data.get("drwNo"), int)
):
draw_map[data["drwNo"]] = data
return draw_map
def _write_draw_map_files(self, lottoHistoryFile, draw_map):
"""
drwNo 오름차순으로 json/txt를 재생성합니다.
누락 회차 보강/중복 제거 후 일관된 파일 상태를 보장합니다.
"""
json_path = lottoHistoryFile + ".json"
txt_path = lottoHistoryFile + ".txt"
with open(json_path, "w", encoding="utf-8") as json_fp, open(
txt_path, "w", encoding="utf-8"
) as text_fp:
for drw_no in sorted(draw_map.keys()):
result = draw_map[drw_no]
json_fp.write(json.dumps(result, ensure_ascii=False) + "\n")
text_fp.write(
"%d,%d,%d,%d,%d,%d,%d,%d\n"
% (
drw_no,
result["drwtNo1"],
result["drwtNo2"],
result["drwtNo3"],
result["drwtNo4"],
result["drwtNo5"],
result["drwtNo6"],
result["bnusNo"],
)
)
def _get_last_week_draw_date(self):
"""
'지난 주' 기준 토요일 날짜를 반환합니다.
예: 금요일(2026-05-08) 실행 시 직전 토요일(2026-05-02)
"""
now = datetime.now()
days_since_saturday = (now.weekday() - 5) % 7
latest_saturday = now.date() - timedelta(days=days_since_saturday)
# 토요일이면서 추첨 전(20시 이전)이라면 지난 주 토요일을 목표로 사용
if now.weekday() == 5 and now.hour < 20:
latest_saturday = latest_saturday - timedelta(days=7)
return latest_saturday
def _estimate_target_draw_no(self, draw_map):
"""
기존 데이터의 마지막 drwNoDate와 지난 주 토요일을 비교해
이번 실행에서 확보해야 할 목표 회차를 계산합니다.
"""
if not draw_map:
return None
last_no = max(draw_map.keys())
last_data = draw_map[last_no]
last_date_str = last_data.get("drwNoDate", "")
try:
last_date = datetime.strptime(last_date_str, "%Y-%m-%d").date()
except ValueError:
return last_no
target_date = self._get_last_week_draw_date()
if target_date <= last_date:
return last_no
week_gap = (target_date - last_date).days // 7
if week_gap <= 0:
return last_no
return last_no + week_gap
# 로또 당첨 데이터를 수집해서 파일로 저장합니다. # 로또 당첨 데이터를 수집해서 파일로 저장합니다.
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일 (확장자 제외) # lottoHistoryFile: 로또 당첨 데이터를 저장할 파일 (확장자 제외)
def craw(self, lottoHistoryFile, drwNo=None): def craw(self, lottoHistoryFile, drwNo=None):
@@ -161,44 +293,97 @@ class DataCrawler:
def excute(self, resource_path): def excute(self, resource_path):
""" """
resources/lotto_history.* 를 마지막 회차 이후까지 연속으로 갱신합니다. resources/lotto_history.* 를 지난 주 기준으로 누락 없이 동기화합니다.
(기존: 날짜 비교 + 1회차만 수집 → 누락·정지가 잦음) - 마지막 회차+1만 확인하지 않고, 1~목표회차 범위에서 누락 회차를 탐지/보강
- 중복/깨진 라인을 정리해 json/txt를 일관 상태로 재생성
""" """
lottoHistoryFile = os.path.join(resource_path, "lotto_history") lottoHistoryFile = os.path.join(resource_path, "lotto_history")
json_path = lottoHistoryFile + ".json" json_path = lottoHistoryFile + ".json"
last_no = self._read_last_draw_from_json(json_path) draw_map = self._read_draw_map_from_json(json_path)
if last_no is None:
self.craw(lottoHistoryFile) # 기존 이력이 비었거나 깨졌으면 전체 재수집(기존 동작 유지)
if not draw_map:
try: try:
self.craw(lottoHistoryFile)
self.bot.sendMsg("[Lottery Crawler] full history rebuilt (no valid json).") self.bot.sendMsg("[Lottery Crawler] full history rebuilt (no valid json).")
except Exception: except Exception:
pass pass
return True return True
added = 0 target_no = self._estimate_target_draw_no(draw_map)
next_no = last_no + 1 if target_no is None:
while True: target_no = max(draw_map.keys())
result = self._fetch_draw(next_no)
if result is None:
break
self._append_draw_files(lottoHistoryFile, result)
added += 1
next_no += 1
time.sleep(0.35)
if added == 0: if not self._can_reach_lottery_host():
msg = "[Lottery Crawler] network blocked: cannot reach dhlottery host."
print(msg)
try:
self.bot.sendMsg(msg)
except Exception:
pass
return False
missing_nos = [no for no in range(1, target_no + 1) if no not in draw_map]
added = 0
failed = []
aborted_missing_nos = []
consecutive_failure = 0
fail_reasons = {}
for no in missing_nos:
result = self._fetch_draw(no)
if result is None:
failed.append(no)
reason = self._last_fetch_error or "unknown"
fail_reasons[reason] = fail_reasons.get(reason, 0) + 1
consecutive_failure += 1
if consecutive_failure >= _MAX_CONSECUTIVE_FETCH_FAILURES:
aborted_missing_nos = [x for x in missing_nos if x > no]
break
continue
draw_map[no] = result
added += 1
consecutive_failure = 0
time.sleep(0.2)
# 누락 보강 또는 중복 정리 여지가 있으면 파일을 재생성
self._write_draw_map_files(lottoHistoryFile, draw_map)
last_no = max(draw_map.keys())
if added == 0 and not failed:
try: try:
self.bot.sendMsg( self.bot.sendMsg(
"[Lottery Crawler] up to date (last drwNo={}).".format(last_no) "[Lottery Crawler] up to date (last drwNo={}, target={}).".format(
last_no, target_no
)
)
except Exception:
pass
elif failed:
sample = ",".join(str(x) for x in failed[:10])
reason_items = sorted(fail_reasons.items(), key=lambda x: x[1], reverse=True)
reason_str = "; ".join("{} x{}".format(reason, count) for reason, count in reason_items[:3])
if aborted_missing_nos:
reason_str += " | aborted {} pending draws due to consecutive failures".format(
len(aborted_missing_nos)
)
try:
self.bot.sendMsg(
"[Lottery Crawler] appended {}, failed {} draw(s): {}{} | {}".format(
added,
len(failed),
sample,
"..." if len(failed) > 10 else "",
reason_str or "no reason",
)
) )
except Exception: except Exception:
pass pass
else: else:
try: try:
self.bot.sendMsg( self.bot.sendMsg(
"[Lottery Crawler] appended {} draw(s), last drwNo={}.".format( "[Lottery Crawler] appended {} draw(s), last drwNo={}, target={}.".format(
added, last_no + added added, last_no, target_no
) )
) )
except Exception: except Exception:

125
README.md
View File

@@ -1,47 +1,112 @@
# deeplottery # deeplottery
## 데이터 구간 `deeplottery`는 로또 번호를 예측하는 프로젝트가 아니라, **전체 조합(45C6)을 규칙 기반으로 필터링해 후보를 줄이는 시스템**입니다.
핵심은 `BallFilter` 엔진이며, 운영 실행(`final_practice.py`)과 검증(`final_FilterTest.py`)이 분리되어 있습니다.
| 구간 | 회차 | ## 목표와 설계 의도
|------|------|
| 학습 | `lotto_history.txt` 1~800 |
| 검증 | 801~1000 |
| 테스트 | 1001~이후 |
## 핵심 파일 - 목표: 통계/패턴 기반 규칙으로 비효율 조합을 제거하고 후보군을 관리 가능한 크기로 축소
- 설계 의도:
- 필터 규칙은 `final_BallFilter.py` 한 곳에서 관리
- 운영 추천 생성과 과거 회차 검증을 분리하여 반복 개선
- 같은 엔진을 운영/검증에서 공통 사용해 일관성 유지
- **`final_BallFilter.py`** — 필터 로직 (`BallFilter_25` 기반, `lotto_history.txt` CSV 로드, `socket` 제거). ## 전체 아키텍처
- **`final_filter_params.py`** — 학습 구간(1~800회)에서만 집계한 **6개 합**·**전주 합 차이** 허용 집합.
- **`filter_model.py`** — `from final_BallFilter import BallFilter` 재노출.
- **`train.py` / `valid.py`** — 구간별로 당첨 6개가 모든 필터를 통과한 회차 수 집계.
- **`final_filterTest.py`** — `1_FilterTest_25.py`와 동일한 분석·(선택) MC 생존 추정.
- **`final_Practice.py`** — DataCrawler → 마지막 JSON 회차+1 크롤 → `predict1`+`predict2`. `lotto_history.json`으로 `BallFilter` 한 번 생성 후 공유. `predict1`은 고정 5조합(기존 1 + 미당첨 4, `hasWon`으로 제외) 후 `predict2`는 정렬된 6개·`seen`·과거 당첨 조합 제외 후 필터.
## 실행 (miniconda **ncue**) 1. 데이터 수집/갱신
- `DataCrawler.py`가 로또 API를 호출해 `resources/lotto_history.json`, `resources/lotto_history.txt` 갱신
2. 필터 엔진 로딩
- `final_BallFilter.py`의 `BallFilter`가 과거 당첨 이력을 메모리로 적재
3. 운영 후보 생성
- `final_practice.py`가 다음 회차 기준 전체 조합을 순회하며 `BallFilter.filter()`로 통과 조합만 저장
4. 필터 성능 검증
- `final_FilterTest.py`가 과거 당첨번호를 기준으로 어떤 필터가 당첨을 걸렀는지/통과시켰는지 분석
## 핵심 파일 설명
- `final_BallFilter.py`
- 프로젝트 핵심 엔진
- `extract_final_candidates()`에서 규칙 기반 탈락 사유(`set`)를 생성
- `filter()`는 실사용 진입점이며, 반환 `set`이 비어 있으면 통과
- `final_practice.py`
- 운영 실행 스크립트
- `predict1()`은 고정 11조합을 유지
- `predict2()`는 1차 필터 통과 조합을 만든 뒤, 2차 포트폴리오 선별로 최종 추천 수를 제한
- 총 추천 개수는 고정수 포함 최대 70게임(70,000원) 상한을 적용
- 결과를 `resources/recommend_ball.biz_25.json`에 저장하고 Telegram 전송
- `final_FilterTest.py`
- 검증/분석 스크립트
- `find_filter_method()`로 회차별 필터 적중 통계 확인
- `find_final_candidates()`로 특정 회차 후보군 재생성
- `DataCrawler.py`
- 과거 이력 파일 수집/보강
- 네트워크 실패 재시도/백오프 및 누락 회차 보완 처리
- `TelegramBot.py`
- 추천 결과 메시지 전송
- `resources/`
- `lotto_history.json`: 회차별 원본 JSON 라인 데이터
- `lotto_history.txt`: 회차별 CSV 형태 요약 데이터
- `recommend_ball.biz_25.json`: 회차별 추천 결과 저장 파일
## 실행 방법
Python 실행은 Miniconda `ncue` 환경을 사용합니다.
```bash ```bash
conda activate ncue conda activate ncue
python train.py python DataCrawler.py
python valid.py python final_FilterTest.py
python final_filterTest.py
# 특정 회차 생존 조합 수 Monte Carlo 근사
python final_filterTest.py --mc-no 900 --mc-samples 12000
python final_practice.py python final_practice.py
``` ```
동일 환경을 셸 스크립트로: ## 동작 방식 상세
```bash - 입력: 1~45에서 6개 조합 전체
./scripts/run_with_ncue.sh train.py - 처리:
./scripts/run_with_ncue.sh valid.py - 1차: `BallFilter` 규칙 필터 적용
``` - 2차: 고정 11조합을 유지한 채, 겹침도 기반 포트폴리오 선별로 후보 축소
- 출력:
- 탈락 조합: 탈락 사유 집합 반환
- 최종 추천 조합: 예산 상한(최대 70,000원) 내에서 저장/전송
## 설계 요약 주요 규칙 범주 예시:
- **6개 합 / 전주 차이**는 `final_filter_params.TRAIN_ALLOW`로 학습 구간 분포에 맞춤. - 합/평균 및 전주 대비 차이
- 그 외 통계·배수·용지 패턴·쌍/3조합 등은 `BallFilter_25`와 동일한 고정 규칙을 유지해 과도하게 느슨해지지 않도록 함. - 앞 3개/뒤 3개 합 패턴
- `filterOneDigitPattern`에서 인자 `ball`이 예시 배열로 덮어쓰이던 버그를 수정함. - 고저 비율, 끝자리 패턴, AC 값
- 배수 개수(3/4/5/6 등)
- 최근 N주 출현 빈도/중복 관련 규칙
- 비선호 2개/3개 조합 제거 규칙
## 참고 ## 디렉터리 현재 상태 (2026-05-08)
로또는 무작위에 가깝고, 본 저장소의 필터는 **구매 조합 수를 줄이기 위한 휴리스틱**이며 당첨을 보장하지 않습니다. ### 최상위 파일/디렉터리 현황
- 실행/핵심
- `DataCrawler.py`
- `final_BallFilter.py`
- `final_FilterTest.py`
- `final_practice.py`
- `final_Practice.py` (구버전 스크립트)
- `TelegramBot.py`
- 레거시 참조 파일
- `BallFilter_22.py`, `BallFilter_25.py`
- `1_FilterTest_22.py`, `1_FilterTest_25.py`
- `2_FilterTestReview_22.py`, `2_FilterTestReview_25.py`
- `3_Practice_22.py`, `3_Practice_25.py`
- `fixed10.py`
- 데이터/설정
- `resources/`
- `requirements.txt`
- `scripts/`
### Git 작업 트리 상태(요약)
- 수정됨: `DataCrawler.py`, `README.md`, `final_BallFilter.py`, `final_practice.py`, `resources/lotto_history.json`, `resources/lotto_history.txt`
- 삭제됨: `filter_model.py`, `final_filter_params.py`, `train.py`, `valid.py`
- 신규(미추적): `resources/recommend_ball.biz_25.json`
## 주의 사항
- 이 프로젝트는 통계적 휴리스틱 기반의 후보 축소 도구이며 당첨을 보장하지 않습니다.
- 필터가 강해질수록 후보 수는 줄지만, 실제 당첨 조합을 배제할 위험도 함께 증가합니다.

View File

@@ -1,8 +0,0 @@
"""
로또 필터 로직은 `final_BallFilter.BallFilter`에 구현되어 있습니다.
학습·검증 스크립트와 동일한 클래스를 쓰도록 이 모듈에서 재노출합니다.
"""
from final_BallFilter import BallFilter
__all__ = ["BallFilter"]

View File

@@ -1,10 +1,10 @@
import json import json
from collections import Counter from collections import Counter
import socket
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from final_filter_params import TRAIN_ALLOW socket.getaddrinfo(socket.gethostname(), None)
class BallFilter: class BallFilter:
history_ball_dict = None history_ball_dict = None
@@ -16,39 +16,15 @@ class BallFilter:
compositeNumber = None compositeNumber = None
def __init__(self, lottoHistoryFileName=None): def __init__(self, lottoHistoryFileName=None):
if lottoHistoryFileName is None: if lottoHistoryFileName is not None:
return inFp = open(lottoHistoryFileName, 'r', encoding='utf-8')
self.history_ball_list = [] self.history_ball_list = []
self.history_ball_no_ymd = {} self.history_ball_no_ymd = {}
self.history_ball_no_dict = {} self.history_ball_no_dict = {}
self.history_ball_date_dict = {} self.history_ball_date_dict = {}
self.history_ball_dict = {} self.history_ball_dict = {}
path = str(lottoHistoryFileName)
if path.endswith('.txt'):
rows = []
with open(path, 'r', encoding='utf-8') as in_fp:
for line in in_fp:
line = line.strip()
if not line:
continue
parts = line.split(',')
drw_no = int(parts[0])
balls = [int(parts[i]) for i in range(1, 7)]
rows.append((drw_no, balls))
rows.sort(key=lambda x: x[0])
for drw_no, balls in rows:
sb = sorted(balls)
self.history_ball_list.append(sb)
self.history_ball_no_dict[str(sb)] = drw_no
ymd_key = f'{drw_no:08d}'
self.history_ball_date_dict[ymd_key] = drw_no
self.history_ball_dict[drw_no] = {'date': '', 'ball': list(sb)}
self.history_ball_no_ymd[drw_no] = ymd_key
else:
in_fp = open(path, 'r', encoding='utf-8')
while True: while True:
line = in_fp.readline() line = inFp.readline()
if not line or line == '\n': if not line or line == '\n':
break break
data = json.loads(line) data = json.loads(line)
@@ -57,26 +33,27 @@ class BallFilter:
self.history_ball_date_dict[data['drwNoDate'].replace('-', '')] = data['drwNo'] self.history_ball_date_dict[data['drwNoDate'].replace('-', '')] = data['drwNo']
self.history_ball_dict[data['drwNo']] = {'date': data['drwNoDate'], 'ball': [data['drwtNo1'], data['drwtNo2'], data['drwtNo3'], data['drwtNo4'], data['drwtNo5'], data['drwtNo6']]} self.history_ball_dict[data['drwNo']] = {'date': data['drwNoDate'], 'ball': [data['drwtNo1'], data['drwtNo2'], data['drwtNo3'], data['drwtNo4'], data['drwtNo5'], data['drwtNo6']]}
self.history_ball_no_ymd[data['drwNo']] = data['drwNoDate'].replace('-','') self.history_ball_no_ymd[data['drwNo']] = data['drwNoDate'].replace('-','')
in_fp.close() inFp.close()
# ball 평균과 합 구하기
ball_avg = {} ball_avg = {}
ball_sum = {} ball_sum = {}
for i in range(len(self.history_ball_list)): for i in range(len(self.history_ball_list)):
win_ball = list(self.history_ball_list[-i]) WIN_BALL = list(self.history_ball_list[-i])
avg = sum(win_ball) / 6 avg = sum(WIN_BALL) / 6
if avg not in ball_avg: if avg not in ball_avg:
ball_avg[avg] = 1 ball_avg[avg] = 1
else: else:
ball_avg[avg] += 1 ball_avg[avg] += 1
s = sum(self.history_ball_list[-i]) if sum(self.history_ball_list[-i]) in ball_sum:
if s in ball_sum: ball_sum[sum(self.history_ball_list[-i])] += 1
ball_sum[s] += 1
else: else:
ball_sum[s] = 1 ball_sum[sum(self.history_ball_list[-i])] = 1
self.primeNumber = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43] self.primeNumber = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43]
self.compositeNumber = [4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45] self.compositeNumber = [4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45]
return return
def getBall(self, no): def getBall(self, no):
@@ -808,6 +785,7 @@ class BallFilter:
return len(low), len(high) return len(low), len(high)
def filterOneDigitPattern(self, ball): def filterOneDigitPattern(self, ball):
ball = [8, 18, 22, 31, 40, 44]
digit = set() digit = set()
for b in ball: for b in ball:
if b % 10 not in digit: if b % 10 not in digit:
@@ -3839,7 +3817,6 @@ class BallFilter:
p_ball = p_ball[1:7] p_ball = p_ball[1:7]
filter_set = set() filter_set = set()
A = TRAIN_ALLOW
### S: 이전 당첨 번호 ### S: 이전 당첨 번호
if no is not None: if no is not None:
@@ -3852,7 +3829,7 @@ class BallFilter:
### S: 당첨번호 6개 합 ### S: 당첨번호 6개 합
acc = sum(ball) acc = sum(ball)
if acc not in A.sum6: if acc not in {112,114,121,123,126,127,131,132,138,146,148,156,154,163,165,167,172,174,183}:
filter_set.add('6개 합: {}'.format(acc)) filter_set.add('6개 합: {}'.format(acc))
if not until_end: if not until_end:
return filter_set return filter_set
@@ -3860,7 +3837,8 @@ class BallFilter:
### E: 당첨번호 6개 합 ### E: 당첨번호 6개 합
### S: 당첨번호 6개 합에 대한 전주와 차이 ### S: 당첨번호 6개 합에 대한 전주와 차이
if abs(acc - p_acc) not in A.abs_sum_diff: if abs(acc - p_acc) not in {2,3,4,6,7,8,9,10,11,12,13,14,15,17,18,24,25,26,27,28,29,30,31,32,33,34,39,40,51}:
# 첫수와 끝수의 합에 대해서 전주 금주의 차이
filter_set.add('6개 합 전주차: {}'.format(abs(acc - p_acc))) filter_set.add('6개 합 전주차: {}'.format(abs(acc - p_acc)))
if not until_end: if not until_end:
return filter_set return filter_set
@@ -4331,6 +4309,7 @@ class BallFilter:
if not until_end: if not until_end:
return filter_set return filter_set
### E: 홀짝 개수에 대한 전주와 차이 ### E: 홀짝 개수에 대한 전주와 차이
### S: 용지에 안나올 것 같은 마킹 위치 (filterPatternInPaper1~filterPatternInPaper6) ### S: 용지에 안나올 것 같은 마킹 위치 (filterPatternInPaper1~filterPatternInPaper6)
v1 = self.filterPatternInPaper1(ball) v1 = self.filterPatternInPaper1(ball)
v2 = self.filterPatternInPaper2(ball) v2 = self.filterPatternInPaper2(ball)

View File

@@ -1,39 +0,0 @@
"""
학습 구간(회차 1~800) 당첨번호로부터 6개 합 및 전주 합 차이 허용 집합만 계산합니다.
`final_BallFilter`의 나머지 필터는 `BallFilter_25`와 동일한 고정 임계값을 유지합니다.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
import pandas as pd
@dataclass(frozen=True)
class TrainAllow:
    """Immutable pair of allow-sets produced by ``build_train_allow``."""

    # Distinct sums of the six drawn numbers seen in the training draws.
    sum6: frozenset
    # Distinct absolute differences between the sums of consecutive draws.
    abs_sum_diff: frozenset
def build_train_allow(max_no: int = 800) -> TrainAllow:
    """Build the allow-sets from draws numbered ``max_no`` or lower.

    Reads ``resources/lotto_history.txt`` located next to this module as a
    header-less CSV with eight columns (draw number, six drawn numbers,
    bonus number), keeps the rows whose draw number is at most ``max_no``,
    and orders them by draw number.

    Returns:
        A ``TrainAllow`` holding the set of six-number sums and the set of
        absolute differences between the sums of consecutive draws.
    """
    history_csv = os.path.join(
        os.path.dirname(__file__), "resources", "lotto_history.txt"
    )
    frame = pd.read_csv(history_csv, header=None)
    frame.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]
    train = frame[frame["no"] <= max_no].sort_values("no")

    ball_columns = [f"b{i}" for i in range(1, 7)]
    # One sum per draw, in draw-number order; the bonus number is excluded.
    sums = [
        sum(int(row[column]) for column in ball_columns)
        for _, row in train.iterrows()
    ]
    # Pair every draw with its predecessor to collect week-to-week changes.
    diffs = {abs(current - previous) for previous, current in zip(sums, sums[1:])}
    return TrainAllow(sum6=frozenset(sums), abs_sum_diff=frozenset(diffs))
TRAIN_ALLOW = build_train_allow(800)

View File

@@ -12,22 +12,20 @@ from TelegramBot import TelegramBot
from final_BallFilter import BallFilter from final_BallFilter import BallFilter
# predict1: 기존 1개 + 과거 6개 번호 당첨 이력이 없는 조합 4개 (resources/lotto_history.json 기준으로 검증) COST_PER_GAME = 1000
_PREDICT1_FIXED_NEVER_DRAWN_EXTRA = ( MAX_BUDGET_KRW = 70000
[2, 4, 7, 17, 18, 39], MAX_GAMES_PER_DRAW = MAX_BUDGET_KRW // COST_PER_GAME
[3, 21, 24, 40, 42, 43],
[6, 9, 16, 22, 28, 29],
[12, 17, 19, 26, 40, 42],
)
class Practice: class Practice:
bot = None
preprocessor = None
predictor = None
extract_count = None
def __init__(self, resources_path): def __init__(self, resources_path):
self.bot = TelegramBot() self.bot = TelegramBot()
self.preprocessor = None
self.predictor = None
self.extract_count = None
return return
@@ -85,67 +83,126 @@ class Practice:
return ball return ball
def predict1(self, result_json, ball_filter): def predict1(self, result_json):
fixed_rows = [[6, 7, 10, 11, 20, 45]] result_json.append([6, 7, 10, 11, 20, 45])
fixed_rows.extend([list(x) for x in _PREDICT1_FIXED_NEVER_DRAWN_EXTRA]) result_json.append([5, 12, 16, 27, 39, 45])
seen = set() result_json.append([5, 15, 18, 29, 36, 41])
for ball in fixed_rows: result_json.append([1, 17, 20, 25, 36, 45])
ball = sorted(ball) result_json.append([6, 15, 20, 23, 37, 43])
key = tuple(ball) result_json.append([8, 15, 19, 23, 38, 41])
if key in seen: result_json.append([3, 14, 20, 27, 35, 45])
continue result_json.append([5, 11, 19, 24, 40, 45])
if ball_filter.hasWon(ball): result_json.append([5, 9, 20, 25, 32, 37])
continue result_json.append([2, 13, 19, 27, 40, 43])
seen.add(key) result_json.append([4, 13, 17, 28, 39, 43])
result_json.append(ball)
return return
def predict2(self, resources_path, ymd, result_json, ball_filter=None): def _can_add_ball(self, ball, fixed_balls, selected_balls, max_overlap):
ball_set = set(ball)
for fixed_ball in fixed_balls:
if len(ball_set & set(fixed_ball)) > max_overlap:
return False
for selected_ball in selected_balls:
if len(ball_set & set(selected_ball)) > max_overlap:
return False
return True
def select_portfolio(self, fixed_balls, candidates, target_count):
    """Second-stage selection: pick up to ``target_count`` candidates.

    Candidates are first normalized to sorted lists and de-duplicated;
    any combination equal to one of ``fixed_balls`` is dropped. If the
    remaining pool already fits within ``target_count`` it is returned
    as is. Otherwise the pool is scanned repeatedly while the allowed
    overlap (shared numbers with any fixed or already chosen
    combination, as judged by ``_can_add_ball``) is relaxed through
    2, 3, 4 and 5, stopping as soon as ``target_count`` is reached.

    Returns:
        A list of sorted six-number lists, in selection order. Empty when
        ``target_count`` is zero or negative.
    """
    # One set tracks both the fixed combinations and candidates already seen.
    excluded = {tuple(sorted(fixed)) for fixed in fixed_balls}
    pool = []
    for combo in candidates:
        combo_key = tuple(sorted(combo))
        if combo_key in excluded:
            continue
        excluded.add(combo_key)
        pool.append(list(combo_key))

    if target_count <= 0:
        return []
    if len(pool) <= target_count:
        return pool

    chosen = []
    chosen_keys = set()
    for overlap_limit in (2, 3, 4, 5):
        for combo in pool:
            combo_key = tuple(combo)
            if combo_key in chosen_keys:
                continue
            if not self._can_add_ball(combo, fixed_balls, chosen, overlap_limit):
                continue
            chosen.append(combo)
            chosen_keys.add(combo_key)
            if len(chosen) >= target_count:
                return chosen

    # Still short after every relaxation stage: fill up in pool order.
    for combo in pool:
        combo_key = tuple(combo)
        if combo_key in chosen_keys:
            continue
        chosen.append(combo)
        chosen_keys.add(combo_key)
        if len(chosen) >= target_count:
            break
    return chosen
def predict2(self, resources_path, ymd, fixed_balls, max_games_per_draw=MAX_GAMES_PER_DRAW):
candidates = [i for i in range(1, 46)] candidates = [i for i in range(1, 46)]
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json') lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
if ball_filter is None: ballFilter = BallFilter(lottoHistoryFileName)
ball_filter = BallFilter(lottoHistoryFileName) no = ballFilter.getNextNo(ymd)
no = ball_filter.getNextNo(ymd)
print("회차: {}".format(no)) print("회차: {}".format(no))
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt') lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
df_ball = pd.read_csv(lottoHistoryFileName, header=None) df_ball = pd.read_csv(lottoHistoryFileName, header=None)
df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn'] df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
seen = set() passed_candidates = []
for row in result_json:
seen.add(tuple(sorted(row)))
#filter_ball=[1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44]
nCr = list(itertools.combinations(candidates, 6)) nCr = list(itertools.combinations(candidates, 6))
for idx, ball in enumerate(nCr): for idx, ball in enumerate(nCr):
if idx % 1000000 == 0: if idx % 1000000 == 0:
print(" - {} processed...".format(idx)) print(" - {} processed, pass: {}".format(idx, len(passed_candidates)))
ball = list(ball)
ball = sorted(list(ball)) filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
key = tuple(ball)
if key in seen:
continue
if ball_filter.hasWon(ball):
continue
filter_type = ball_filter.filter(ball=ball, no=no, until_end=False, df=df_ball)
filter_size = len(filter_type) filter_size = len(filter_type)
if 0 < filter_size: if 0 < filter_size:
continue continue
result_json.append(ball) passed_candidates.append(ball)
seen.add(key)
variable_target_count = max(0, max_games_per_draw - len(fixed_balls))
selected_candidates = self.select_portfolio(
fixed_balls=fixed_balls,
candidates=passed_candidates,
target_count=variable_target_count
)
p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0] p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
p_no = p_ball[0] p_no = p_ball[0]
p_ball = p_ball[1:7] p_ball = p_ball[1:7]
return p_no, p_ball return p_no, p_ball, selected_candidates, len(passed_candidates), variable_target_count
if __name__ == '__main__': if __name__ == '__main__':
@@ -153,8 +210,8 @@ if __name__ == '__main__':
resources_path = os.path.join(PROJECT_HOME, 'resources') resources_path = os.path.join(PROJECT_HOME, 'resources')
# 데이터 수집 # 데이터 수집
dataCrawler = DataCrawler() #dataCrawler = DataCrawler()
dataCrawler.excute(resources_path) #dataCrawler.excute(resources_path)
today = datetime.today() today = datetime.today()
if today.weekday() == 5: if today.weekday() == 5:
@@ -175,36 +232,38 @@ if __name__ == '__main__':
# 로또 예측 # 로또 예측
practice = Practice(resources_path) practice = Practice(resources_path)
# 데이터 수집
lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
lottoHistoryFileName = lottoHistoryFile + '.json'
with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
for line in f:
if line != '\n':
last_json = json.loads(line)
ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)
recommend_result_file = os.path.join(resources_path, "recommend_ball.biz_25.json") recommend_result_file = os.path.join(resources_path, "recommend_ball.biz_25.json")
if os.path.isfile(recommend_result_file): if os.path.isfile(recommend_result_file):
result_fp = open(recommend_result_file, "r") with open(recommend_result_file, "r", encoding="utf-8") as result_fp:
result_json = json.load(result_fp) result_json = json.load(result_fp)
result_json[ymd] = [] result_json[ymd] = []
else: else:
result_json = {ymd: []} result_json = {ymd: []}
lotto_json_for_filter = os.path.join(resources_path, 'lotto_history.json') # 매주 고정
ball_filter = BallFilter(lotto_json_for_filter) fixed_balls = []
practice.predict1(fixed_balls)
result_json[ymd].extend(fixed_balls)
# 매주 고정(과거 당첨 6개 조합 제외·중복 제외는 predict1 내부)
practice.predict1(result_json[ymd], ball_filter)
# 필터 기반 예측 # 필터 기반 예측
p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd], ball_filter) p_no, p_ball, selected_candidates, passed_count, variable_target_count = practice.predict2(
resources_path=resources_path,
ymd=ymd,
fixed_balls=fixed_balls,
max_games_per_draw=MAX_GAMES_PER_DRAW
)
result_json[ymd].extend(selected_candidates)
with open(recommend_result_file, 'w', encoding='utf-8') as outFp: with open(recommend_result_file, 'w', encoding='utf-8') as outFp:
json.dump(result_json, outFp, ensure_ascii=False) json.dump(result_json, outFp, ensure_ascii=False)
total_games = len(result_json[ymd])
total_cost = total_games * COST_PER_GAME
p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1)) p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
p_str += " - 고정수: {}\n".format(len(fixed_balls))
p_str += " - 필터 통과 후보: {}\n".format(passed_count)
p_str += " - 추가 선정: {}개 (목표 {}개)\n".format(len(selected_candidates), variable_target_count)
p_str += " - 총 추천: {}개, 총 금액: {:,}원 (한도 {:,}원)\n".format(total_games, total_cost, MAX_BUDGET_KRW)
for i, ball in enumerate(result_json[ymd]): for i, ball in enumerate(result_json[ymd]):
p_str += " {}. {}\n".format((i+1), str(ball)) p_str += " {}. {}\n".format((i+1), str(ball))
if (i+1) % 100 == 0: if (i+1) % 100 == 0:
@@ -214,8 +273,8 @@ if __name__ == '__main__':
if len(result_json[ymd]) % 100 != 0: if len(result_json[ymd]) % 100 != 0:
practice.bot.sendMsg("{}".format(p_str)) practice.bot.sendMsg("{}".format(p_str))
size = len(result_json[ymd]) print("size: {}".format(total_games))
print("size: {}".format(size)) print("cost: {:,} KRW / limit: {:,} KRW".format(total_cost, MAX_BUDGET_KRW))
# https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
# https://www.youtube.com/watch?v=YwiHaa1KNwA # https://www.youtube.com/watch?v=YwiHaa1KNwA

View File

@@ -1217,3 +1217,6 @@
{"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41} {"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41}
{"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25} {"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25}
{"returnValue": "success", "drwNoDate": "2026-04-11", "drwNo": 1219, "drwtNo1": 1, "drwtNo2": 2, "drwtNo3": 15, "drwtNo4": 28, "drwtNo5": 39, "drwtNo6": 45, "bnusNo": 31} {"returnValue": "success", "drwNoDate": "2026-04-11", "drwNo": 1219, "drwtNo1": 1, "drwtNo2": 2, "drwtNo3": 15, "drwtNo4": 28, "drwtNo5": 39, "drwtNo6": 45, "bnusNo": 31}
{"returnValue": "success", "drwNoDate": "2026-04-18", "drwNo": 1220, "drwtNo1": 2, "drwtNo2": 22, "drwtNo3": 25, "drwtNo4": 28, "drwtNo5": 34, "drwtNo6": 43, "bnusNo": 16}
{"returnValue": "success", "drwNoDate": "2026-04-25", "drwNo": 1221, "drwtNo1": 6, "drwtNo2": 13, "drwtNo3": 18, "drwtNo4": 28, "drwtNo5": 30, "drwtNo6": 36, "bnusNo": 9}
{"returnValue": "success", "drwNoDate": "2026-05-02", "drwNo": 1222, "drwtNo1": 4, "drwtNo2": 11, "drwtNo3": 17, "drwtNo4": 22, "drwtNo5": 32, "drwtNo6": 41, "bnusNo": 34}

View File

@@ -1204,7 +1204,7 @@
1204,8,16,28,30,31,44,27 1204,8,16,28,30,31,44,27
1205,1,4,16,23,31,41,2 1205,1,4,16,23,31,41,2
1206,1,3,17,26,27,42,23 1206,1,3,17,26,27,42,23
1207,10,22,24,27,38,45,11 1207,10,22,24,27,38,45,21
1208,6,27,30,36,38,42,25 1208,6,27,30,36,38,42,25
1209,2,17,20,35,37,39,24 1209,2,17,20,35,37,39,24
1210,1,7,9,17,27,38,31 1210,1,7,9,17,27,38,31
@@ -1217,3 +1217,6 @@
1217,8,10,15,20,29,31,41 1217,8,10,15,20,29,31,41
1218,3,28,31,32,42,45,25 1218,3,28,31,32,42,45,25
1219,1,2,15,28,39,45,31 1219,1,2,15,28,39,45,31
1220,2,22,25,28,34,43,16
1221,6,13,18,28,30,36,9
1222,4,11,17,22,32,41,34

View File

@@ -0,0 +1 @@
{"20260509": [[6, 7, 10, 11, 20, 45], [5, 12, 16, 27, 39, 45], [5, 15, 18, 29, 36, 41], [1, 17, 20, 25, 36, 45], [6, 15, 20, 23, 37, 43], [8, 15, 19, 23, 38, 41], [3, 14, 20, 27, 35, 45], [5, 11, 19, 24, 40, 45], [5, 9, 20, 25, 32, 37], [2, 13, 19, 27, 40, 43], [4, 13, 17, 28, 39, 43], [1, 6, 15, 22, 37, 40], [1, 6, 17, 22, 28, 40], [1, 6, 17, 22, 37, 40], [1, 11, 12, 14, 37, 39], [2, 5, 13, 22, 34, 45], [2, 5, 14, 22, 32, 39], [2, 7, 13, 15, 40, 44], [2, 7, 15, 22, 24, 44], [2, 7, 16, 19, 33, 44], [2, 8, 11, 15, 41, 44], [2, 8, 11, 28, 33, 39], [2, 8, 11, 28, 37, 45], [2, 8, 13, 14, 33, 44], [2, 8, 13, 22, 33, 36], [2, 8, 15, 22, 33, 41], [2, 9, 10, 25, 33, 44], [2, 9, 31, 33, 40, 41], [2, 10, 14, 15, 29, 44], [2, 11, 25, 26, 29, 45], [2, 13, 32, 33, 34, 42], [2, 16, 21, 26, 29, 44], [2, 18, 25, 33, 34, 44], [2, 19, 24, 33, 34, 44], [2, 19, 25, 32, 33, 45], [2, 22, 25, 33, 34, 40], [2, 22, 28, 33, 37, 45], [3, 4, 14, 22, 37, 41], [3, 5, 14, 22, 26, 44], [3, 5, 14, 26, 31, 44], [3, 5, 14, 31, 34, 44], [3, 6, 14, 22, 37, 41], [3, 7, 10, 29, 38, 44], [3, 7, 13, 20, 33, 38], [3, 7, 15, 22, 32, 44], [3, 8, 11, 17, 38, 44], [3, 8, 11, 22, 38, 39], [3, 10, 13, 15, 29, 44], [3, 13, 22, 27, 28, 38], [3, 20, 22, 26, 42, 43], [3, 20, 22, 27, 28, 38], [3, 20, 22, 28, 38, 43], [3, 22, 27, 28, 43, 44], [4, 5, 11, 22, 37, 42], [4, 5, 15, 22, 42, 43], [4, 6, 10, 19, 31, 44], [4, 7, 10, 29, 33, 38], [4, 8, 10, 31, 33, 45], [4, 8, 11, 21, 38, 39], [4, 9, 10, 31, 33, 44], [4, 13, 22, 27, 34, 38], [4, 14, 29, 31, 33, 45], [4, 16, 25, 33, 35, 43], [5, 6, 11, 16, 38, 45], [5, 6, 11, 22, 38, 41], [5, 6, 11, 26, 38, 45], [5, 6, 11, 27, 38, 44], [5, 6, 14, 22, 26, 41], [5, 8, 11, 30, 38, 39], [5, 9, 11, 20, 26, 43], [5, 13, 24, 25, 27, 44], [5, 13, 27, 31, 34, 44], [5, 15, 19, 22, 34, 36], [5, 22, 28, 31, 34, 36], [7, 8, 10, 19, 33, 44], [7, 8, 30, 33, 37, 41], [7, 18, 20, 22, 43, 44], [8, 10, 27, 33, 37, 41], [8, 13, 25, 31, 33, 44], [8, 14, 25, 31, 33, 45], [8, 14, 25, 31, 44, 45], [8, 
14, 30, 31, 33, 38], [8, 15, 22, 33, 35, 41], [8, 17, 30, 33, 35, 44], [8, 19, 20, 33, 35, 41], [9, 13, 31, 32, 33, 38], [10, 11, 24, 26, 29, 38], [10, 13, 22, 31, 36, 44], [10, 14, 15, 22, 27, 43], [10, 16, 19, 33, 44, 45], [10, 17, 22, 27, 37, 43], [10, 18, 19, 22, 25, 44], [10, 18, 22, 29, 31, 44], [10, 18, 25, 26, 31, 44], [10, 19, 24, 31, 39, 44], [11, 12, 19, 32, 35, 45], [11, 12, 29, 31, 32, 39], [11, 12, 29, 31, 33, 38], [11, 13, 29, 30, 33, 38], [11, 14, 22, 30, 38, 39], [11, 14, 22, 32, 38, 39], [11, 15, 16, 26, 43, 45], [11, 15, 16, 28, 41, 43], [11, 16, 25, 27, 31, 44], [11, 16, 27, 31, 38, 44], [11, 21, 22, 24, 37, 41], [12, 13, 17, 25, 44, 45], [12, 13, 22, 31, 34, 44], [12, 13, 25, 29, 31, 44], [12, 19, 22, 25, 34, 44]]}

View File

@@ -1,50 +0,0 @@
"""
학습 구간(1~800회): 당첨번호가 필터를 모두 통과한 회차 수를 집계합니다.
최소 20회차 이후부터 통계(최근 N주 등)가 의미 있으므로 기본은 21~800회만 평가합니다.
"""
import argparse
import os
import pandas as pd
from final_BallFilter import BallFilter
def load_history(resources_path: str) -> pd.DataFrame:
    """Read ``lotto_history.txt`` from ``resources_path`` into a DataFrame.

    The file is parsed as header-less CSV and its columns are labelled
    ``no``, ``b1`` .. ``b6`` and ``bn``.
    NOTE(review): presumably draw number, six main balls and the bonus ball;
    confirm against the file producer.

    Args:
        resources_path: Directory that contains ``lotto_history.txt``.

    Returns:
        The parsed history with the eight column labels applied.
    """
    column_labels = ["no"] + [f"b{i}" for i in range(1, 7)] + ["bn"]
    history = pd.read_csv(
        os.path.join(resources_path, "lotto_history.txt"),
        header=None,
    )
    # Assigning the labels raises if the file does not have exactly eight columns.
    history.columns = column_labels
    return history
def run_train(resources_path: str, start_no: int, end_no: int) -> tuple[int, int, list[int]]:
    """Count the draws in ``[start_no, end_no]`` whose winning numbers pass the filter.

    For every draw number in the inclusive range that exists in the history,
    the six winning numbers are sorted and handed to
    ``BallFilter.extract_final_candidates``. A draw counts as passing when that
    call returns a result of length zero.

    Args:
        resources_path: Directory that contains ``lotto_history.txt``.
        start_no: First draw number to evaluate (inclusive).
        end_no: Last draw number to evaluate (inclusive).

    Returns:
        ``(passed, evaluated, passed_draws)``: the number of passing draws, the
        number of draws actually found and evaluated, and the passing draw
        numbers in ascending order.
    """
    history = load_history(resources_path)
    ball_filter = BallFilter(os.path.join(resources_path, "lotto_history.txt"))

    evaluated = 0
    passed_draws: list[int] = []
    for draw_no in range(start_no, end_no + 1):
        matches = history[history["no"] == draw_no]
        if matches.empty:
            # Draw numbers missing from the history are not counted at all.
            continue
        first_row = matches.iloc[0]
        # Positions 1..6 hold the six main balls (position 0 is the draw number).
        answer = sorted(int(ball) for ball in first_row[1:7].tolist())
        triggered = ball_filter.extract_final_candidates(
            answer, no=draw_no, until_end=True, df=history
        )
        evaluated += 1
        if len(triggered) == 0:
            passed_draws.append(draw_no)
    return len(passed_draws), evaluated, passed_draws
if __name__ == "__main__":
    # Command-line entry point: evaluate the training range and print the pass rate.
    default_resources = os.path.join(os.path.dirname(__file__), "resources")

    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default=default_resources)
    parser.add_argument("--start-no", type=int, default=21)
    parser.add_argument("--end-no", type=int, default=800)
    options = parser.parse_args()

    passed, evaluated, passed_draws = run_train(
        options.resources, options.start_no, options.end_no
    )
    # Avoid dividing by zero when no draw in the range was found.
    pass_rate = passed / evaluated if evaluated else 0.0
    print(f"학습 구간 당첨 통과: {passed} / {evaluated} ({pass_rate:.4f})")
    print(f"통과 회차: {passed_draws}")

View File

@@ -1,49 +0,0 @@
"""
검증 구간(801~1000회): 필터만 검사(학습으로 튜닝하지 않음).
"""
import argparse
import os
import pandas as pd
from final_BallFilter import BallFilter
def load_history(resources_path: str) -> pd.DataFrame:
    """Read ``lotto_history.txt`` from ``resources_path`` into a DataFrame.

    The file is parsed as header-less CSV and its columns are labelled
    ``no``, ``b1`` .. ``b6`` and ``bn``.
    NOTE(review): presumably draw number, six main balls and the bonus ball;
    confirm against the file producer.

    Args:
        resources_path: Directory that contains ``lotto_history.txt``.

    Returns:
        The parsed history with the eight column labels applied.
    """
    column_labels = ["no"] + [f"b{i}" for i in range(1, 7)] + ["bn"]
    history = pd.read_csv(
        os.path.join(resources_path, "lotto_history.txt"),
        header=None,
    )
    # Assigning the labels raises if the file does not have exactly eight columns.
    history.columns = column_labels
    return history
def run_valid(resources_path: str, start_no: int, end_no: int) -> tuple[int, int, list[int]]:
    """Count the draws in ``[start_no, end_no]`` whose winning numbers pass the filter.

    For every draw number in the inclusive range that exists in the history,
    the six winning numbers are sorted and handed to
    ``BallFilter.extract_final_candidates``. A draw counts as passing when that
    call returns a result of length zero.

    Args:
        resources_path: Directory that contains ``lotto_history.txt``.
        start_no: First draw number to evaluate (inclusive).
        end_no: Last draw number to evaluate (inclusive).

    Returns:
        ``(passed, evaluated, passed_draws)``: the number of passing draws, the
        number of draws actually found and evaluated, and the passing draw
        numbers in ascending order.
    """
    history = load_history(resources_path)
    ball_filter = BallFilter(os.path.join(resources_path, "lotto_history.txt"))

    evaluated = 0
    passed_draws: list[int] = []
    for draw_no in range(start_no, end_no + 1):
        matches = history[history["no"] == draw_no]
        if matches.empty:
            # Draw numbers missing from the history are not counted at all.
            continue
        first_row = matches.iloc[0]
        # Positions 1..6 hold the six main balls (position 0 is the draw number).
        answer = sorted(int(ball) for ball in first_row[1:7].tolist())
        triggered = ball_filter.extract_final_candidates(
            answer, no=draw_no, until_end=True, df=history
        )
        evaluated += 1
        if len(triggered) == 0:
            passed_draws.append(draw_no)
    return len(passed_draws), evaluated, passed_draws
if __name__ == "__main__":
    # Command-line entry point: evaluate the validation range and print the pass rate.
    default_resources = os.path.join(os.path.dirname(__file__), "resources")

    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default=default_resources)
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    options = parser.parse_args()

    passed, evaluated, passed_draws = run_valid(
        options.resources, options.start_no, options.end_no
    )
    # Avoid dividing by zero when no draw in the range was found.
    pass_rate = passed / evaluated if evaluated else 0.0
    print(f"검증 구간 당첨 통과: {passed} / {evaluated} ({pass_rate:.4f})")
    print(f"통과 회차: {passed_draws}")