refactor: apply portfolio cap and align project docs

Keep the fixed 11-number set intact while adding a second-stage portfolio selection that caps final recommendations to the 70,000 KRW budget, and update docs/data/scripts to match the current project structure and runtime flow.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-05-08 10:37:03 +09:00
parent bd9eea2aee
commit 919f2e19bb
11 changed files with 492 additions and 343 deletions

View File

@@ -4,6 +4,9 @@ import time
import requests
# JSON 포맷을 다루기 위한 라이브러리를 호출합니다.
import json
from datetime import datetime, timedelta
import random
import socket
import urllib3
@@ -19,8 +22,15 @@ except ModuleNotFoundError:
def sendMsg(self, msg):
pass
_LOTTO_URL = "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}"
_REQUEST_TIMEOUT = 15
_LOTTO_URLS = (
"https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}",
"https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}",
)
_REQUEST_TIMEOUT = float(os.environ.get("LOTTO_REQUEST_TIMEOUT", "12"))
_FETCH_RETRIES_PER_DRAW = int(os.environ.get("LOTTO_FETCH_RETRIES", "3"))
_BACKOFF_BASE_SECONDS = float(os.environ.get("LOTTO_BACKOFF_BASE", "0.7"))
_MAX_CONSECUTIVE_FETCH_FAILURES = int(os.environ.get("LOTTO_MAX_CONSEC_FAIL", "8"))
_CONNECTION_PROBE_TIMEOUT = float(os.environ.get("LOTTO_PROBE_TIMEOUT", "3"))
_BROWSER_HEADERS = {
"User-Agent": (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
@@ -51,11 +61,30 @@ class DataCrawler:
self.bot = TelegramBot()
self._session = requests.Session()
self._session.headers.update(_BROWSER_HEADERS)
self._last_fetch_error = ""
def _can_reach_lottery_host(self):
    """
    Quickly check whether a TCP connection to the lottery API host can be opened.

    Each known hostname is probed on port 443; the first successful
    connection makes the method return True. If every probe raises
    OSError the method returns False. This is not a full guarantee that
    the API works, but it detects a completely blocked network early and
    avoids needless waiting.
    """

    def _reachable(hostname):
        # The connection is only a probe: it is closed as soon as it opens.
        try:
            with socket.create_connection(
                (hostname, 443), timeout=_CONNECTION_PROBE_TIMEOUT
            ):
                return True
        except OSError:
            return False

    return any(
        _reachable(hostname)
        for hostname in ("www.dhlottery.co.kr", "dhlottery.co.kr")
    )
def _fetch_draw(self, drw_no):
"""동행복권 API에서 단일 회차 결과를 가져옵니다. 실패 시 None."""
url = _LOTTO_URL.format(int(drw_no))
for verify in (_ssl_verify_arg(), False):
self._last_fetch_error = ""
verify_options = (_ssl_verify_arg(), False)
last_error = "unknown"
for attempt in range(1, _FETCH_RETRIES_PER_DRAW + 1):
for raw_url in _LOTTO_URLS:
url = raw_url.format(int(drw_no))
for verify in verify_options:
for method in ("POST", "GET"):
try:
res = self._session.request(
@@ -65,19 +94,29 @@ class DataCrawler:
verify=verify,
)
if res.status_code != 200:
last_error = "http {}".format(res.status_code)
continue
text = res.text.strip()
if not text.startswith("{"):
last_error = "non-json response"
continue
result = json.loads(text)
except (
requests.RequestException,
ValueError,
json.JSONDecodeError,
):
) as ex:
last_error = str(ex)
continue
if isinstance(result, dict) and result.get("returnValue") == "success":
return result
rv = result.get("returnValue") if isinstance(result, dict) else "unknown"
last_error = "api returnValue={}".format(rv)
if attempt < _FETCH_RETRIES_PER_DRAW:
# 지수 백오프 + 지터로 일시적 네트워크 혼잡 완화
delay = _BACKOFF_BASE_SECONDS * (2 ** (attempt - 1)) + random.uniform(0, 0.25)
time.sleep(delay)
self._last_fetch_error = last_error
return None
def _append_draw_files(self, lottoHistoryFile, result):
@@ -120,6 +159,99 @@ class DataCrawler:
return None
return last_json.get("drwNo")
def _read_draw_map_from_json(self, json_path):
    """
    Read the whole JSONL history file into a ``drwNo -> record`` mapping.

    Blank lines, lines that are not valid JSON, and records that are not
    a dict with ``returnValue == "success"`` and an integer ``drwNo`` are
    skipped. When a draw number occurs more than once, the last valid
    record wins. A missing or empty file yields an empty dict.
    """
    if not (os.path.isfile(json_path) and os.path.getsize(json_path) > 0):
        return {}

    records = {}
    with open(json_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if text == "":
                continue
            try:
                record = json.loads(text)
            except json.JSONDecodeError:
                # Corrupted line: drop it so the rewritten file is clean.
                continue
            if not isinstance(record, dict):
                continue
            if record.get("returnValue") != "success":
                continue
            draw_no = record.get("drwNo")
            if isinstance(draw_no, int):
                records[draw_no] = record
    return records
def _write_draw_map_files(self, lottoHistoryFile, draw_map):
    """
    Regenerate ``<lottoHistoryFile>.json`` and ``.txt`` in ascending drwNo order.

    Args:
        lottoHistoryFile: Path of the history file without extension.
        draw_map: Mapping of draw number -> API record. Each record must
            contain ``drwtNo1``..``drwtNo6`` and ``bnusNo``.

    Raises:
        KeyError / TypeError: If a record lacks a ball field or holds a
            non-numeric value. Both files are left untouched in that case.

    All output lines are formatted *before* either file is opened. Opening
    with mode "w" truncates the file, so formatting while writing would
    destroy the existing history if a single record turned out malformed.
    """
    json_path = lottoHistoryFile + ".json"
    txt_path = lottoHistoryFile + ".txt"
    ball_keys = (
        "drwtNo1",
        "drwtNo2",
        "drwtNo3",
        "drwtNo4",
        "drwtNo5",
        "drwtNo6",
        "bnusNo",
    )

    json_lines = []
    text_lines = []
    for drw_no in sorted(draw_map):
        result = draw_map[drw_no]
        json_lines.append(json.dumps(result, ensure_ascii=False) + "\n")
        text_lines.append(
            "%d,%d,%d,%d,%d,%d,%d,%d\n"
            % ((drw_no,) + tuple(result[key] for key in ball_keys))
        )

    # Only now touch the files: every record is known to be serialisable.
    with open(json_path, "w", encoding="utf-8") as json_fp, open(
        txt_path, "w", encoding="utf-8"
    ) as text_fp:
        json_fp.writelines(json_lines)
        text_fp.writelines(text_lines)
def _get_last_week_draw_date(self, now=None):
    """
    Return the date of the most recent Saturday whose draw has taken place.

    Example: running on Friday 2026-05-08 yields Saturday 2026-05-02.

    Args:
        now: Optional ``datetime`` to evaluate against. Defaults to
            ``datetime.now()``; exposed so the calculation can be checked
            for arbitrary moments without depending on the wall clock.

    Returns:
        A ``datetime.date`` that is always a Saturday.
    """
    if now is None:
        now = datetime.now()
    # weekday(): Monday == 0 ... Saturday == 5.
    days_since_saturday = (now.weekday() - 5) % 7
    latest_saturday = now.date() - timedelta(days=days_since_saturday)
    # On a Saturday before 20:00 the draw has not happened yet, so the
    # target is the previous Saturday.
    if days_since_saturday == 0 and now.hour < 20:
        latest_saturday -= timedelta(days=7)
    return latest_saturday
def _estimate_target_draw_no(self, draw_map):
    """
    Estimate the draw number this run should have on disk.

    Compares the ``drwNoDate`` of the highest stored draw with the date
    returned by ``_get_last_week_draw_date()`` and adds one draw per full
    week of difference.

    Args:
        draw_map: Mapping of draw number -> record dict.

    Returns:
        ``None`` for an empty map; otherwise the highest stored draw
        number plus the number of whole weeks up to the target Saturday
        (never less than the highest stored draw number).
    """
    if not draw_map:
        return None
    last_no = max(draw_map)
    last_date_str = draw_map[last_no].get("drwNoDate", "")
    try:
        last_date = datetime.strptime(last_date_str, "%Y-%m-%d").date()
    except (TypeError, ValueError):
        # Malformed date (ValueError) or a null / non-string value
        # (TypeError): there is no basis for extrapolation.
        return last_no
    target_date = self._get_last_week_draw_date()
    # Floor division gives whole weeks; a target on or before the last
    # stored draw yields a non-positive gap, clamped to zero.
    week_gap = (target_date - last_date).days // 7
    return last_no + max(week_gap, 0)
# 로또 당첨 데이터를 수집해서 파일로 저장합니다.
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일 (확장자 제외)
def craw(self, lottoHistoryFile, drwNo=None):
@@ -161,44 +293,97 @@ class DataCrawler:
def excute(self, resource_path):
"""
resources/lotto_history.* 를 마지막 회차 이후까지 연속으로 갱신합니다.
(기존: 날짜 비교 + 1회차만 수집 → 누락·정지가 잦음)
resources/lotto_history.* 를 지난 주 기준으로 누락 없이 동기화합니다.
- 마지막 회차+1만 확인하지 않고, 1~목표회차 범위에서 누락 회차를 탐지/보강
- 중복/깨진 라인을 정리해 json/txt를 일관 상태로 재생성
"""
lottoHistoryFile = os.path.join(resource_path, "lotto_history")
json_path = lottoHistoryFile + ".json"
last_no = self._read_last_draw_from_json(json_path)
if last_no is None:
self.craw(lottoHistoryFile)
draw_map = self._read_draw_map_from_json(json_path)
# 기존 이력이 비었거나 깨졌으면 전체 재수집(기존 동작 유지)
if not draw_map:
try:
self.craw(lottoHistoryFile)
self.bot.sendMsg("[Lottery Crawler] full history rebuilt (no valid json).")
except Exception:
pass
return True
added = 0
next_no = last_no + 1
while True:
result = self._fetch_draw(next_no)
if result is None:
break
self._append_draw_files(lottoHistoryFile, result)
added += 1
next_no += 1
time.sleep(0.35)
target_no = self._estimate_target_draw_no(draw_map)
if target_no is None:
target_no = max(draw_map.keys())
if added == 0:
if not self._can_reach_lottery_host():
msg = "[Lottery Crawler] network blocked: cannot reach dhlottery host."
print(msg)
try:
self.bot.sendMsg(msg)
except Exception:
pass
return False
missing_nos = [no for no in range(1, target_no + 1) if no not in draw_map]
added = 0
failed = []
aborted_missing_nos = []
consecutive_failure = 0
fail_reasons = {}
for no in missing_nos:
result = self._fetch_draw(no)
if result is None:
failed.append(no)
reason = self._last_fetch_error or "unknown"
fail_reasons[reason] = fail_reasons.get(reason, 0) + 1
consecutive_failure += 1
if consecutive_failure >= _MAX_CONSECUTIVE_FETCH_FAILURES:
aborted_missing_nos = [x for x in missing_nos if x > no]
break
continue
draw_map[no] = result
added += 1
consecutive_failure = 0
time.sleep(0.2)
# 누락 보강 또는 중복 정리 여지가 있으면 파일을 재생성
self._write_draw_map_files(lottoHistoryFile, draw_map)
last_no = max(draw_map.keys())
if added == 0 and not failed:
try:
self.bot.sendMsg(
"[Lottery Crawler] up to date (last drwNo={}).".format(last_no)
"[Lottery Crawler] up to date (last drwNo={}, target={}).".format(
last_no, target_no
)
)
except Exception:
pass
elif failed:
sample = ",".join(str(x) for x in failed[:10])
reason_items = sorted(fail_reasons.items(), key=lambda x: x[1], reverse=True)
reason_str = "; ".join("{} x{}".format(reason, count) for reason, count in reason_items[:3])
if aborted_missing_nos:
reason_str += " | aborted {} pending draws due to consecutive failures".format(
len(aborted_missing_nos)
)
try:
self.bot.sendMsg(
"[Lottery Crawler] appended {}, failed {} draw(s): {}{} | {}".format(
added,
len(failed),
sample,
"..." if len(failed) > 10 else "",
reason_str or "no reason",
)
)
except Exception:
pass
else:
try:
self.bot.sendMsg(
"[Lottery Crawler] appended {} draw(s), last drwNo={}.".format(
added, last_no + added
"[Lottery Crawler] appended {} draw(s), last drwNo={}, target={}.".format(
added, last_no, target_no
)
)
except Exception:

125
README.md
View File

@@ -1,47 +1,112 @@
# deeplottery
## 데이터 구간
`deeplottery`는 로또 번호를 예측하는 프로젝트가 아니라, **전체 조합(45C6)을 규칙 기반으로 필터링해 후보를 줄이는 시스템**입니다.
핵심은 `BallFilter` 엔진이며, 운영 실행(`final_practice.py`)과 검증(`final_FilterTest.py`)이 분리되어 있습니다.
| 구간 | 회차 |
|------|------|
| 학습 | `lotto_history.txt` 1~800 |
| 검증 | 801~1000 |
| 테스트 | 1001~이후 |
## 목표와 설계 의도
## 핵심 파일
- 목표: 통계/패턴 기반 규칙으로 비효율 조합을 제거하고 후보군을 관리 가능한 크기로 축소
- 설계 의도:
- 필터 규칙은 `final_BallFilter.py` 한 곳에서 관리
- 운영 추천 생성과 과거 회차 검증을 분리하여 반복 개선
- 같은 엔진을 운영/검증에서 공통 사용해 일관성 유지
- **`final_BallFilter.py`** — 필터 로직 (`BallFilter_25` 기반, `lotto_history.txt` CSV 로드, `socket` 제거).
- **`final_filter_params.py`** — 학습 구간(1~800회)에서만 집계한 **6개 합**·**전주 합 차이** 허용 집합.
- **`filter_model.py`** — `from final_BallFilter import BallFilter` 재노출.
- **`train.py` / `valid.py`** — 구간별로 당첨 6개가 모든 필터를 통과한 회차 수 집계.
- **`final_filterTest.py`** — `1_FilterTest_25.py`와 동일한 분석·(선택) MC 생존 추정.
- **`final_Practice.py`** — DataCrawler → 마지막 JSON 회차+1 크롤 → `predict1`+`predict2`. `lotto_history.json`으로 `BallFilter` 한 번 생성 후 공유. `predict1`은 고정 5조합(기존 1 + 미당첨 4, `hasWon`으로 제외) 후 `predict2`는 정렬된 6개·`seen`·과거 당첨 조합 제외 후 필터.
## 전체 아키텍처
## 실행 (miniconda **ncue**)
1. 데이터 수집/갱신
- `DataCrawler.py`가 로또 API를 호출해 `resources/lotto_history.json`, `resources/lotto_history.txt` 갱신
2. 필터 엔진 로딩
- `final_BallFilter.py`의 `BallFilter`가 과거 당첨 이력을 메모리로 적재
3. 운영 후보 생성
- `final_practice.py`가 다음 회차 기준 전체 조합을 순회하며 `BallFilter.filter()`로 통과 조합만 저장
4. 필터 성능 검증
- `final_FilterTest.py`가 과거 당첨번호를 기준으로 어떤 필터가 당첨을 걸렀는지/통과시켰는지 분석
## 핵심 파일 설명
- `final_BallFilter.py`
- 프로젝트 핵심 엔진
- `extract_final_candidates()`에서 규칙 기반 탈락 사유(`set`)를 생성
- `filter()`는 실사용 진입점이며, 반환 `set`이 비어 있으면 통과
- `final_practice.py`
- 운영 실행 스크립트
- `predict1()`은 고정 11조합을 유지
- `predict2()`는 1차 필터 통과 조합을 만든 뒤, 2차 포트폴리오 선별로 최종 추천 수를 제한
- 총 추천 개수는 고정수 포함 최대 70게임(70,000원) 상한을 적용
- 결과를 `resources/recommend_ball.biz_25.json`에 저장하고 Telegram 전송
- `final_FilterTest.py`
- 검증/분석 스크립트
- `find_filter_method()`로 회차별 필터 적중 통계 확인
- `find_final_candidates()`로 특정 회차 후보군 재생성
- `DataCrawler.py`
- 과거 이력 파일 수집/보강
- 네트워크 실패 재시도/백오프 및 누락 회차 보완 처리
- `TelegramBot.py`
- 추천 결과 메시지 전송
- `resources/`
- `lotto_history.json`: 회차별 원본 JSON 라인 데이터
- `lotto_history.txt`: 회차별 CSV 형태 요약 데이터
- `recommend_ball.biz_25.json`: 회차별 추천 결과 저장 파일
## 실행 방법
Python 실행은 Miniconda `ncue` 환경을 사용합니다.
```bash
conda activate ncue
python train.py
python valid.py
python final_filterTest.py
# 특정 회차 생존 조합 수 Monte Carlo 근사
python final_filterTest.py --mc-no 900 --mc-samples 12000
python DataCrawler.py
python final_FilterTest.py
python final_practice.py
```
동일 환경을 셸 스크립트로:
## 동작 방식 상세
```bash
./scripts/run_with_ncue.sh train.py
./scripts/run_with_ncue.sh valid.py
```
- 입력: 1~45에서 6개 조합 전체
- 처리:
- 1차: `BallFilter` 규칙 필터 적용
- 2차: 고정 11조합을 유지한 채, 겹침도 기반 포트폴리오 선별로 후보 축소
- 출력:
- 탈락 조합: 탈락 사유 집합 반환
- 최종 추천 조합: 예산 상한(최대 70,000원) 내에서 저장/전송
## 설계 요약
주요 규칙 범주 예시:
- **6개 합 / 전주 차이**는 `final_filter_params.TRAIN_ALLOW`로 학습 구간 분포에 맞춤.
- 그 외 통계·배수·용지 패턴·쌍/3조합 등은 `BallFilter_25`와 동일한 고정 규칙을 유지해 과도하게 느슨해지지 않도록 함.
- `filterOneDigitPattern`에서 인자 `ball`이 예시 배열로 덮어쓰이던 버그를 수정함.
- 합/평균 및 전주 대비 차이
- 앞 3개/뒤 3개 합 패턴
- 고저 비율, 끝자리 패턴, AC 값
- 배수 개수(3/4/5/6 등)
- 최근 N주 출현 빈도/중복 관련 규칙
- 비선호 2개/3개 조합 제거 규칙
## 참고
## 디렉터리 현재 상태 (2026-05-08)
로또는 무작위에 가깝고, 본 저장소의 필터는 **구매 조합 수를 줄이기 위한 휴리스틱**이며 당첨을 보장하지 않습니다.
### 최상위 파일/디렉터리 현황
- 실행/핵심
- `DataCrawler.py`
- `final_BallFilter.py`
- `final_FilterTest.py`
- `final_practice.py`
- `final_Practice.py` (구버전 스크립트)
- `TelegramBot.py`
- 레거시 참조 파일
- `BallFilter_22.py`, `BallFilter_25.py`
- `1_FilterTest_22.py`, `1_FilterTest_25.py`
- `2_FilterTestReview_22.py`, `2_FilterTestReview_25.py`
- `3_Practice_22.py`, `3_Practice_25.py`
- `fixed10.py`
- 데이터/설정
- `resources/`
- `requirements.txt`
- `scripts/`
### Git 작업 트리 상태(요약)
- 수정됨: `DataCrawler.py`, `README.md`, `final_BallFilter.py`, `final_practice.py`, `resources/lotto_history.json`, `resources/lotto_history.txt`
- 삭제됨: `filter_model.py`, `final_filter_params.py`, `train.py`, `valid.py`
- 신규(미추적): `resources/recommend_ball.biz_25.json`
## 주의 사항
- 이 프로젝트는 통계적 휴리스틱 기반의 후보 축소 도구이며 당첨을 보장하지 않습니다.
- 필터가 강해질수록 후보 수는 줄지만, 실제 당첨 조합을 배제할 위험도 함께 증가합니다.

View File

@@ -1,8 +0,0 @@
"""
로또 필터 로직은 `final_BallFilter.BallFilter`에 구현되어 있습니다.
학습·검증 스크립트와 동일한 클래스를 쓰도록 이 모듈에서 재노출합니다.
"""
from final_BallFilter import BallFilter
__all__ = ["BallFilter"]

View File

@@ -1,10 +1,10 @@
import json
from collections import Counter
import socket
import numpy as np
import pandas as pd
from final_filter_params import TRAIN_ALLOW
socket.getaddrinfo(socket.gethostname(), None)
class BallFilter:
history_ball_dict = None
@@ -16,39 +16,15 @@ class BallFilter:
compositeNumber = None
def __init__(self, lottoHistoryFileName=None):
if lottoHistoryFileName is None:
return
if lottoHistoryFileName is not None:
inFp = open(lottoHistoryFileName, 'r', encoding='utf-8')
self.history_ball_list = []
self.history_ball_no_ymd = {}
self.history_ball_no_dict = {}
self.history_ball_date_dict = {}
self.history_ball_dict = {}
path = str(lottoHistoryFileName)
if path.endswith('.txt'):
rows = []
with open(path, 'r', encoding='utf-8') as in_fp:
for line in in_fp:
line = line.strip()
if not line:
continue
parts = line.split(',')
drw_no = int(parts[0])
balls = [int(parts[i]) for i in range(1, 7)]
rows.append((drw_no, balls))
rows.sort(key=lambda x: x[0])
for drw_no, balls in rows:
sb = sorted(balls)
self.history_ball_list.append(sb)
self.history_ball_no_dict[str(sb)] = drw_no
ymd_key = f'{drw_no:08d}'
self.history_ball_date_dict[ymd_key] = drw_no
self.history_ball_dict[drw_no] = {'date': '', 'ball': list(sb)}
self.history_ball_no_ymd[drw_no] = ymd_key
else:
in_fp = open(path, 'r', encoding='utf-8')
while True:
line = in_fp.readline()
line = inFp.readline()
if not line or line == '\n':
break
data = json.loads(line)
@@ -56,27 +32,28 @@ class BallFilter:
self.history_ball_no_dict[str(self.history_ball_list[len(self.history_ball_list) - 1])] = data['drwNo']
self.history_ball_date_dict[data['drwNoDate'].replace('-', '')] = data['drwNo']
self.history_ball_dict[data['drwNo']] = {'date': data['drwNoDate'], 'ball': [data['drwtNo1'], data['drwtNo2'], data['drwtNo3'], data['drwtNo4'], data['drwtNo5'], data['drwtNo6']]}
self.history_ball_no_ymd[data['drwNo']] = data['drwNoDate'].replace('-', '')
in_fp.close()
self.history_ball_no_ymd[data['drwNo']] = data['drwNoDate'].replace('-','')
inFp.close()
# ball 평균과 합 구하기
ball_avg = {}
ball_sum = {}
for i in range(len(self.history_ball_list)):
win_ball = list(self.history_ball_list[-i])
avg = sum(win_ball) / 6
WIN_BALL = list(self.history_ball_list[-i])
avg = sum(WIN_BALL) / 6
if avg not in ball_avg:
ball_avg[avg] = 1
else:
ball_avg[avg] += 1
s = sum(self.history_ball_list[-i])
if s in ball_sum:
ball_sum[s] += 1
if sum(self.history_ball_list[-i]) in ball_sum:
ball_sum[sum(self.history_ball_list[-i])] += 1
else:
ball_sum[s] = 1
ball_sum[sum(self.history_ball_list[-i])] = 1
self.primeNumber = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43]
self.compositeNumber = [4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45]
return
def getBall(self, no):
@@ -808,6 +785,7 @@ class BallFilter:
return len(low), len(high)
def filterOneDigitPattern(self, ball):
ball = [8, 18, 22, 31, 40, 44]
digit = set()
for b in ball:
if b % 10 not in digit:
@@ -3839,7 +3817,6 @@ class BallFilter:
p_ball = p_ball[1:7]
filter_set = set()
A = TRAIN_ALLOW
### S: 이전 당첨 번호
if no is not None:
@@ -3852,7 +3829,7 @@ class BallFilter:
### S: 당첨번호 6개 합
acc = sum(ball)
if acc not in A.sum6:
if acc not in {112,114,121,123,126,127,131,132,138,146,148,156,154,163,165,167,172,174,183}:
filter_set.add('6개 합: {}'.format(acc))
if not until_end:
return filter_set
@@ -3860,7 +3837,8 @@ class BallFilter:
### E: 당첨번호 6개 합
### S: 당첨번호 6개 합에 대한 전주와 차이
if abs(acc - p_acc) not in A.abs_sum_diff:
if abs(acc - p_acc) not in {2,3,4,6,7,8,9,10,11,12,13,14,15,17,18,24,25,26,27,28,29,30,31,32,33,34,39,40,51}:
# 첫수와 끝수의 합에 대해서 전주 금주의 차이
filter_set.add('6개 합 전주차: {}'.format(abs(acc - p_acc)))
if not until_end:
return filter_set
@@ -4331,6 +4309,7 @@ class BallFilter:
if not until_end:
return filter_set
### E: 홀짝 개수에 대한 전주와 차이
### S: 용지에 안나올 것 같은 마킹 위치 (filterPatternInPaper1~filterPatternInPaper6)
v1 = self.filterPatternInPaper1(ball)
v2 = self.filterPatternInPaper2(ball)

View File

@@ -1,39 +0,0 @@
"""
학습 구간(회차 1~800) 당첨번호로부터 6개 합 및 전주 합 차이 허용 집합만 계산합니다.
`final_BallFilter`의 나머지 필터는 `BallFilter_25`와 동일한 고정 임계값을 유지합니다.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
import pandas as pd
@dataclass(frozen=True)
class TrainAllow:
    """Immutable pair of allow-sets derived from the training draws."""

    # Distinct six-ball sums observed in the training range.
    sum6: frozenset
    # Distinct absolute differences between consecutive draws' six-ball sums.
    abs_sum_diff: frozenset
def build_train_allow(max_no: int = 800) -> TrainAllow:
    """
    Build the allow-sets from draws numbered ``<= max_no``.

    Reads ``resources/lotto_history.txt`` (located next to this module) as
    a header-less CSV, keeps the rows whose draw number does not exceed
    ``max_no`` sorted by draw number, and collects

    * every distinct sum of the six main balls, and
    * every distinct absolute difference between the sums of two
      consecutive kept rows.
    """
    history_csv = os.path.join(
        os.path.dirname(__file__), "resources", "lotto_history.txt"
    )
    frame = pd.read_csv(history_csv, header=None)
    frame.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]
    frame = frame[frame["no"] <= max_no].sort_values("no")

    ball_columns = ("b1", "b2", "b3", "b4", "b5", "b6")
    # One six-ball total per kept draw, in ascending draw order.
    totals = [
        sum(int(record[column]) for column in ball_columns)
        for _, record in frame.iterrows()
    ]
    week_gaps = {abs(current - previous) for previous, current in zip(totals, totals[1:])}
    return TrainAllow(sum6=frozenset(totals), abs_sum_diff=frozenset(week_gaps))


# Allow-sets computed once at import time from draws 1..800.
TRAIN_ALLOW = build_train_allow(800)

View File

@@ -12,22 +12,20 @@ from TelegramBot import TelegramBot
from final_BallFilter import BallFilter
# predict1: 기존 1개 + 과거 6개 번호 당첨 이력이 없는 조합 4개 (resources/lotto_history.json 기준으로 검증)
_PREDICT1_FIXED_NEVER_DRAWN_EXTRA = (
[2, 4, 7, 17, 18, 39],
[3, 21, 24, 40, 42, 43],
[6, 9, 16, 22, 28, 29],
[12, 17, 19, 26, 40, 42],
)
COST_PER_GAME = 1000
MAX_BUDGET_KRW = 70000
MAX_GAMES_PER_DRAW = MAX_BUDGET_KRW // COST_PER_GAME
class Practice:
bot = None
preprocessor = None
predictor = None
extract_count = None
def __init__(self, resources_path):
self.bot = TelegramBot()
self.preprocessor = None
self.predictor = None
self.extract_count = None
return
@@ -85,67 +83,126 @@ class Practice:
return ball
def predict1(self, result_json, ball_filter):
    """
    Append the fixed weekly combinations to ``result_json`` in place.

    The base combination is followed by the entries of
    ``_PREDICT1_FIXED_NEVER_DRAWN_EXTRA``. Each combination is sorted;
    duplicates and combinations for which ``ball_filter.hasWon`` is truthy
    are skipped.
    """
    emitted = set()
    for combo in ([6, 7, 10, 11, 20, 45], *_PREDICT1_FIXED_NEVER_DRAWN_EXTRA):
        ordered = sorted(combo)
        fingerprint = tuple(ordered)
        # The duplicate check comes first so hasWon is not asked twice
        # about the same combination.
        if fingerprint not in emitted and not ball_filter.hasWon(ordered):
            emitted.add(fingerprint)
            result_json.append(ordered)
def predict1(self, result_json):
    """Append the 11 fixed weekly combinations to ``result_json`` in place."""
    weekly_fixed = (
        (6, 7, 10, 11, 20, 45),
        (5, 12, 16, 27, 39, 45),
        (5, 15, 18, 29, 36, 41),
        (1, 17, 20, 25, 36, 45),
        (6, 15, 20, 23, 37, 43),
        (8, 15, 19, 23, 38, 41),
        (3, 14, 20, 27, 35, 45),
        (5, 11, 19, 24, 40, 45),
        (5, 9, 20, 25, 32, 37),
        (2, 13, 19, 27, 40, 43),
        (4, 13, 17, 28, 39, 43),
    )
    for combo in weekly_fixed:
        # A fresh list per call so callers can mutate their own copy.
        result_json.append(list(combo))
    return
def predict2(self, resources_path, ymd, result_json, ball_filter=None):
def _can_add_ball(self, ball, fixed_balls, selected_balls, max_overlap):
    """
    Tell whether ``ball`` is sufficiently different from every existing pick.

    Returns False as soon as ``ball`` shares more than ``max_overlap``
    numbers with any combination in ``fixed_balls`` or ``selected_balls``;
    otherwise returns True.
    """
    numbers = set(ball)
    for group in (fixed_balls, selected_balls):
        if any(len(numbers & set(other)) > max_overlap for other in group):
            return False
    return True
def select_portfolio(self, fixed_balls, candidates, target_count, overlap_stages=(2, 3, 4, 5)):
    """
    Second-stage portfolio selection.

    Candidates are de-duplicated (also against the fixed combinations) and
    then picked greedily in input order. A candidate is accepted when it
    shares at most ``max_overlap`` numbers with every fixed combination and
    every already selected one (the same rule as ``_can_add_ball``). The
    limit is relaxed stage by stage until ``target_count`` picks exist.

    Args:
        fixed_balls: Combinations that are always played; never returned.
        candidates: Iterable of filter-passing combinations.
        target_count: Maximum number of combinations to return.
        overlap_stages: Successive overlap limits to try, strictest first.

    Returns:
        A list of at most ``target_count`` sorted combinations. When the
        number of unique candidates does not exceed ``target_count`` all
        of them are returned unchanged in order.
    """
    if target_count <= 0:
        return []

    fixed_balls = list(fixed_balls)
    seen = {tuple(sorted(fixed_ball)) for fixed_ball in fixed_balls}
    unique_candidates = []
    for candidate in candidates:
        key = tuple(sorted(candidate))
        if key in seen:
            continue
        seen.add(key)
        unique_candidates.append(list(key))

    if len(unique_candidates) <= target_count:
        return unique_candidates

    # Build each set once instead of on every pairwise comparison.
    fixed_sets = [frozenset(fixed_ball) for fixed_ball in fixed_balls]
    candidate_sets = [frozenset(candidate) for candidate in unique_candidates]
    # The overlap with the fixed pool does not change between stages.
    fixed_overlap = [
        max((len(numbers & fixed) for fixed in fixed_sets), default=0)
        for numbers in candidate_sets
    ]

    selected = []
    selected_sets = []
    taken = [False] * len(unique_candidates)
    for max_overlap in overlap_stages:
        for idx, numbers in enumerate(candidate_sets):
            if taken[idx] or fixed_overlap[idx] > max_overlap:
                continue
            if any(len(numbers & other) > max_overlap for other in selected_sets):
                continue
            taken[idx] = True
            selected.append(unique_candidates[idx])
            selected_sets.append(numbers)
            if len(selected) >= target_count:
                return selected

    # Still short after the last stage: fill with the rest in input order.
    for idx, candidate in enumerate(unique_candidates):
        if taken[idx]:
            continue
        selected.append(candidate)
        if len(selected) >= target_count:
            break
    return selected
def predict2(self, resources_path, ymd, fixed_balls, max_games_per_draw=MAX_GAMES_PER_DRAW):
candidates = [i for i in range(1, 46)]
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
if ball_filter is None:
ball_filter = BallFilter(lottoHistoryFileName)
no = ball_filter.getNextNo(ymd)
ballFilter = BallFilter(lottoHistoryFileName)
no = ballFilter.getNextNo(ymd)
print("회차: {}".format(no))
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
df_ball = pd.read_csv(lottoHistoryFileName, header=None)
df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
seen = set()
for row in result_json:
seen.add(tuple(sorted(row)))
#filter_ball=[1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44]
passed_candidates = []
nCr = list(itertools.combinations(candidates, 6))
for idx, ball in enumerate(nCr):
if idx % 1000000 == 0:
print(" - {} processed...".format(idx))
print(" - {} processed, pass: {}".format(idx, len(passed_candidates)))
ball = list(ball)
ball = sorted(list(ball))
key = tuple(ball)
if key in seen:
continue
if ball_filter.hasWon(ball):
continue
filter_type = ball_filter.filter(ball=ball, no=no, until_end=False, df=df_ball)
filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
filter_size = len(filter_type)
if 0 < filter_size:
continue
result_json.append(ball)
seen.add(key)
passed_candidates.append(ball)
variable_target_count = max(0, max_games_per_draw - len(fixed_balls))
selected_candidates = self.select_portfolio(
fixed_balls=fixed_balls,
candidates=passed_candidates,
target_count=variable_target_count
)
p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
p_no = p_ball[0]
p_ball = p_ball[1:7]
return p_no, p_ball
return p_no, p_ball, selected_candidates, len(passed_candidates), variable_target_count
if __name__ == '__main__':
@@ -153,8 +210,8 @@ if __name__ == '__main__':
resources_path = os.path.join(PROJECT_HOME, 'resources')
# 데이터 수집
dataCrawler = DataCrawler()
dataCrawler.excute(resources_path)
#dataCrawler = DataCrawler()
#dataCrawler.excute(resources_path)
today = datetime.today()
if today.weekday() == 5:
@@ -175,36 +232,38 @@ if __name__ == '__main__':
# 로또 예측
practice = Practice(resources_path)
# 데이터 수집
lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
lottoHistoryFileName = lottoHistoryFile + '.json'
with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
for line in f:
if line != '\n':
last_json = json.loads(line)
ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)
recommend_result_file = os.path.join(resources_path, "recommend_ball.biz_25.json")
if os.path.isfile(recommend_result_file):
result_fp = open(recommend_result_file, "r")
with open(recommend_result_file, "r", encoding="utf-8") as result_fp:
result_json = json.load(result_fp)
result_json[ymd] = []
else:
result_json = {ymd: []}
lotto_json_for_filter = os.path.join(resources_path, 'lotto_history.json')
ball_filter = BallFilter(lotto_json_for_filter)
# 매주 고정
fixed_balls = []
practice.predict1(fixed_balls)
result_json[ymd].extend(fixed_balls)
# 매주 고정(과거 당첨 6개 조합 제외·중복 제외는 predict1 내부)
practice.predict1(result_json[ymd], ball_filter)
# 필터 기반 예측
p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd], ball_filter)
p_no, p_ball, selected_candidates, passed_count, variable_target_count = practice.predict2(
resources_path=resources_path,
ymd=ymd,
fixed_balls=fixed_balls,
max_games_per_draw=MAX_GAMES_PER_DRAW
)
result_json[ymd].extend(selected_candidates)
with open(recommend_result_file, 'w', encoding='utf-8') as outFp:
json.dump(result_json, outFp, ensure_ascii=False)
total_games = len(result_json[ymd])
total_cost = total_games * COST_PER_GAME
p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
p_str += " - 고정수: {}\n".format(len(fixed_balls))
p_str += " - 필터 통과 후보: {}\n".format(passed_count)
p_str += " - 추가 선정: {}개 (목표 {}개)\n".format(len(selected_candidates), variable_target_count)
p_str += " - 총 추천: {}개, 총 금액: {:,}원 (한도 {:,}원)\n".format(total_games, total_cost, MAX_BUDGET_KRW)
for i, ball in enumerate(result_json[ymd]):
p_str += " {}. {}\n".format((i+1), str(ball))
if (i+1) % 100 == 0:
@@ -214,8 +273,8 @@ if __name__ == '__main__':
if len(result_json[ymd]) % 100 != 0:
practice.bot.sendMsg("{}".format(p_str))
size = len(result_json[ymd])
print("size: {}".format(size))
print("size: {}".format(total_games))
print("cost: {:,} KRW / limit: {:,} KRW".format(total_cost, MAX_BUDGET_KRW))
# https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
# https://www.youtube.com/watch?v=YwiHaa1KNwA

View File

@@ -1217,3 +1217,6 @@
{"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41}
{"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25}
{"returnValue": "success", "drwNoDate": "2026-04-11", "drwNo": 1219, "drwtNo1": 1, "drwtNo2": 2, "drwtNo3": 15, "drwtNo4": 28, "drwtNo5": 39, "drwtNo6": 45, "bnusNo": 31}
{"returnValue": "success", "drwNoDate": "2026-04-18", "drwNo": 1220, "drwtNo1": 2, "drwtNo2": 22, "drwtNo3": 25, "drwtNo4": 28, "drwtNo5": 34, "drwtNo6": 43, "bnusNo": 16}
{"returnValue": "success", "drwNoDate": "2026-04-25", "drwNo": 1221, "drwtNo1": 6, "drwtNo2": 13, "drwtNo3": 18, "drwtNo4": 28, "drwtNo5": 30, "drwtNo6": 36, "bnusNo": 9}
{"returnValue": "success", "drwNoDate": "2026-05-02", "drwNo": 1222, "drwtNo1": 4, "drwtNo2": 11, "drwtNo3": 17, "drwtNo4": 22, "drwtNo5": 32, "drwtNo6": 41, "bnusNo": 34}

View File

@@ -1204,7 +1204,7 @@
1204,8,16,28,30,31,44,27
1205,1,4,16,23,31,41,2
1206,1,3,17,26,27,42,23
1207,10,22,24,27,38,45,11
1207,10,22,24,27,38,45,21
1208,6,27,30,36,38,42,25
1209,2,17,20,35,37,39,24
1210,1,7,9,17,27,38,31
@@ -1217,3 +1217,6 @@
1217,8,10,15,20,29,31,41
1218,3,28,31,32,42,45,25
1219,1,2,15,28,39,45,31
1220,2,22,25,28,34,43,16
1221,6,13,18,28,30,36,9
1222,4,11,17,22,32,41,34

View File

@@ -0,0 +1 @@
{"20260509": [[6, 7, 10, 11, 20, 45], [5, 12, 16, 27, 39, 45], [5, 15, 18, 29, 36, 41], [1, 17, 20, 25, 36, 45], [6, 15, 20, 23, 37, 43], [8, 15, 19, 23, 38, 41], [3, 14, 20, 27, 35, 45], [5, 11, 19, 24, 40, 45], [5, 9, 20, 25, 32, 37], [2, 13, 19, 27, 40, 43], [4, 13, 17, 28, 39, 43], [1, 6, 15, 22, 37, 40], [1, 6, 17, 22, 28, 40], [1, 6, 17, 22, 37, 40], [1, 11, 12, 14, 37, 39], [2, 5, 13, 22, 34, 45], [2, 5, 14, 22, 32, 39], [2, 7, 13, 15, 40, 44], [2, 7, 15, 22, 24, 44], [2, 7, 16, 19, 33, 44], [2, 8, 11, 15, 41, 44], [2, 8, 11, 28, 33, 39], [2, 8, 11, 28, 37, 45], [2, 8, 13, 14, 33, 44], [2, 8, 13, 22, 33, 36], [2, 8, 15, 22, 33, 41], [2, 9, 10, 25, 33, 44], [2, 9, 31, 33, 40, 41], [2, 10, 14, 15, 29, 44], [2, 11, 25, 26, 29, 45], [2, 13, 32, 33, 34, 42], [2, 16, 21, 26, 29, 44], [2, 18, 25, 33, 34, 44], [2, 19, 24, 33, 34, 44], [2, 19, 25, 32, 33, 45], [2, 22, 25, 33, 34, 40], [2, 22, 28, 33, 37, 45], [3, 4, 14, 22, 37, 41], [3, 5, 14, 22, 26, 44], [3, 5, 14, 26, 31, 44], [3, 5, 14, 31, 34, 44], [3, 6, 14, 22, 37, 41], [3, 7, 10, 29, 38, 44], [3, 7, 13, 20, 33, 38], [3, 7, 15, 22, 32, 44], [3, 8, 11, 17, 38, 44], [3, 8, 11, 22, 38, 39], [3, 10, 13, 15, 29, 44], [3, 13, 22, 27, 28, 38], [3, 20, 22, 26, 42, 43], [3, 20, 22, 27, 28, 38], [3, 20, 22, 28, 38, 43], [3, 22, 27, 28, 43, 44], [4, 5, 11, 22, 37, 42], [4, 5, 15, 22, 42, 43], [4, 6, 10, 19, 31, 44], [4, 7, 10, 29, 33, 38], [4, 8, 10, 31, 33, 45], [4, 8, 11, 21, 38, 39], [4, 9, 10, 31, 33, 44], [4, 13, 22, 27, 34, 38], [4, 14, 29, 31, 33, 45], [4, 16, 25, 33, 35, 43], [5, 6, 11, 16, 38, 45], [5, 6, 11, 22, 38, 41], [5, 6, 11, 26, 38, 45], [5, 6, 11, 27, 38, 44], [5, 6, 14, 22, 26, 41], [5, 8, 11, 30, 38, 39], [5, 9, 11, 20, 26, 43], [5, 13, 24, 25, 27, 44], [5, 13, 27, 31, 34, 44], [5, 15, 19, 22, 34, 36], [5, 22, 28, 31, 34, 36], [7, 8, 10, 19, 33, 44], [7, 8, 30, 33, 37, 41], [7, 18, 20, 22, 43, 44], [8, 10, 27, 33, 37, 41], [8, 13, 25, 31, 33, 44], [8, 14, 25, 31, 33, 45], [8, 14, 25, 31, 44, 45], [8, 
14, 30, 31, 33, 38], [8, 15, 22, 33, 35, 41], [8, 17, 30, 33, 35, 44], [8, 19, 20, 33, 35, 41], [9, 13, 31, 32, 33, 38], [10, 11, 24, 26, 29, 38], [10, 13, 22, 31, 36, 44], [10, 14, 15, 22, 27, 43], [10, 16, 19, 33, 44, 45], [10, 17, 22, 27, 37, 43], [10, 18, 19, 22, 25, 44], [10, 18, 22, 29, 31, 44], [10, 18, 25, 26, 31, 44], [10, 19, 24, 31, 39, 44], [11, 12, 19, 32, 35, 45], [11, 12, 29, 31, 32, 39], [11, 12, 29, 31, 33, 38], [11, 13, 29, 30, 33, 38], [11, 14, 22, 30, 38, 39], [11, 14, 22, 32, 38, 39], [11, 15, 16, 26, 43, 45], [11, 15, 16, 28, 41, 43], [11, 16, 25, 27, 31, 44], [11, 16, 27, 31, 38, 44], [11, 21, 22, 24, 37, 41], [12, 13, 17, 25, 44, 45], [12, 13, 22, 31, 34, 44], [12, 13, 25, 29, 31, 44], [12, 19, 22, 25, 34, 44]]}

View File

@@ -1,50 +0,0 @@
"""
학습 구간(1~800회): 당첨번호가 필터를 모두 통과한 회차 수를 집계합니다.
최소 20회차 이후부터 통계(최근 N주 등)가 의미 있으므로 기본은 21~800회만 평가합니다.
"""
import argparse
import os
import pandas as pd
from final_BallFilter import BallFilter
def load_history(resources_path: str) -> pd.DataFrame:
    """Load ``lotto_history.txt`` from ``resources_path`` as a header-less CSV with named columns."""
    column_names = ("no", "b1", "b2", "b3", "b4", "b5", "b6", "bn")
    frame = pd.read_csv(
        os.path.join(resources_path, "lotto_history.txt"), header=None
    )
    frame.columns = list(column_names)
    return frame
def run_train(resources_path: str, start_no: int, end_no: int) -> tuple[int, int, list[int]]:
    """
    Count the draws in ``start_no..end_no`` whose winning numbers pass every filter.

    Returns:
        ``(passed, evaluated, passed_draw_numbers)``. Draw numbers absent
        from the history file are not counted as evaluated.
    """
    history = load_history(resources_path)
    ball_filter = BallFilter(os.path.join(resources_path, "lotto_history.txt"))

    evaluated = 0
    passed_draws: list[int] = []
    for draw_no in range(start_no, end_no + 1):
        matched = history[history["no"] == draw_no]
        if matched.empty:
            continue
        # Columns 1..6 hold the six main balls; the bonus ball is excluded.
        winning = sorted(int(value) for value in matched.iloc[0][1:7].tolist())
        reasons = ball_filter.extract_final_candidates(
            winning, no=draw_no, until_end=True, df=history
        )
        evaluated += 1
        # An empty rejection-reason collection means every filter passed.
        if len(reasons) == 0:
            passed_draws.append(draw_no)
    return len(passed_draws), evaluated, passed_draws
if __name__ == "__main__":
    # CLI entry point: evaluate the training range (default draws 21..800)
    # and print the pass count, the pass rate and the passing draw numbers.
    parser = argparse.ArgumentParser()
    default_resources = os.path.join(os.path.dirname(__file__), "resources")
    parser.add_argument("--resources", default=default_resources)
    parser.add_argument("--start-no", type=int, default=21)
    parser.add_argument("--end-no", type=int, default=800)
    options = parser.parse_args()

    w, t, nos = run_train(options.resources, options.start_no, options.end_no)
    # Guard against division by zero when no draw was evaluated.
    if t:
        rate = w / t
    else:
        rate = 0.0
    print(f"학습 구간 당첨 통과: {w} / {t} ({rate:.4f})")
    print(f"통과 회차: {nos}")

View File

@@ -1,49 +0,0 @@
"""
검증 구간(801~1000회): 필터만 검사(학습으로 튜닝하지 않음).
"""
import argparse
import os
import pandas as pd
from final_BallFilter import BallFilter
def load_history(resources_path: str) -> pd.DataFrame:
    """Read ``<resources_path>/lotto_history.txt`` (CSV without header) and name its eight columns."""
    history_file = os.path.join(resources_path, "lotto_history.txt")
    table = pd.read_csv(history_file, header=None)
    # Draw number, six main balls, bonus ball.
    table.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]
    return table
def run_valid(resources_path: str, start_no: int, end_no: int) -> tuple[int, int, list[int]]:
    """
    Check, for each draw in ``start_no..end_no``, whether its winning numbers pass all filters.

    Returns:
        ``(passed, evaluated, passed_draw_numbers)``. Draw numbers missing
        from the history file are skipped and not counted.
    """
    history = load_history(resources_path)
    history_path = os.path.join(resources_path, "lotto_history.txt")
    ball_filter = BallFilter(history_path)

    passed = 0
    evaluated = 0
    passed_draws: list[int] = []
    draw_no = start_no
    while draw_no <= end_no:
        rows = history[history["no"] == draw_no]
        if not rows.empty:
            # Columns 1..6 are the six main balls (bonus ball excluded).
            winning = sorted(int(value) for value in rows.iloc[0][1:7].tolist())
            reasons = ball_filter.extract_final_candidates(
                winning, no=draw_no, until_end=True, df=history
            )
            evaluated += 1
            if len(reasons) == 0:
                passed += 1
                passed_draws.append(draw_no)
        draw_no += 1
    return passed, evaluated, passed_draws
if __name__ == "__main__":
    # CLI entry point: evaluate the validation range (default draws
    # 801..1000) and print the pass count, rate and passing draw numbers.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--resources",
        default=os.path.join(os.path.dirname(__file__), "resources"),
    )
    cli.add_argument("--start-no", type=int, default=801)
    cli.add_argument("--end-no", type=int, default=1000)
    parsed = cli.parse_args()

    w, t, nos = run_valid(parsed.resources, parsed.start_no, parsed.end_no)
    # Avoid dividing by zero when nothing was evaluated.
    rate = 0.0
    if t:
        rate = w / t
    print(f"검증 구간 당첨 통과: {w} / {t} ({rate:.4f})")
    print(f"통과 회차: {nos}")