diff --git a/DataCrawler.py b/DataCrawler.py index f8734c3..7f96ce0 100644 --- a/DataCrawler.py +++ b/DataCrawler.py @@ -4,6 +4,9 @@ import time import requests # JSON 포맷을 다루기 위한 라이브러리를 호출합니다. import json +from datetime import datetime, timedelta +import random +import socket import urllib3 @@ -19,8 +22,15 @@ except ModuleNotFoundError: def sendMsg(self, msg): pass -_LOTTO_URL = "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}" -_REQUEST_TIMEOUT = 15 +_LOTTO_URLS = ( + "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}", + "https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}", +) +_REQUEST_TIMEOUT = float(os.environ.get("LOTTO_REQUEST_TIMEOUT", "12")) +_FETCH_RETRIES_PER_DRAW = int(os.environ.get("LOTTO_FETCH_RETRIES", "3")) +_BACKOFF_BASE_SECONDS = float(os.environ.get("LOTTO_BACKOFF_BASE", "0.7")) +_MAX_CONSECUTIVE_FETCH_FAILURES = int(os.environ.get("LOTTO_MAX_CONSEC_FAIL", "8")) +_CONNECTION_PROBE_TIMEOUT = float(os.environ.get("LOTTO_PROBE_TIMEOUT", "3")) _BROWSER_HEADERS = { "User-Agent": ( "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 " @@ -51,33 +61,62 @@ class DataCrawler: self.bot = TelegramBot() self._session = requests.Session() self._session.headers.update(_BROWSER_HEADERS) + self._last_fetch_error = "" + + def _can_reach_lottery_host(self): + """ + API 호스트 TCP 연결 가능 여부를 빠르게 확인합니다. + 완전한 보장은 아니지만, 완전 차단 상태를 조기 감지해 불필요한 대기 시간을 줄입니다. + """ + for host in ("www.dhlottery.co.kr", "dhlottery.co.kr"): + try: + with socket.create_connection((host, 443), timeout=_CONNECTION_PROBE_TIMEOUT): + return True + except OSError: + continue + return False def _fetch_draw(self, drw_no): """동행복권 API에서 단일 회차 결과를 가져옵니다. 실패 시 None.""" - url = _LOTTO_URL.format(int(drw_no)) - for verify in (_ssl_verify_arg(), False): - for method in ("POST", "GET"): - try: - res = self._session.request( - method, - url, - timeout=_REQUEST_TIMEOUT, - verify=verify, - ) - if res.status_code != 200: - continue - text = res.text.strip() - if not text.startswith("{"): - continue - result = json.loads(text) - except ( - requests.RequestException, - ValueError, - json.JSONDecodeError, - ): - continue - if isinstance(result, dict) and result.get("returnValue") == "success": - return result + self._last_fetch_error = "" + verify_options = (_ssl_verify_arg(), False) + last_error = "unknown" + for attempt in range(1, _FETCH_RETRIES_PER_DRAW + 1): + for raw_url in _LOTTO_URLS: + url = raw_url.format(int(drw_no)) + for verify in verify_options: + for method in ("POST", "GET"): + try: + res = self._session.request( + method, + url, + timeout=_REQUEST_TIMEOUT, + verify=verify, + ) + if res.status_code != 200: + last_error = "http {}".format(res.status_code) + continue + text = res.text.strip() + if not text.startswith("{"): + last_error = "non-json response" + continue + result = json.loads(text) + except ( + requests.RequestException, + ValueError, + json.JSONDecodeError, + ) as ex: + last_error = str(ex) + continue + if isinstance(result, dict) and result.get("returnValue") == "success": + return result + rv = result.get("returnValue") if isinstance(result, dict) else "unknown" + last_error = "api returnValue={}".format(rv) + if attempt < _FETCH_RETRIES_PER_DRAW: + # 지수 백오프 + 지터로 일시적 네트워크 혼잡 완화 + delay = _BACKOFF_BASE_SECONDS * (2 ** (attempt - 1)) + random.uniform(0, 0.25) + time.sleep(delay) + self._last_fetch_error = last_error return None def _append_draw_files(self, lottoHistoryFile, result): @@ -120,6 +159,99 @@ class DataCrawler: return None return last_json.get("drwNo") + def _read_draw_map_from_json(self, json_path): + """ + JSONL 전체를 읽어 drwNo -> record 맵으로 반환합니다. + 잘못된 라인/중복 라인은 정리 대상이며, 마지막 유효값을 유지합니다. + """ + draw_map = {} + if not os.path.isfile(json_path) or os.path.getsize(json_path) == 0: + return draw_map + + with open(json_path, "r", encoding="utf-8") as fp: + for line in fp: + line = line.strip() + if not line: + continue + try: + data = json.loads(line) + except json.JSONDecodeError: + continue + if ( + isinstance(data, dict) + and data.get("returnValue") == "success" + and isinstance(data.get("drwNo"), int) + ): + draw_map[data["drwNo"]] = data + + return draw_map + + def _write_draw_map_files(self, lottoHistoryFile, draw_map): + """ + drwNo 오름차순으로 json/txt를 재생성합니다. + 누락 회차 보강/중복 제거 후 일관된 파일 상태를 보장합니다. + """ + json_path = lottoHistoryFile + ".json" + txt_path = lottoHistoryFile + ".txt" + with open(json_path, "w", encoding="utf-8") as json_fp, open( + txt_path, "w", encoding="utf-8" + ) as text_fp: + for drw_no in sorted(draw_map.keys()): + result = draw_map[drw_no] + json_fp.write(json.dumps(result, ensure_ascii=False) + "\n") + text_fp.write( + "%d,%d,%d,%d,%d,%d,%d,%d\n" + % ( + drw_no, + result["drwtNo1"], + result["drwtNo2"], + result["drwtNo3"], + result["drwtNo4"], + result["drwtNo5"], + result["drwtNo6"], + result["bnusNo"], + ) + ) + + def _get_last_week_draw_date(self): + """ + '지난 주' 기준 토요일 날짜를 반환합니다. + 예: 금요일(2026-05-08) 실행 시 직전 토요일(2026-05-02) + """ + now = datetime.now() + days_since_saturday = (now.weekday() - 5) % 7 + latest_saturday = now.date() - timedelta(days=days_since_saturday) + # 토요일이면서 추첨 전(20시 이전)이라면 지난 주 토요일을 목표로 사용 + if now.weekday() == 5 and now.hour < 20: + latest_saturday = latest_saturday - timedelta(days=7) + return latest_saturday + + def _estimate_target_draw_no(self, draw_map): + """ + 기존 데이터의 마지막 drwNoDate와 지난 주 토요일을 비교해 + 이번 실행에서 확보해야 할 목표 회차를 계산합니다. + """ + if not draw_map: + return None + + last_no = max(draw_map.keys()) + last_data = draw_map[last_no] + last_date_str = last_data.get("drwNoDate", "") + try: + last_date = datetime.strptime(last_date_str, "%Y-%m-%d").date() + except ValueError: + return last_no + + target_date = self._get_last_week_draw_date() + if target_date <= last_date: + return last_no + + week_gap = (target_date - last_date).days // 7 + if week_gap <= 0: + return last_no + + return last_no + week_gap + # 로또 당첨 데이터를 수집해서 파일로 저장합니다. # lottoHistoryFile: 로또 당첨 데이터를 저장할 파일 (확장자 제외) def craw(self, lottoHistoryFile, drwNo=None): @@ -161,44 +293,97 @@ class DataCrawler: def excute(self, resource_path): """ - resources/lotto_history.* 를 마지막 회차 이후까지 연속으로 갱신합니다. - (기존: 날짜 비교 + 1회차만 수집 → 누락·정지가 잦음) + resources/lotto_history.* 를 지난 주 기준으로 누락 없이 동기화합니다. + - 마지막 회차+1만 확인하지 않고, 1~목표회차 범위에서 누락 회차를 탐지/보강 + - 중복/깨진 라인을 정리해 json/txt를 일관 상태로 재생성 """ lottoHistoryFile = os.path.join(resource_path, "lotto_history") json_path = lottoHistoryFile + ".json" - last_no = self._read_last_draw_from_json(json_path) - if last_no is None: - self.craw(lottoHistoryFile) + draw_map = self._read_draw_map_from_json(json_path) + + # 기존 이력이 비었거나 깨졌으면 전체 재수집(기존 동작 유지) + if not draw_map: try: + self.craw(lottoHistoryFile) self.bot.sendMsg("[Lottery Crawler] full history rebuilt (no valid json).") except Exception: pass return True - added = 0 - next_no = last_no + 1 - while True: - result = self._fetch_draw(next_no) - if result is None: - break - self._append_draw_files(lottoHistoryFile, result) - added += 1 - next_no += 1 - time.sleep(0.35) + target_no = self._estimate_target_draw_no(draw_map) + if target_no is None: + target_no = max(draw_map.keys()) - if added == 0: + if not self._can_reach_lottery_host(): + msg = "[Lottery Crawler] network blocked: cannot reach dhlottery host." + print(msg) + try: + self.bot.sendMsg(msg) + except Exception: + pass + return False + + missing_nos = [no for no in range(1, target_no + 1) if no not in draw_map] + added = 0 + failed = [] + aborted_missing_nos = [] + consecutive_failure = 0 + fail_reasons = {} + for no in missing_nos: + result = self._fetch_draw(no) + if result is None: + failed.append(no) + reason = self._last_fetch_error or "unknown" + fail_reasons[reason] = fail_reasons.get(reason, 0) + 1 + consecutive_failure += 1 + if consecutive_failure >= _MAX_CONSECUTIVE_FETCH_FAILURES: + aborted_missing_nos = [x for x in missing_nos if x > no] + break + continue + draw_map[no] = result + added += 1 + consecutive_failure = 0 + time.sleep(0.2) + + # 누락 보강 또는 중복 정리 여지가 있으면 파일을 재생성 + self._write_draw_map_files(lottoHistoryFile, draw_map) + + last_no = max(draw_map.keys()) + if added == 0 and not failed: try: self.bot.sendMsg( - "[Lottery Crawler] up to date (last drwNo={}).".format(last_no) + "[Lottery Crawler] up to date (last drwNo={}, target={}).".format( + last_no, target_no + ) + ) + except Exception: + pass + elif failed: + sample = ",".join(str(x) for x in failed[:10]) + reason_items = sorted(fail_reasons.items(), key=lambda x: x[1], reverse=True) + reason_str = "; ".join("{} x{}".format(reason, count) for reason, count in reason_items[:3]) + if aborted_missing_nos: + reason_str += " | aborted {} pending draws due to consecutive failures".format( + len(aborted_missing_nos) + ) + try: + self.bot.sendMsg( + "[Lottery Crawler] appended {}, failed {} draw(s): {}{} | {}".format( + added, + len(failed), + sample, + "..." if len(failed) > 10 else "", + reason_str or "no reason", + ) ) except Exception: pass else: try: self.bot.sendMsg( - "[Lottery Crawler] appended {} draw(s), last drwNo={}.".format( - added, last_no + added + "[Lottery Crawler] appended {} draw(s), last drwNo={}, target={}.".format( + added, last_no, target_no ) ) except Exception: diff --git a/README.md b/README.md index 53fc09e..342d131 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,112 @@ # deeplottery -## 데이터 구간 +`deeplottery`는 로또 번호를 예측하는 프로젝트가 아니라, **전체 조합(45C6)을 규칙 기반으로 필터링해 후보를 줄이는 시스템**입니다. +핵심은 `BallFilter` 엔진이며, 운영 실행(`final_practice.py`)과 검증(`final_FilterTest.py`)이 분리되어 있습니다. -| 구간 | 회차 | -|------|------| -| 학습 | `lotto_history.txt` 1~800 | -| 검증 | 801~1000 | -| 테스트 | 1001~이후 | +## 목표와 설계 의도 -## 핵심 파일 +- 목표: 통계/패턴 기반 규칙으로 비효율 조합을 제거하고 후보군을 관리 가능한 크기로 축소 +- 설계 의도: + - 필터 규칙은 `final_BallFilter.py` 한 곳에서 관리 + - 운영 추천 생성과 과거 회차 검증을 분리하여 반복 개선 + - 같은 엔진을 운영/검증에서 공통 사용해 일관성 유지 -- **`final_BallFilter.py`** — 필터 로직 (`BallFilter_25` 기반, `lotto_history.txt` CSV 로드, `socket` 제거). -- **`final_filter_params.py`** — 학습 구간(1~800회)에서만 집계한 **6개 합**·**전주 합 차이** 허용 집합. -- **`filter_model.py`** — `from final_BallFilter import BallFilter` 재노출. -- **`train.py` / `valid.py`** — 구간별로 당첨 6개가 모든 필터를 통과한 회차 수 집계. -- **`final_filterTest.py`** — `1_FilterTest_25.py`와 동일한 분석·(선택) MC 생존 추정. -- **`final_Practice.py`** — DataCrawler → 마지막 JSON 회차+1 크롤 → `predict1`+`predict2`. `lotto_history.json`으로 `BallFilter` 한 번 생성 후 공유. `predict1`은 고정 5조합(기존 1 + 미당첨 4, `hasWon`으로 제외) 후 `predict2`는 정렬된 6개·`seen`·과거 당첨 조합 제외 후 필터. +## 전체 아키텍처 -## 실행 (miniconda **ncue**) +1. 데이터 수집/갱신 + - `DataCrawler.py`가 로또 API를 호출해 `resources/lotto_history.json`, `resources/lotto_history.txt` 갱신 +2. 필터 엔진 로딩 + - `final_BallFilter.py`의 `BallFilter`가 과거 당첨 이력을 메모리로 적재 +3. 운영 후보 생성 + - `final_practice.py`가 다음 회차 기준 전체 조합을 순회하며 `BallFilter.filter()`로 통과 조합만 저장 +4. 필터 성능 검증 + - `final_FilterTest.py`가 과거 당첨번호를 기준으로 어떤 필터가 당첨을 걸렀는지/통과시켰는지 분석 + +## 핵심 파일 설명 + +- `final_BallFilter.py` + - 프로젝트 핵심 엔진 + - `extract_final_candidates()`에서 규칙 기반 탈락 사유(`set`)를 생성 + - `filter()`는 실사용 진입점이며, 반환 `set`이 비어 있으면 통과 +- `final_practice.py` + - 운영 실행 스크립트 + - `predict1()` 고정 11조합을 유지 + - `predict2()`는 1차 필터 통과 조합을 만든 뒤, 2차 포트폴리오 선별로 최종 추천 수를 제한 + - 총 추천 개수는 고정수 포함 최대 70게임(70,000원) 상한을 적용 + - 결과를 `resources/recommend_ball.biz_25.json`에 저장하고 Telegram 전송 +- `final_FilterTest.py` + - 검증/분석 스크립트 + - `find_filter_method()`로 회차별 필터 적중 통계 확인 + - `find_final_candidates()`로 특정 회차 후보군 재생성 +- `DataCrawler.py` + - 과거 이력 파일 수집/보강 + - 네트워크 실패 재시도/백오프 및 누락 회차 보완 처리 +- `TelegramBot.py` + - 추천 결과 메시지 전송 +- `resources/` + - `lotto_history.json`: 회차별 원본 JSON 라인 데이터 + - `lotto_history.txt`: 회차별 CSV 형태 요약 데이터 + - `recommend_ball.biz_25.json`: 회차별 추천 결과 저장 파일 + +## 실행 방법 + +Python 실행은 Miniconda `ncue` 환경을 사용합니다. ```bash conda activate ncue -python train.py -python valid.py -python final_filterTest.py -# 특정 회차 생존 조합 수 Monte Carlo 근사 -python final_filterTest.py --mc-no 900 --mc-samples 12000 +python DataCrawler.py +python final_FilterTest.py python final_practice.py ``` -동일 환경을 셸 스크립트로: +## 동작 방식 상세 -```bash -./scripts/run_with_ncue.sh train.py -./scripts/run_with_ncue.sh valid.py -``` +- 입력: 1~45에서 6개 조합 전체 +- 처리: + - 1차: `BallFilter` 규칙 필터 적용 + - 2차: 고정 11조합을 유지한 채, 겹침도 기반 포트폴리오 선별로 후보 축소 +- 출력: + - 탈락 조합: 탈락 사유 집합 반환 + - 최종 추천 조합: 예산 상한(최대 70,000원) 내에서 저장/전송 -## 설계 요약 +주요 규칙 범주 예시: -- **6개 합 / 전주 합 차이**는 `final_filter_params.TRAIN_ALLOW`로 학습 구간 분포에 맞춤. -- 그 외 통계·배수·용지 패턴·쌍/3조합 등은 `BallFilter_25`와 동일한 고정 규칙을 유지해 과도하게 느슨해지지 않도록 함. -- `filterOneDigitPattern`에서 인자 `ball`이 예시 배열로 덮어쓰이던 버그를 수정함. +- 합/평균 및 전주 대비 차이 +- 앞 3개/뒤 3개 합 패턴 +- 고저 비율, 끝자리 패턴, AC 값 +- 배수 개수(3/4/5/6 등) +- 최근 N주 출현 빈도/중복 관련 규칙 +- 비선호 2개/3개 조합 제거 규칙 -## 참고 +## 디렉터리 현재 상태 (2026-05-08) -로또는 무작위에 가깝고, 본 저장소의 필터는 **구매 조합 수를 줄이기 위한 휴리스틱**이며 당첨을 보장하지 않습니다. +### 최상위 파일/디렉터리 현황 + +- 실행/핵심 + - `DataCrawler.py` + - `final_BallFilter.py` + - `final_FilterTest.py` + - `final_practice.py` + - `final_Practice.py` (구버전 스크립트) + - `TelegramBot.py` +- 레거시 참조 파일 + - `BallFilter_22.py`, `BallFilter_25.py` + - `1_FilterTest_22.py`, `1_FilterTest_25.py` + - `2_FilterTestReview_22.py`, `2_FilterTestReview_25.py` + - `3_Practice_22.py`, `3_Practice_25.py` + - `fixed10.py` +- 데이터/설정 + - `resources/` + - `requirements.txt` + - `scripts/` + +### Git 작업 트리 상태(요약) + +- 수정됨: `DataCrawler.py`, `README.md`, `final_BallFilter.py`, `final_practice.py`, `resources/lotto_history.json`, `resources/lotto_history.txt` +- 삭제됨: `filter_model.py`, `final_filter_params.py`, `train.py`, `valid.py` +- 신규(미추적): `resources/recommend_ball.biz_25.json` + +## 주의 사항 + +- 이 프로젝트는 통계적 휴리스틱 기반의 후보 축소 도구이며 당첨을 보장하지 않습니다. +- 필터가 강해질수록 후보 수는 줄지만, 실제 당첨 조합을 배제할 위험도 함께 증가합니다. diff --git a/filter_model.py b/filter_model.py deleted file mode 100644 index 283de23..0000000 --- a/filter_model.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -로또 필터 로직은 `final_BallFilter.BallFilter`에 구현되어 있습니다. -학습·검증 스크립트와 동일한 클래스를 쓰도록 이 모듈에서 재노출합니다. -""" - -from final_BallFilter import BallFilter - -__all__ = ["BallFilter"] diff --git a/final_BallFilter.py b/final_BallFilter.py index dae08c2..869bcce 100644 --- a/final_BallFilter.py +++ b/final_BallFilter.py @@ -1,10 +1,10 @@ import json from collections import Counter - +import socket import numpy as np import pandas as pd -from final_filter_params import TRAIN_ALLOW +socket.getaddrinfo(socket.gethostname(), None) class BallFilter: history_ball_dict = None @@ -16,39 +16,15 @@ class BallFilter: compositeNumber = None def __init__(self, lottoHistoryFileName=None): - if lottoHistoryFileName is None: - return - self.history_ball_list = [] - self.history_ball_no_ymd = {} - self.history_ball_no_dict = {} - self.history_ball_date_dict = {} - self.history_ball_dict = {} - path = str(lottoHistoryFileName) - - if path.endswith('.txt'): - rows = [] - with open(path, 'r', encoding='utf-8') as in_fp: - for line in in_fp: - line = line.strip() - if not line: - continue - parts = line.split(',') - drw_no = int(parts[0]) - balls = [int(parts[i]) for i in range(1, 7)] - rows.append((drw_no, balls)) - rows.sort(key=lambda x: x[0]) - for drw_no, balls in rows: - sb = sorted(balls) - self.history_ball_list.append(sb) - self.history_ball_no_dict[str(sb)] = drw_no - ymd_key = f'{drw_no:08d}' - self.history_ball_date_dict[ymd_key] = drw_no - self.history_ball_dict[drw_no] = {'date': '', 'ball': list(sb)} - self.history_ball_no_ymd[drw_no] = ymd_key - else: - in_fp = open(path, 'r', encoding='utf-8') + if lottoHistoryFileName is not None: + inFp = open(lottoHistoryFileName, 'r', encoding='utf-8') + self.history_ball_list = [] + self.history_ball_no_ymd = {} + self.history_ball_no_dict = {} + self.history_ball_date_dict = {} + self.history_ball_dict = {} while True: - line = in_fp.readline() + line = inFp.readline() if not line or line == '\n': break data = json.loads(line) @@ -56,27 +32,28 @@ class BallFilter: self.history_ball_no_dict[str(self.history_ball_list[len(self.history_ball_list) - 1])] = data['drwNo'] self.history_ball_date_dict[data['drwNoDate'].replace('-', '')] = data['drwNo'] self.history_ball_dict[data['drwNo']] = {'date': data['drwNoDate'], 'ball': [data['drwtNo1'], data['drwtNo2'], data['drwtNo3'], data['drwtNo4'], data['drwtNo5'], data['drwtNo6']]} - self.history_ball_no_ymd[data['drwNo']] = data['drwNoDate'].replace('-', '') - in_fp.close() + self.history_ball_no_ymd[data['drwNo']] = data['drwNoDate'].replace('-','') + inFp.close() - ball_avg = {} - ball_sum = {} - for i in range(len(self.history_ball_list)): - win_ball = list(self.history_ball_list[-i]) - avg = sum(win_ball) / 6 - if avg not in ball_avg: - ball_avg[avg] = 1 - else: - ball_avg[avg] += 1 + # ball 평균과 합 구하기 + ball_avg = {} + ball_sum = {} + for i in range(len(self.history_ball_list)): + WIN_BALL = list(self.history_ball_list[-i]) + avg = sum(WIN_BALL) / 6 + if avg not in ball_avg: + ball_avg[avg] = 1 + else: + ball_avg[avg] += 1 - s = sum(self.history_ball_list[-i]) - if s in ball_sum: - ball_sum[s] += 1 - else: - ball_sum[s] = 1 + if sum(self.history_ball_list[-i]) in ball_sum: + ball_sum[sum(self.history_ball_list[-i])] += 1 + else: + ball_sum[sum(self.history_ball_list[-i])] = 1 + + self.primeNumber = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43] + self.compositeNumber = [4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45] - self.primeNumber = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43] - self.compositeNumber = [4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45] return def getBall(self, no): @@ -808,6 +785,7 @@ class BallFilter: return len(low), len(high) def filterOneDigitPattern(self, ball): + ball = [8, 18, 22, 31, 40, 44] digit = set() for b in ball: if b % 10 not in digit: @@ -3839,7 +3817,6 @@ class BallFilter: p_ball = p_ball[1:7] filter_set = set() - A = TRAIN_ALLOW ### S: 이전 당첨 번호 if no is not None: @@ -3852,7 +3829,7 @@ class BallFilter: ### S: 당첨번호 6개 합 acc = sum(ball) - if acc not in A.sum6: + if acc not in {112,114,121,123,126,127,131,132,138,146,148,156,154,163,165,167,172,174,183}: filter_set.add('6개 합: {}'.format(acc)) if not until_end: return filter_set @@ -3860,7 +3837,8 @@ class BallFilter: ### E: 당첨번호 6개 합 ### S: 당첨번호 6개 합에 대한 전주와 차이 - if abs(acc - p_acc) not in A.abs_sum_diff: + if abs(acc - p_acc) not in {2,3,4,6,7,8,9,10,11,12,13,14,15,17,18,24,25,26,27,28,29,30,31,32,33,34,39,40,51}: + # 첫수와 끝수의 합에 대해서 전주 금주의 차이 filter_set.add('6개 합 전주차: {}'.format(abs(acc - p_acc))) if not until_end: return filter_set @@ -4331,6 +4309,7 @@ class BallFilter: if not until_end: return filter_set ### E: 홀짝 개수에 대한 전주와 차이 + ### S: 용지에 안나올 것 같은 마킹 위치 (filterPatternInPaper1~filterPatternInPaper6) v1 = self.filterPatternInPaper1(ball) v2 = self.filterPatternInPaper2(ball) diff --git a/final_filter_params.py b/final_filter_params.py deleted file mode 100644 index e2579f6..0000000 --- a/final_filter_params.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -학습 구간(회차 1~800) 당첨번호로부터 6개 합 및 전주 합 차이 허용 집합만 계산합니다. -`final_BallFilter`의 나머지 필터는 `BallFilter_25`와 동일한 고정 임계값을 유지합니다. -""" - -from __future__ import annotations - -import os -from dataclasses import dataclass - -import pandas as pd - - -@dataclass(frozen=True) -class TrainAllow: - sum6: frozenset - abs_sum_diff: frozenset - - -def build_train_allow(max_no: int = 800) -> TrainAllow: - base = os.path.dirname(__file__) - path = os.path.join(base, "resources", "lotto_history.txt") - df = pd.read_csv(path, header=None) - df.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"] - df = df[df["no"] <= max_no].sort_values("no") - rows = [] - for _, r in df.iterrows(): - b = sorted(int(r[f"b{i}"]) for i in range(1, 7)) - rows.append((int(r["no"]), b)) - sum6 = {sum(b) for _, b in rows} - abs_sum_diff = set() - for i in range(1, len(rows)): - s = sum(rows[i][1]) - ps = sum(rows[i - 1][1]) - abs_sum_diff.add(abs(s - ps)) - return TrainAllow(sum6=frozenset(sum6), abs_sum_diff=frozenset(abs_sum_diff)) - - -TRAIN_ALLOW = build_train_allow(800) diff --git a/final_practice.py b/final_practice.py index 6cb4f45..8b0de85 100644 --- a/final_practice.py +++ b/final_practice.py @@ -12,22 +12,20 @@ from TelegramBot import TelegramBot from final_BallFilter import BallFilter -# predict1: 기존 1개 + 과거 6개 번호 당첨 이력이 없는 조합 4개 (resources/lotto_history.json 기준으로 검증) -_PREDICT1_FIXED_NEVER_DRAWN_EXTRA = ( - [2, 4, 7, 17, 18, 39], - [3, 21, 24, 40, 42, 43], - [6, 9, 16, 22, 28, 29], - [12, 17, 19, 26, 40, 42], -) - +COST_PER_GAME = 1000 +MAX_BUDGET_KRW = 70000 +MAX_GAMES_PER_DRAW = MAX_BUDGET_KRW // COST_PER_GAME class Practice: + bot = None + preprocessor = None + predictor = None + + extract_count = None + def __init__(self, resources_path): self.bot = TelegramBot() - self.preprocessor = None - self.predictor = None - self.extract_count = None return @@ -85,67 +83,126 @@ class Practice: return ball - def predict1(self, result_json, ball_filter): - fixed_rows = [[6, 7, 10, 11, 20, 45]] - fixed_rows.extend([list(x) for x in _PREDICT1_FIXED_NEVER_DRAWN_EXTRA]) - seen = set() - for ball in fixed_rows: - ball = sorted(ball) - key = tuple(ball) - if key in seen: - continue - if ball_filter.hasWon(ball): - continue - seen.add(key) - result_json.append(ball) + def predict1(self, result_json): + result_json.append([6, 7, 10, 11, 20, 45]) + result_json.append([5, 12, 16, 27, 39, 45]) + result_json.append([5, 15, 18, 29, 36, 41]) + result_json.append([1, 17, 20, 25, 36, 45]) + result_json.append([6, 15, 20, 23, 37, 43]) + result_json.append([8, 15, 19, 23, 38, 41]) + result_json.append([3, 14, 20, 27, 35, 45]) + result_json.append([5, 11, 19, 24, 40, 45]) + result_json.append([5, 9, 20, 25, 32, 37]) + result_json.append([2, 13, 19, 27, 40, 43]) + result_json.append([4, 13, 17, 28, 39, 43]) + return - def predict2(self, resources_path, ymd, result_json, ball_filter=None): + def _can_add_ball(self, ball, fixed_balls, selected_balls, max_overlap): + ball_set = set(ball) + + for fixed_ball in fixed_balls: + if len(ball_set & set(fixed_ball)) > max_overlap: + return False + + for selected_ball in selected_balls: + if len(ball_set & set(selected_ball)) > max_overlap: + return False + + return True + + def select_portfolio(self, fixed_balls, candidates, target_count): + """ + 2차 포트폴리오 선정: + - 중복 제거 + - 고정수/선정수 간 중복도(겹치는 번호 수) 제약을 단계적으로 완화하며 선택 + """ + unique_candidates = [] + seen = set() + fixed_keys = {tuple(sorted(fixed_ball)) for fixed_ball in fixed_balls} + + for candidate in candidates: + key = tuple(sorted(candidate)) + if key in seen or key in fixed_keys: + continue + seen.add(key) + unique_candidates.append(list(key)) + + if target_count <= 0: + return [] + + if len(unique_candidates) <= target_count: + return unique_candidates + + selected = [] + selected_keys = set() + overlap_stages = [2, 3, 4, 5] + + for max_overlap in overlap_stages: + for candidate in unique_candidates: + key = tuple(candidate) + if key in selected_keys: + continue + + if self._can_add_ball(candidate, fixed_balls, selected, max_overlap): + selected.append(candidate) + selected_keys.add(key) + if len(selected) >= target_count: + return selected + + # 단계 완화 후에도 부족하면 남은 조합을 순서대로 채움 + for candidate in unique_candidates: + key = tuple(candidate) + if key in selected_keys: + continue + selected.append(candidate) + selected_keys.add(key) + if len(selected) >= target_count: + break + + return selected + + def predict2(self, resources_path, ymd, fixed_balls, max_games_per_draw=MAX_GAMES_PER_DRAW): candidates = [i for i in range(1, 46)] lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json') - if ball_filter is None: - ball_filter = BallFilter(lottoHistoryFileName) - no = ball_filter.getNextNo(ymd) + ballFilter = BallFilter(lottoHistoryFileName) + no = ballFilter.getNextNo(ymd) print("회차: {}".format(no)) lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt') df_ball = pd.read_csv(lottoHistoryFileName, header=None) df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn'] - seen = set() - for row in result_json: - seen.add(tuple(sorted(row))) - - #filter_ball=[1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44] + passed_candidates = [] nCr = list(itertools.combinations(candidates, 6)) for idx, ball in enumerate(nCr): if idx % 1000000 == 0: - print(" - {} processed...".format(idx)) + print(" - {} processed, pass: {}".format(idx, len(passed_candidates))) + ball = list(ball) - ball = sorted(list(ball)) - key = tuple(ball) - if key in seen: - continue - if ball_filter.hasWon(ball): - continue - - filter_type = ball_filter.filter(ball=ball, no=no, until_end=False, df=df_ball) + filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball) filter_size = len(filter_type) if 0 < filter_size: continue - result_json.append(ball) - seen.add(key) + passed_candidates.append(ball) + + variable_target_count = max(0, max_games_per_draw - len(fixed_balls)) + selected_candidates = self.select_portfolio( + fixed_balls=fixed_balls, + candidates=passed_candidates, + target_count=variable_target_count + ) p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0] p_no = p_ball[0] p_ball = p_ball[1:7] - return p_no, p_ball + return p_no, p_ball, selected_candidates, len(passed_candidates), variable_target_count if __name__ == '__main__': @@ -153,8 +210,8 @@ if __name__ == '__main__': resources_path = os.path.join(PROJECT_HOME, 'resources') # 데이터 수집 - dataCrawler = DataCrawler() - dataCrawler.excute(resources_path) + #dataCrawler = DataCrawler() + #dataCrawler.excute(resources_path) today = datetime.today() if today.weekday() == 5: @@ -175,36 +232,38 @@ if __name__ == '__main__': # 로또 예측 practice = Practice(resources_path) - # 데이터 수집 - lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history' - lottoHistoryFileName = lottoHistoryFile + '.json' - with open(lottoHistoryFileName, "r", encoding='utf-8') as f: - for line in f: - if line != '\n': - last_json = json.loads(line) - - ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1) - recommend_result_file = os.path.join(resources_path, "recommend_ball.biz_25.json") if os.path.isfile(recommend_result_file): - result_fp = open(recommend_result_file, "r") - result_json = json.load(result_fp) + with open(recommend_result_file, "r", encoding="utf-8") as result_fp: + result_json = json.load(result_fp) result_json[ymd] = [] else: result_json = {ymd: []} - lotto_json_for_filter = os.path.join(resources_path, 'lotto_history.json') - ball_filter = BallFilter(lotto_json_for_filter) + # 매주 고정 + fixed_balls = [] + practice.predict1(fixed_balls) + result_json[ymd].extend(fixed_balls) - # 매주 고정(과거 당첨 6개 조합 제외·중복 제외는 predict1 내부) - practice.predict1(result_json[ymd], ball_filter) # 필터 기반 예측 - p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd], ball_filter) + p_no, p_ball, selected_candidates, passed_count, variable_target_count = practice.predict2( + resources_path=resources_path, + ymd=ymd, + fixed_balls=fixed_balls, + max_games_per_draw=MAX_GAMES_PER_DRAW + ) + result_json[ymd].extend(selected_candidates) with open(recommend_result_file, 'w', encoding='utf-8') as outFp: json.dump(result_json, outFp, ensure_ascii=False) + total_games = len(result_json[ymd]) + total_cost = total_games * COST_PER_GAME p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1)) + p_str += " - 고정수: {}개\n".format(len(fixed_balls)) + p_str += " - 필터 통과 후보: {}개\n".format(passed_count) + p_str += " - 추가 선정: {}개 (목표 {}개)\n".format(len(selected_candidates), variable_target_count) + p_str += " - 총 추천: {}개, 총 금액: {:,}원 (한도 {:,}원)\n".format(total_games, total_cost, MAX_BUDGET_KRW) for i, ball in enumerate(result_json[ymd]): p_str += " {}. {}\n".format((i+1), str(ball)) if (i+1) % 100 == 0: @@ -214,8 +273,8 @@ if __name__ == '__main__': if len(result_json[ymd]) % 100 != 0: practice.bot.sendMsg("{}".format(p_str)) - size = len(result_json[ymd]) - print("size: {}".format(size)) + print("size: {}".format(total_games)) + print("cost: {:,} KRW / limit: {:,} KRW".format(total_cost, MAX_BUDGET_KRW)) # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K # https://www.youtube.com/watch?v=YwiHaa1KNwA diff --git a/resources/lotto_history.json b/resources/lotto_history.json index 931614f..83319c7 100644 --- a/resources/lotto_history.json +++ b/resources/lotto_history.json @@ -1217,3 +1217,6 @@ {"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41} {"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25} {"returnValue": "success", "drwNoDate": "2026-04-11", "drwNo": 1219, "drwtNo1": 1, "drwtNo2": 2, "drwtNo3": 15, "drwtNo4": 28, "drwtNo5": 39, "drwtNo6": 45, "bnusNo": 31} +{"returnValue": "success", "drwNoDate": "2026-04-18", "drwNo": 1220, "drwtNo1": 2, "drwtNo2": 22, "drwtNo3": 25, "drwtNo4": 28, "drwtNo5": 34, "drwtNo6": 43, "bnusNo": 16} +{"returnValue": "success", "drwNoDate": "2026-04-25", "drwNo": 1221, "drwtNo1": 6, "drwtNo2": 13, "drwtNo3": 18, "drwtNo4": 28, "drwtNo5": 30, "drwtNo6": 36, "bnusNo": 9} +{"returnValue": "success", "drwNoDate": "2026-05-02", "drwNo": 1222, "drwtNo1": 4, "drwtNo2": 11, "drwtNo3": 17, "drwtNo4": 22, "drwtNo5": 32, "drwtNo6": 41, "bnusNo": 34} diff --git a/resources/lotto_history.txt b/resources/lotto_history.txt index a214d15..1df0784 100644 --- a/resources/lotto_history.txt +++ b/resources/lotto_history.txt @@ -1204,7 +1204,7 @@ 1204,8,16,28,30,31,44,27 1205,1,4,16,23,31,41,2 1206,1,3,17,26,27,42,23 -1207,10,22,24,27,38,45,11 +1207,10,22,24,27,38,45,21 1208,6,27,30,36,38,42,25 1209,2,17,20,35,37,39,24 1210,1,7,9,17,27,38,31 @@ -1217,3 +1217,6 @@ 1217,8,10,15,20,29,31,41 1218,3,28,31,32,42,45,25 1219,1,2,15,28,39,45,31 +1220,2,22,25,28,34,43,16 +1221,6,13,18,28,30,36,9 +1222,4,11,17,22,32,41,34 diff --git a/resources/recommend_ball.biz_25.json b/resources/recommend_ball.biz_25.json new file mode 100644 index 0000000..084377b --- /dev/null +++ b/resources/recommend_ball.biz_25.json @@ -0,0 +1 @@ +{"20260509": [[6, 7, 10, 11, 20, 45], [5, 12, 16, 27, 39, 45], [5, 15, 18, 29, 36, 41], [1, 17, 20, 25, 36, 45], [6, 15, 20, 23, 37, 43], [8, 15, 19, 23, 38, 41], [3, 14, 20, 27, 35, 45], [5, 11, 19, 24, 40, 45], [5, 9, 20, 25, 32, 37], [2, 13, 19, 27, 40, 43], [4, 13, 17, 28, 39, 43], [1, 6, 15, 22, 37, 40], [1, 6, 17, 22, 28, 40], [1, 6, 17, 22, 37, 40], [1, 11, 12, 14, 37, 39], [2, 5, 13, 22, 34, 45], [2, 5, 14, 22, 32, 39], [2, 7, 13, 15, 40, 44], [2, 7, 15, 22, 24, 44], [2, 7, 16, 19, 33, 44], [2, 8, 11, 15, 41, 44], [2, 8, 11, 28, 33, 39], [2, 8, 11, 28, 37, 45], [2, 8, 13, 14, 33, 44], [2, 8, 13, 22, 33, 36], [2, 8, 15, 22, 33, 41], [2, 9, 10, 25, 33, 44], [2, 9, 31, 33, 40, 41], [2, 10, 14, 15, 29, 44], [2, 11, 25, 26, 29, 45], [2, 13, 32, 33, 34, 42], [2, 16, 21, 26, 29, 44], [2, 18, 25, 33, 34, 44], [2, 19, 24, 33, 34, 44], [2, 19, 25, 32, 33, 45], [2, 22, 25, 33, 34, 40], [2, 22, 28, 33, 37, 45], [3, 4, 14, 22, 37, 41], [3, 5, 14, 22, 26, 44], [3, 5, 14, 26, 31, 44], [3, 5, 14, 31, 34, 44], [3, 6, 14, 22, 37, 41], [3, 7, 10, 29, 38, 44], [3, 7, 13, 20, 33, 38], [3, 7, 15, 22, 32, 44], [3, 8, 11, 17, 38, 44], [3, 8, 11, 22, 38, 39], [3, 10, 13, 15, 29, 44], [3, 13, 22, 27, 28, 38], [3, 20, 22, 26, 42, 43], [3, 20, 22, 27, 28, 38], [3, 20, 22, 28, 38, 43], [3, 22, 27, 28, 43, 44], [4, 5, 11, 22, 37, 42], [4, 5, 15, 22, 42, 43], [4, 6, 10, 19, 31, 44], [4, 7, 10, 29, 33, 38], [4, 8, 10, 31, 33, 45], [4, 8, 11, 21, 38, 39], [4, 9, 10, 31, 33, 44], [4, 13, 22, 27, 34, 38], [4, 14, 29, 31, 33, 45], [4, 16, 25, 33, 35, 43], [5, 6, 11, 16, 38, 45], [5, 6, 11, 22, 38, 41], [5, 6, 11, 26, 38, 45], [5, 6, 11, 27, 38, 44], [5, 6, 14, 22, 26, 41], [5, 8, 11, 30, 38, 39], [5, 9, 11, 20, 26, 43], [5, 13, 24, 25, 27, 44], [5, 13, 27, 31, 34, 44], [5, 15, 19, 22, 34, 36], [5, 22, 28, 31, 34, 36], [7, 8, 10, 19, 33, 44], [7, 8, 30, 33, 37, 41], [7, 18, 20, 22, 43, 44], [8, 10, 27, 33, 37, 41], [8, 13, 25, 31, 33, 44], [8, 14, 25, 31, 33, 45], [8, 14, 25, 31, 44, 45], [8, 14, 30, 31, 33, 38], [8, 15, 22, 33, 35, 41], [8, 17, 30, 33, 35, 44], [8, 19, 20, 33, 35, 41], [9, 13, 31, 32, 33, 38], [10, 11, 24, 26, 29, 38], [10, 13, 22, 31, 36, 44], [10, 14, 15, 22, 27, 43], [10, 16, 19, 33, 44, 45], [10, 17, 22, 27, 37, 43], [10, 18, 19, 22, 25, 44], [10, 18, 22, 29, 31, 44], [10, 18, 25, 26, 31, 44], [10, 19, 24, 31, 39, 44], [11, 12, 19, 32, 35, 45], [11, 12, 29, 31, 32, 39], [11, 12, 29, 31, 33, 38], [11, 13, 29, 30, 33, 38], [11, 14, 22, 30, 38, 39], [11, 14, 22, 32, 38, 39], [11, 15, 16, 26, 43, 45], [11, 15, 16, 28, 41, 43], [11, 16, 25, 27, 31, 44], [11, 16, 27, 31, 38, 44], [11, 21, 22, 24, 37, 41], [12, 13, 17, 25, 44, 45], [12, 13, 22, 31, 34, 44], [12, 13, 25, 29, 31, 44], [12, 19, 22, 25, 34, 44]]} \ No newline at end of file diff --git a/train.py b/train.py deleted file mode 100644 index 248c0cc..0000000 --- a/train.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -학습 구간(1~800회): 당첨번호가 필터를 모두 통과한 회차 수를 집계합니다. -최소 20회차 이후부터 통계(최근 N주 등)가 의미 있으므로 기본은 21~800회만 평가합니다. -""" - -import argparse -import os - -import pandas as pd - -from final_BallFilter import BallFilter - - -def load_history(resources_path: str) -> pd.DataFrame: - path = os.path.join(resources_path, "lotto_history.txt") - df = pd.read_csv(path, header=None) - df.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"] - return df - - -def run_train(resources_path: str, start_no: int, end_no: int) -> tuple[int, int, list[int]]: - df = load_history(resources_path) - hist_path = os.path.join(resources_path, "lotto_history.txt") - bf = BallFilter(hist_path) - wins = 0 - total = 0 - win_nos: list[int] = [] - for no in range(start_no, end_no + 1): - sub = df[df["no"] == no] - if sub.empty: - continue - answer = sorted(int(x) for x in sub.iloc[0][1:7].tolist()) - fts = bf.extract_final_candidates(answer, no=no, until_end=True, df=df) - total += 1 - if len(fts) == 0: - wins += 1 - win_nos.append(no) - return wins, total, win_nos - - -if __name__ == "__main__": - p = argparse.ArgumentParser() - p.add_argument("--resources", default=os.path.join(os.path.dirname(__file__), "resources")) - p.add_argument("--start-no", type=int, default=21) - p.add_argument("--end-no", type=int, default=800) - args = p.parse_args() - w, t, nos = run_train(args.resources, args.start_no, args.end_no) - rate = w / t if t else 0.0 - print(f"학습 구간 당첨 통과: {w} / {t} ({rate:.4f})") - print(f"통과 회차: {nos}") diff --git a/valid.py b/valid.py deleted file mode 100644 index 18fac87..0000000 --- a/valid.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -검증 구간(801~1000회): 필터만 검사(학습으로 튜닝하지 않음). -""" - -import argparse -import os - -import pandas as pd - -from final_BallFilter import BallFilter - - -def load_history(resources_path: str) -> pd.DataFrame: - path = os.path.join(resources_path, "lotto_history.txt") - df = pd.read_csv(path, header=None) - df.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"] - return df - - -def run_valid(resources_path: str, start_no: int, end_no: int) -> tuple[int, int, list[int]]: - df = load_history(resources_path) - hist_path = os.path.join(resources_path, "lotto_history.txt") - bf = BallFilter(hist_path) - wins = 0 - total = 0 - win_nos: list[int] = [] - for no in range(start_no, end_no + 1): - sub = df[df["no"] == no] - if sub.empty: - continue - answer = sorted(int(x) for x in sub.iloc[0][1:7].tolist()) - fts = bf.extract_final_candidates(answer, no=no, until_end=True, df=df) - total += 1 - if len(fts) == 0: - wins += 1 - win_nos.append(no) - return wins, total, win_nos - - -if __name__ == "__main__": - p = argparse.ArgumentParser() - p.add_argument("--resources", default=os.path.join(os.path.dirname(__file__), "resources")) - p.add_argument("--start-no", type=int, default=801) - p.add_argument("--end-no", type=int, default=1000) - args = p.parse_args() - w, t, nos = run_valid(args.resources, args.start_no, args.end_no) - rate = w / t if t else 0.0 - print(f"검증 구간 당첨 통과: {w} / {t} ({rate:.4f})") - print(f"통과 회차: {nos}")