Compare commits

..

15 Commits

Author SHA1 Message Date
dsyoon
79121dafdb init 2026-06-23 01:30:35 +09:00
af6b96fc12 update: add lotto draw results for rounds 1226 and 1227
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-07 15:36:03 +09:00
dsyoon
b82b5a58ee perf: filter scan optimization and portfolio selection improvements
Precompute p_ball to speed up exhaustive filtering, add fixed-ball validation with labeled exceptions, and improve portfolio selection via ymd-seeded shuffle and coverage-aware tie-breaking. Include lotto draw 1225 history update.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-27 11:10:37 +09:00
dsyoon
aa0f925d4e chore: recommend_ball.biz_25.json Git 추적 제외
로컬 산출물만 사용하고 원격 저장소에는 포함하지 않습니다.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-27 01:55:50 +09:00
dsyoon
0d27ee88e2 update: model#25 추천 데이터에 20260523 회차 추가
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-27 01:54:01 +09:00
dsyoon
d3da7346cd chore: track .env and remove it from .gitignore
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-19 21:52:37 +09:00
dsyoon
c329c44643 update: add lotto draw results for rounds 1223 and 1224
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-19 21:47:44 +09:00
ae47258ed1 update: refresh model#25 recommendation output
Persist the latest generated recommendation set for the 20260509 draw so the repository reflects the current final_practice run result.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-08 13:47:17 +09:00
919f2e19bb refactor: apply portfolio cap and align project docs
Keep the fixed 11-number set intact while adding a second-stage portfolio selection that caps final recommendations to the 70,000 KRW budget, and update docs/data/scripts to match the current project structure and runtime flow.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-08 10:37:03 +09:00
bd9eea2aee Practice/FilterTest: 인스턴스 상태, 정렬 일관성, 고정 5조합·중복·과거당첨 제외; README 정리
Made-with: Cursor
2026-04-12 11:21:58 +09:00
a6b170fefa init 2026-04-12 10:55:46 +09:00
e31eefef09 refactor: final_practice mirrors 3_Practice_22 (txt-based next no, exhaustive predict2)
Made-with: Cursor
2026-04-08 19:41:04 +09:00
b440ec96c9 fix: craw() keyword drw_no (not drwNo)
Made-with: Cursor
2026-04-08 19:34:25 +09:00
d08e906066 Add final_practice.py for next-draw recommendations via final_BallFilter
- Mirrors 3_Practice_22 flow: DataCrawler, optional API crawl, fixed combo,
  Monte Carlo filtered samples (default) or exhaustive mode
- Caps total recommendations under 100; saves recommend_ball.final.json

Made-with: Cursor
2026-04-08 19:33:26 +09:00
52e8495148 Add final BallFilter, train/valid scripts, train-derived sum filters
- final_BallFilter: CSV history loader, TRAIN_ALLOW for 6-sum and week diff,
  fix filterOneDigitPattern ball overwrite bug, drop socket call
- final_filter_params: build sum6 and abs_sum_diff from rounds 1-800
- filter_model re-exports BallFilter; train/valid evaluate pass-through counts
- final_filterTest aligned with 1_FilterTest_25 plus optional MC survivors
- README and scripts/run_with_ncue.sh for ncue workflow

Made-with: Cursor
2026-04-08 19:29:10 +09:00
35 changed files with 5288 additions and 12276 deletions

1
.env Normal file
View File

@@ -0,0 +1 @@
GIT_USER_TOKEN=c42edf3a584adbcac211c8c4ac13988efe56b0a5

6
.gitignore vendored
View File

@@ -82,9 +82,6 @@ celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
@@ -99,3 +96,6 @@ ENV/
# macOS metadata
.DS_Store
# 로컬 추천 번호 산출물 (Git 추적 제외)
resources/recommend_ball.biz_25.json

View File

@@ -4,6 +4,9 @@ import time
import requests
# JSON 포맷을 다루기 위한 라이브러리를 호출합니다.
import json
from datetime import datetime, timedelta
import random
import socket
import urllib3
@@ -19,8 +22,15 @@ except ModuleNotFoundError:
def sendMsg(self, msg):
    """Accept a message and discard it without sending anything.

    This is the do-nothing variant of the bot's ``sendMsg``; it always
    returns ``None``.
    """
    return None
_LOTTO_URL = "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}"
_REQUEST_TIMEOUT = 15
_LOTTO_URLS = (
"https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}",
"https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}",
)
_REQUEST_TIMEOUT = float(os.environ.get("LOTTO_REQUEST_TIMEOUT", "12"))
_FETCH_RETRIES_PER_DRAW = int(os.environ.get("LOTTO_FETCH_RETRIES", "3"))
_BACKOFF_BASE_SECONDS = float(os.environ.get("LOTTO_BACKOFF_BASE", "0.7"))
_MAX_CONSECUTIVE_FETCH_FAILURES = int(os.environ.get("LOTTO_MAX_CONSEC_FAIL", "8"))
_CONNECTION_PROBE_TIMEOUT = float(os.environ.get("LOTTO_PROBE_TIMEOUT", "3"))
_BROWSER_HEADERS = {
"User-Agent": (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
@@ -51,33 +61,62 @@ class DataCrawler:
self.bot = TelegramBot()
self._session = requests.Session()
self._session.headers.update(_BROWSER_HEADERS)
self._last_fetch_error = ""
def _can_reach_lottery_host(self):
    """Quickly probe whether a TCP connection to the lottery API host opens.

    Port 443 is tried on each known host name in turn.  The first
    successful connection yields True; when every attempt raises OSError
    the result is False.  This is not a full guarantee that the API works,
    it only detects a completely blocked network early so the caller can
    avoid pointless waiting.
    """
    candidate_hosts = ("www.dhlottery.co.kr", "dhlottery.co.kr")
    for hostname in candidate_hosts:
        endpoint = (hostname, 443)
        try:
            probe = socket.create_connection(endpoint, timeout=_CONNECTION_PROBE_TIMEOUT)
        except OSError:
            # This host is unreachable; fall through to the next candidate.
            continue
        probe.close()
        return True
    return False
def _fetch_draw(self, drw_no):
"""동행복권 API에서 단일 회차 결과를 가져옵니다. 실패 시 None."""
url = _LOTTO_URL.format(int(drw_no))
for verify in (_ssl_verify_arg(), False):
for method in ("POST", "GET"):
try:
res = self._session.request(
method,
url,
timeout=_REQUEST_TIMEOUT,
verify=verify,
)
if res.status_code != 200:
continue
text = res.text.strip()
if not text.startswith("{"):
continue
result = json.loads(text)
except (
requests.RequestException,
ValueError,
json.JSONDecodeError,
):
continue
if isinstance(result, dict) and result.get("returnValue") == "success":
return result
self._last_fetch_error = ""
verify_options = (_ssl_verify_arg(), False)
last_error = "unknown"
for attempt in range(1, _FETCH_RETRIES_PER_DRAW + 1):
for raw_url in _LOTTO_URLS:
url = raw_url.format(int(drw_no))
for verify in verify_options:
for method in ("POST", "GET"):
try:
res = self._session.request(
method,
url,
timeout=_REQUEST_TIMEOUT,
verify=verify,
)
if res.status_code != 200:
last_error = "http {}".format(res.status_code)
continue
text = res.text.strip()
if not text.startswith("{"):
last_error = "non-json response"
continue
result = json.loads(text)
except (
requests.RequestException,
ValueError,
json.JSONDecodeError,
) as ex:
last_error = str(ex)
continue
if isinstance(result, dict) and result.get("returnValue") == "success":
return result
rv = result.get("returnValue") if isinstance(result, dict) else "unknown"
last_error = "api returnValue={}".format(rv)
if attempt < _FETCH_RETRIES_PER_DRAW:
# 지수 백오프 + 지터로 일시적 네트워크 혼잡 완화
delay = _BACKOFF_BASE_SECONDS * (2 ** (attempt - 1)) + random.uniform(0, 0.25)
time.sleep(delay)
self._last_fetch_error = last_error
return None
def _append_draw_files(self, lottoHistoryFile, result):
@@ -120,6 +159,99 @@ class DataCrawler:
return None
return last_json.get("drwNo")
def _read_draw_map_from_json(self, json_path):
    """Load the whole JSONL history file into a ``drwNo -> record`` dict.

    Blank lines, lines that are not valid JSON, and records that are not a
    dict with ``returnValue == "success"`` and an integer ``drwNo`` are
    skipped.  When the same ``drwNo`` appears more than once, the last
    valid record wins.  A missing or empty file yields an empty dict.
    """
    records_by_no = {}
    if not os.path.isfile(json_path):
        return records_by_no
    if os.path.getsize(json_path) == 0:
        return records_by_no

    with open(json_path, "r", encoding="utf-8") as history:
        for raw_line in history:
            text = raw_line.strip()
            if not text:
                continue
            try:
                record = json.loads(text)
            except json.JSONDecodeError:
                # Corrupted line: drop it, the caller rewrites a clean file.
                continue
            if not isinstance(record, dict):
                continue
            if record.get("returnValue") != "success":
                continue
            draw_no = record.get("drwNo")
            if isinstance(draw_no, int):
                records_by_no[draw_no] = record
    return records_by_no
def _write_draw_map_files(self, lottoHistoryFile, draw_map):
    """Regenerate ``<lottoHistoryFile>.json`` and ``.txt`` in ascending drwNo order.

    The JSON file receives one serialized record per line; the text file
    receives one CSV line ``drwNo,n1,n2,n3,n4,n5,n6,bonus`` per draw.

    Args:
        lottoHistoryFile: Path prefix of the history files, without extension.
        draw_map: Mapping of drwNo -> record.  Every record must provide
            ``drwtNo1`` .. ``drwtNo6`` and ``bnusNo``.

    Raises:
        KeyError: If a record lacks one of the required number fields.
        TypeError: If a number field cannot be formatted with ``%d``.

    Both errors are raised while the output is still being rendered in
    memory, i.e. before any file is touched, so a bad record can no longer
    leave the existing history truncated.
    """
    number_keys = (
        "drwtNo1",
        "drwtNo2",
        "drwtNo3",
        "drwtNo4",
        "drwtNo5",
        "drwtNo6",
        "bnusNo",
    )

    # Render everything first: validation happens here, not mid-write.
    json_lines = []
    text_lines = []
    for drw_no in sorted(draw_map):
        result = draw_map[drw_no]
        text_lines.append(
            "%d,%d,%d,%d,%d,%d,%d,%d\n"
            % ((drw_no,) + tuple(result[key] for key in number_keys))
        )
        json_lines.append(json.dumps(result, ensure_ascii=False) + "\n")

    # Write to a sibling temp file and swap it in, so an interrupted write
    # never replaces a good history file with a partial one.
    for suffix, lines in ((".json", json_lines), (".txt", text_lines)):
        final_path = lottoHistoryFile + suffix
        tmp_path = final_path + ".tmp"
        with open(tmp_path, "w", encoding="utf-8") as fp:
            fp.writelines(lines)
        os.replace(tmp_path, final_path)
def _get_last_week_draw_date(self, now=None):
    """Return the most recent Saturday whose draw should already be available.

    Example: run on Friday 2026-05-08 -> Saturday 2026-05-02.

    Args:
        now: Optional reference time.  When omitted, the current time in
            Korea Standard Time (UTC+9) is used, so the result no longer
            depends on the host's local time zone.  An aware datetime is
            converted to KST; a naive datetime is taken as-is (treated as
            KST wall-clock time).

    Returns:
        A ``datetime.date`` that always falls on a Saturday.
    """
    # Local import: the file-level import only brings in datetime/timedelta.
    from datetime import timezone

    # NOTE(review): the 20:00 cutoff is assumed to be KST, the lottery
    # operator's local time -- confirm against the draw schedule.
    kst = timezone(timedelta(hours=9))
    if now is None:
        now = datetime.now(kst)
    elif now.tzinfo is not None:
        now = now.astimezone(kst)

    days_since_saturday = (now.weekday() - 5) % 7
    latest_saturday = now.date() - timedelta(days=days_since_saturday)
    # On a Saturday before the draw (earlier than 20:00) today's result does
    # not exist yet, so target the previous Saturday instead.
    if now.weekday() == 5 and now.hour < 20:
        latest_saturday -= timedelta(days=7)
    return latest_saturday
def _estimate_target_draw_no(self, draw_map):
    """Estimate the draw number the local history should reach on this run.

    The ``drwNoDate`` of the highest stored draw is compared with the date
    returned by ``_get_last_week_draw_date()``; every full week between the
    two adds one to the last stored draw number.

    Args:
        draw_map: Mapping of drwNo -> record (records are read with ``.get``).

    Returns:
        ``None`` when ``draw_map`` is empty.  The last stored draw number
        when its date is missing, not a string, unparsable, or less than a
        full week behind the target date.  Otherwise the last stored draw
        number plus the number of whole weeks elapsed.
    """
    if not draw_map:
        return None

    last_no = max(draw_map)
    last_date_str = draw_map[last_no].get("drwNoDate", "")
    try:
        last_date = datetime.strptime(last_date_str, "%Y-%m-%d").date()
    except (TypeError, ValueError):
        # ValueError: malformed date text.  TypeError: the field is present
        # but not a string (e.g. JSON null); previously this escaped.
        return last_no

    target_date = self._get_last_week_draw_date()
    # Floor division is <= 0 whenever the target is not at least one full
    # week past the stored date, which covers the "already current" case.
    week_gap = (target_date - last_date).days // 7
    if week_gap <= 0:
        return last_no
    return last_no + week_gap
# 로또 당첨 데이터를 수집해서 파일로 저장합니다.
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일 (확장자 제외)
def craw(self, lottoHistoryFile, drwNo=None):
@@ -161,44 +293,97 @@ class DataCrawler:
def excute(self, resource_path):
"""
resources/lotto_history.* 를 마지막 회차 이후까지 연속으로 갱신합니다.
(기존: 날짜 비교 + 1회차만 수집 → 누락·정지가 잦음)
resources/lotto_history.* 를 지난 주 기준으로 누락 없이 동기화합니다.
- 마지막 회차+1만 확인하지 않고, 1~목표회차 범위에서 누락 회차를 탐지/보강
- 중복/깨진 라인을 정리해 json/txt를 일관 상태로 재생성
"""
lottoHistoryFile = os.path.join(resource_path, "lotto_history")
json_path = lottoHistoryFile + ".json"
last_no = self._read_last_draw_from_json(json_path)
if last_no is None:
self.craw(lottoHistoryFile)
draw_map = self._read_draw_map_from_json(json_path)
# 기존 이력이 비었거나 깨졌으면 전체 재수집(기존 동작 유지)
if not draw_map:
try:
self.craw(lottoHistoryFile)
self.bot.sendMsg("[Lottery Crawler] full history rebuilt (no valid json).")
except Exception:
pass
return True
added = 0
next_no = last_no + 1
while True:
result = self._fetch_draw(next_no)
if result is None:
break
self._append_draw_files(lottoHistoryFile, result)
added += 1
next_no += 1
time.sleep(0.35)
target_no = self._estimate_target_draw_no(draw_map)
if target_no is None:
target_no = max(draw_map.keys())
if added == 0:
if not self._can_reach_lottery_host():
msg = "[Lottery Crawler] network blocked: cannot reach dhlottery host."
print(msg)
try:
self.bot.sendMsg(msg)
except Exception:
pass
return False
missing_nos = [no for no in range(1, target_no + 1) if no not in draw_map]
added = 0
failed = []
aborted_missing_nos = []
consecutive_failure = 0
fail_reasons = {}
for no in missing_nos:
result = self._fetch_draw(no)
if result is None:
failed.append(no)
reason = self._last_fetch_error or "unknown"
fail_reasons[reason] = fail_reasons.get(reason, 0) + 1
consecutive_failure += 1
if consecutive_failure >= _MAX_CONSECUTIVE_FETCH_FAILURES:
aborted_missing_nos = [x for x in missing_nos if x > no]
break
continue
draw_map[no] = result
added += 1
consecutive_failure = 0
time.sleep(0.2)
# 누락 보강 또는 중복 정리 여지가 있으면 파일을 재생성
self._write_draw_map_files(lottoHistoryFile, draw_map)
last_no = max(draw_map.keys())
if added == 0 and not failed:
try:
self.bot.sendMsg(
"[Lottery Crawler] up to date (last drwNo={}).".format(last_no)
"[Lottery Crawler] up to date (last drwNo={}, target={}).".format(
last_no, target_no
)
)
except Exception:
pass
elif failed:
sample = ",".join(str(x) for x in failed[:10])
reason_items = sorted(fail_reasons.items(), key=lambda x: x[1], reverse=True)
reason_str = "; ".join("{} x{}".format(reason, count) for reason, count in reason_items[:3])
if aborted_missing_nos:
reason_str += " | aborted {} pending draws due to consecutive failures".format(
len(aborted_missing_nos)
)
try:
self.bot.sendMsg(
"[Lottery Crawler] appended {}, failed {} draw(s): {}{} | {}".format(
added,
len(failed),
sample,
"..." if len(failed) > 10 else "",
reason_str or "no reason",
)
)
except Exception:
pass
else:
try:
self.bot.sendMsg(
"[Lottery Crawler] appended {} draw(s), last drwNo={}.".format(
added, last_no + added
"[Lottery Crawler] appended {} draw(s), last drwNo={}, target={}.".format(
added, last_no, target_no
)
)
except Exception:

View File

@@ -1,11 +1,12 @@
데이터는 다음과 같습니다.
(학습 데이터)
- train.json, train.txt
- 1회차부터 800회차
- lotto_history.txt에서 1회차부터 800회차
(검증 데이터)
- valid.json, valid.txt
- 801회차부터 1000회차
- lotto_history.txt에서 801회차부터 1000회차
(테스트 데이터)
- lotto_history.txt에서 1001회차부터 이후 모두
파일 구조를 먼저 이해하세요.
@@ -110,3 +111,6 @@
그리고 요구사항에 대해서 시도 방법을 설계하세요.
그리고 반복적으로 실행해서 최적화된 방법을 찾아서 적용해주세요.
(최적화는 언제든 학습 데이터로 최적화를 해야 합니다. 그리고 검증 데이터로 테스트만 수행하세요.)
당첨번호에 대한 추천 개수가 100개 미만이어야 합니다.
1_FilterTest_25.py, BallFilter_25.py를 참고해서 최적의 final_filterTest.py, final_BallFilter.py를 작성해 주세요.

421
README.md
View File

@@ -1,343 +1,112 @@
# 실행 순서
# deeplottery
## final_BallFilter · `final_filterTest.py` (miniconda **ncue**)
`deeplottery`는 로또 번호를 예측하는 프로젝트가 아니라, **전체 조합(45C6)을 규칙 기반으로 필터링해 후보를 줄이는 시스템**입니다.
핵심은 `BallFilter` 엔진이며, 운영 실행(`final_practice.py`)과 검증(`final_FilterTest.py`)이 분리되어 있습니다.
임계값은 `tools/compute_final_filter_params.py`가 학습 구간(1~800회) 분포에서 생성하며, 결과는 `final_filter_params.py`에 기록됩니다.
## 목표와 설계 의도
- 목표: 통계/패턴 기반 규칙으로 비효율 조합을 제거하고 후보군을 관리 가능한 크기로 축소
- 설계 의도:
- 필터 규칙은 `final_BallFilter.py` 한 곳에서 관리
- 운영 추천 생성과 과거 회차 검증을 분리하여 반복 개선
- 같은 엔진을 운영/검증에서 공통 사용해 일관성 유지
## 전체 아키텍처
1. 데이터 수집/갱신
- `DataCrawler.py`가 로또 API를 호출해 `resources/lotto_history.json`, `resources/lotto_history.txt` 갱신
2. 필터 엔진 로딩
- `final_BallFilter.py`의 `BallFilter`가 과거 당첨 이력을 메모리로 적재
3. 운영 후보 생성
- `final_practice.py`가 다음 회차 기준 전체 조합을 순회하며 `BallFilter.filter()`로 통과 조합만 저장
4. 필터 성능 검증
- `final_FilterTest.py`가 과거 당첨번호를 기준으로 어떤 필터가 당첨을 걸렀는지/통과시켰는지 분석
## 핵심 파일 설명
- `final_BallFilter.py`
- 프로젝트 핵심 엔진
- `extract_final_candidates()`에서 규칙 기반 탈락 사유(`set`)를 생성
- `filter()`는 실사용 진입점이며, 반환 `set`이 비어 있으면 통과
- `final_practice.py`
- 운영 실행 스크립트
- `predict1()`은 고정 11조합을 유지
- `predict2()`는 1차 필터 통과 조합을 만든 뒤, 2차 포트폴리오 선별로 최종 추천 수를 제한
- 총 추천 개수는 고정수 포함 최대 70게임(70,000원) 상한을 적용
- 결과를 `resources/recommend_ball.biz_25.json`에 저장하고 Telegram 전송
- `final_FilterTest.py`
- 검증/분석 스크립트
- `find_filter_method()`로 회차별 필터 적중 통계 확인
- `find_final_candidates()`로 특정 회차 후보군 재생성
- `DataCrawler.py`
- 과거 이력 파일 수집/보강
- 네트워크 실패 재시도/백오프 및 누락 회차 보완 처리
- `TelegramBot.py`
- 추천 결과 메시지 전송
- `resources/`
- `lotto_history.json`: 회차별 원본 JSON 라인 데이터
- `lotto_history.txt`: 회차별 CSV 형태 요약 데이터
- `recommend_ball.biz_25.json`: 회차별 추천 결과 저장 파일
## 실행 방법
Python 실행은 Miniconda `ncue` 환경을 사용합니다.
```bash
conda activate ncue
python tools/compute_final_filter_params.py
python final_filterTest.py
python DataCrawler.py
python final_FilterTest.py
python final_practice.py
```
conda 경로를 쓰기 어려우면 프로젝트의 `scripts/run_with_ncue.sh`로 동일하게 실행할 수 있습니다.
## 동작 방식 상세
```bash
./scripts/run_with_ncue.sh tools/compute_final_filter_params.py
./scripts/run_with_ncue.sh final_filterTest.py
```
- 입력: 1~45에서 6개 조합 전체
- 처리:
- 1차: `BallFilter` 규칙 필터 적용
- 2차: 고정 11조합을 유지한 채, 겹침도 기반 포트폴리오 선별로 후보 축소
- 출력:
- 탈락 조합: 탈락 사유 집합 반환
- 최종 추천 조합: 예산 상한(최대 70,000원) 내에서 저장/전송
* FilterFeature.py를 실행한다.
* lotto_history.json을 읽어서 all_filter_[1-100].[cluster,csv,feature] 파일을 생성한다.
주요 규칙 범주 예시:
- 합/평균 및 전주 대비 차이
- 앞 3개/뒤 3개 합 패턴
- 고저 비율, 끝자리 패턴, AC 값
- 배수 개수(3/4/5/6 등)
- 최근 N주 출현 빈도/중복 관련 규칙
- 비선호 2개/3개 조합 제거 규칙
* FilterFeatureCluster.py를 실행한다.
* 첫수는 1~10까지만 허용한다.
* random_state 전체 내 각 cluster에 대해서 당첨 회수를 파악하여 ./resources/cluster_win_info.csv 파일을 생성한다.
* 생성 파일
* filtertest_1.csv: random_state 내 cluster 개수를 파악한다.
* filtertest_2.csv: random_state 내 cluster 개수 별 전체 당첨 회수를 파악한다.
* filtertest_3.csv: random_state 내 cluster 개수 별 최초 당첨 번호만 파악한다.
## 디렉터리 현재 상태 (2026-05-08)
### 최상위 파일/디렉터리 현황
* 실행할 random_state와 cluster 번호 파악
* filtertest_2.csv과 answer_pattern_analsys.xlsx을 이용하여 선별한다.
- 실행/핵심
- `DataCrawler.py`
- `final_BallFilter.py`
- `final_FilterTest.py`
- `final_practice.py`
- `final_Practice.py` (구버전 스크립트)
- `TelegramBot.py`
- 레거시 참조 파일
- `BallFilter_22.py`, `BallFilter_25.py`
- `1_FilterTest_22.py`, `1_FilterTest_25.py`
- `2_FilterTestReview_22.py`, `2_FilterTestReview_25.py`
- `3_Practice_22.py`, `3_Practice_25.py`
- `fixed10.py`
- 데이터/설정
- `resources/`
- `requirements.txt`
- `scripts/`
### Git 작업 트리 상태(요약)
* cluster_info.json 파일 업데이트
* 실행할 random_state와 cluster 번호를 json 형태로 등록한다.
- 수정됨: `DataCrawler.py`, `README.md`, `final_BallFilter.py`, `final_practice.py`, `resources/lotto_history.json`, `resources/lotto_history.txt`
- 삭제됨: `filter_model.py`, `final_filter_params.py`, `train.py`, `valid.py`
- 신규(미추적): `resources/recommend_ball.biz_25.json`
## 주의 사항
* Util_filegen.py 실행
* m1, amd, intel 컴퓨터에서 실행할 sh, bat 파일을 생성한다.
* 파이썬 내에서 아래 두 부분만 수정하면 된다.
* m1_file_max, amd_file_max, intel_file_max = 8,12,7
* m1_proc_limit, amd_proc_limit, intel_proc_limit = 124,125,110
* 각 장비에서 sh와 bat 파일 실행
## Ruleset(임계값 설정) 기반으로 운영하기
`filter_model.BallFilter`의 주요 임계값(합/평균/앞3합/뒤3합/간격 등)을 **JSON ruleset**으로 외부화했습니다.
이제 “코드 수정 없이” ruleset 파일만 바꿔서 실험/튜닝을 자동화할 수 있습니다.
- **기본 ruleset 경로**: `resources/rulesets/default.json`
- **주의/한계**: 로또는 본질적으로 랜덤(독립/균등 가설)이며, ruleset은 “구매 조합 수를 줄이기 위한 필터”입니다. **당첨 보장/예측을 주장하지 않습니다.**
### valid 성능 확인 예시
```bash
python scripts/eval_filters.py \
--data valid \
--resources resources \
--ruleset resources/rulesets/default.json \
--start-no 801 --end-no 1000 \
--survivors-samples 0
```
### survivors(생존 조합 수) 근사 포함 예시
```bash
python scripts/eval_filters.py \
--data valid \
--resources resources \
--ruleset resources/rulesets/default.json \
--start-no 801 --end-no 1000 \
--survivors-samples 3000
```
## 자동 튜닝 → ruleset 생성 → 일괄 평가 파이프라인
### 1) train 기반 자동 튜닝(후보 ruleset 생성)
아래 스크립트는 **train 구간에서만** 임계값을 랜덤 탐색으로 튜닝한 뒤,
`resources/rulesets/`에 `Balanced.json`, `Coverage-First.json`을 저장합니다.
```bash
python scripts/tune_ruleset.py \
--resources resources \
--base-ruleset resources/rulesets/default.json \
--out-dir resources/rulesets \
--train-start 21 --train-end 800 \
--hit-rate-min 0.01 \
--iters 200 \
--mc-samples 40000
```
- **Coverage-First**: survivors(생존 조합 수) 최소화를 우선
- **Balanced**: survivors를 줄이되 hit-rate도 함께 고려
> 주의: survivors는 전수(8,145,060조합) 대신 **풀링 Monte Carlo**로 근사하므로 오차가 있습니다.
### 2) valid/train 구간에서 ruleset 일괄 평가
```bash
python scripts/eval_rulesets.py \
--resources resources \
--rulesets-dir resources/rulesets \
--data valid \
--start-no 801 --end-no 1000 \
--survivors-samples 0
```
# Query
```SQL
##### #####
with source_count as (
select source, count(*) as source_count
from cluster_info
where priority not in (99)
and source in (1,3)
group by 1
),
ball_count as (
# 1) random_state, cluster
select source, random_state, cluster, ball_cnt
from (
SELECT source, random_state, cluster, count(*)
as ball_cnt
from recommend_ball
where no=1136
and b1 > 0
group by 1,2,3
union all
SELECT source, random_state, cluster, 0 as ball_cnt
from recommend_ball
where no=1136
and b1 = 0
group by 1,2,3
) lj
),
source_rc_cluster_list as (
select ci.source, ci.random_state, ci.cluster, ci.cluster_count, ci.win_count, ci.priority, rc.source_count, bc.ball_cnt
from cluster_info ci
left join source_count rc on ci.source = rc.source
left join ball_count bc on ci.source = bc.source and ci.random_state = bc.random_state and ci.cluster = bc.cluster
where ci.priority not in (99)
and ci.source in (1,3)
),
source_process as (
select source, "done" as type, count(*) as cnt from source_rc_cluster_list
where ball_cnt is not NULL
group by 1,2
union all
select source, "yet" as type, count(*) as cnt from source_rc_cluster_list
where ball_cnt is NULL
group by 1,2
)
select source, type, cnt,
case when source=1 then concat(round(100.0 * cnt / (select source_count from source_count where source=1),2), '%')
when source=3 then concat(round(100.0 * cnt / (select source_count from source_count where source=3),2), '%')
end as rate from source_process order by 1,2
;
### ###
SELECT ci.source, ci.random_state, ci.cluster, lj.cnt
from cluster_info ci
left join (select source, random_state, cluster, count(*) as cnt from recommend_ball rb where no=1136 group by 1,2,3) lj on ci.source=lj.source and ci.random_state=lj.random_state and ci.cluster=lj.cluster
where priority not in (99)
and lj.cnt is null
order by 1,2,3
;
##### cluster #####
with raw_data as (
select rb.source, ci.priority, rb.random_state, rb.cluster, ci.cluster_count, ci.win_count, b1, count(*) as ball_cnt
from recommend_ball rb left join cluster_info ci on rb.source=ci.source and rb.random_state = ci.random_state and rb.cluster = ci.cluster
where no=1136
group by 1,2,3,4,5,6,7
),
all_cluster as (
select source, priority, random_state, cluster, ball_cnt
from raw_data
where (
(source = 1 and priority in (1,2)) or
(source = 3 and priority in (1,2))
)
group by 1,2,3,4
),
valid_total_cluster as (
select source, priority, random_state, cluster, ball_cnt
from raw_data
where (
(source = 1 and priority = 1 and
ball_cnt BETWEEN 50 and 80
) or
(source = 1 and priority = 2 and (
win_count = 12 and ball_cnt BETWEEN 50 and 80)
) or
(source = 3 and priority = 1 and
(ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100)
) or
(source = 3 and priority = 2 and (
win_count=13 and (ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100))
) or
(source = 1 and
((win_count between 5 and 10) and ball_cnt BETWEEN 1 and 20)
)
)
group by 1,2,3,4
),
valid_none_0_cluster as (
select source, priority, random_state, cluster, ball_cnt
from raw_data
where b1 <> 0 AND
(
(source = 1 and priority = 1 and
ball_cnt BETWEEN 50 and 80
) or
(source = 1 and priority = 2 and (
win_count = 12 and ball_cnt BETWEEN 50 and 80)
) or
(source = 3 and priority = 1 and
(ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100)
) or
(source = 3 and priority = 2 and (
win_count=13 and (ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100))
) or
(source = 1 and
((win_count between 5 and 10) and ball_cnt BETWEEN 1 and 20)
)
)
group by 1,2,3,4
)
#
select 1 as col, count(*) from all_cluster
union all
#
select 2 as col, count(*) from valid_total_cluster
union all
# 0
select 3 as col, count(*) from valid_none_0_cluster
;
##### #####
select b1,b2,b3,b4,b5,b6,count(*) as ball_cnt
from recommend_ball
where no=1136
and b1>0
group by 1,2,3,4,5,6
order by 7 desc;
##### #####
with priority as (
select source, random_state, cluster, cluster_count, win_count, priority
from cluster_info
where priority not in (99)
),
recommend as (
select source, random_state, cluster, b1,b2,b3,b4,b5,b6
from recommend_ball
where b1 > 0
and no=1136
),
recommend_count as (
select source, random_state, cluster, count(*) as ball_cnt
from recommend_ball
where b1 > 0
and no=1136
group by 1,2,3
),
raw_data as (
select r.source, r.random_state, r.cluster, p.cluster_count, p.win_count, p.priority, r.b1,r.b2,r.b3,r.b4,r.b5,r.b6, rc.ball_cnt
from recommend r
left join priority p on r.source=p.source and r.random_state=p.random_state and r.cluster=p.cluster
left join recommend_count rc on r.source=rc.source and r.random_state=rc.random_state and r.cluster=rc.cluster
),
candidate as (
select source, random_state, cluster, cluster_count, win_count, priority, b1,b2,b3,b4,b5,b6, ball_cnt
from raw_data
where (
(source = 0 and b1=7)
or (source = 1 and priority=-1 and ball_cnt<=140 and (
b1 not in (13, 19, 28)
and b2 not in (13, 19, 28)
and b3 not in (13, 19, 28)
and b4 not in (13, 19, 28)
and b5 not in (13, 19, 28)
and b6 not in (13, 19, 28)
)
)
or (source = 3 and priority=-1 and ball_cnt<=150 and (
b1 not in (13, 19, 28)
and b2 not in (13, 19, 28)
and b3 not in (13, 19, 28)
and b4 not in (13, 19, 28)
and b5 not in (13, 19, 28)
and b6 not in (13, 19, 28)
)
)
)
)
#select source, random_state,cluster,b1,b2,b3,b4,b5,b6 from candidate order by 4,5,6,7,8,9;
, duplication as (
# 34
select source, random_state, cluster, cluster_count, win_count, priority, b1,b2,b3,b4,b5,b6, ball_cnt
from (
select source, random_state, cluster, cluster_count, win_count, priority, b1,b2,b3,b4,b5,b6, ball_cnt,
ROW_NUMBER() OVER(PARTITION BY b1,b2,b3,b4,b5,b6 ORDER BY b1,b2,b3,b4,b5,b6) AS rnk
from candidate
) a
where rnk=1
order by source,random_state,cluster,b1,b2,b3,b4,b5,b6
)
select count(*) as cnt from duplication;
#select source, priority, random_state, cluster, win_count, count(*) as cnt from duplication group by 1,2,3;
#select b1, count(*) as ball_cnt from duplication group by 1
#select b6, count(*) as ball_cnt from duplication group by 1
#select source,random_state,cluster,b1,b2,b3,b4,b5,b6 from duplication order by 4,5,6,7,8,9;
```
- 이 프로젝트는 통계적 휴리스틱 기반의 후보 축소 도구이며 당첨을 보장하지 않습니다.
- 필터가 강해질수록 후보 수는 줄지만, 실제 당첨 조합을 배제할 위험도 함께 증가합니다.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,94 +0,0 @@
"""
filter_model_3.py
OR-composed BallFilter:
- A candidate ball is ACCEPTED if it passes EITHER filter_model_1 OR filter_model_2.
- A candidate ball is REJECTED only if it fails BOTH.
This keeps the same public interface used across the project:
BallFilter(lottoHistoryFileName, ruleset_path=..., ruleset=...)
.filter(ball, no, until_end=False, df=None, filter_ball=None) -> set[str]
.extract_final_candidates(ball, no=None, until_end=False, df=None) -> set[str]
Notes:
- The underlying filters return a non-empty set of failure reasons when rejected.
- Callers treat "len(result) == 0" as PASS.
"""
from __future__ import annotations
from typing import Any, Dict, Optional
import filter_model_1 as fm1
import filter_model_2 as fm2
class BallFilter:
    """
    Combine filter_model_1.BallFilter and filter_model_2.BallFilter with OR semantics.

    A ball passes (empty set) as soon as either model accepts it.  Only when
    both models reject it is a non-empty set returned: the union of both
    models' reasons, each tagged with "m1:" / "m2:" for debugging.
    """

    def __init__(
        self,
        lottoHistoryFileName: Optional[str] = None,
        # Shared ruleset settings, used for any model without its own override
        ruleset_path: Optional[str] = None,
        ruleset: Optional[Dict[str, Any]] = None,
        # Per-model overrides (take precedence when not None)
        ruleset_path_1: Optional[str] = None,
        ruleset_path_2: Optional[str] = None,
        ruleset_1: Optional[Dict[str, Any]] = None,
        ruleset_2: Optional[Dict[str, Any]] = None,
    ):
        path_for_1 = ruleset_path if ruleset_path_1 is None else ruleset_path_1
        path_for_2 = ruleset_path if ruleset_path_2 is None else ruleset_path_2
        rules_for_1 = ruleset if ruleset_1 is None else ruleset_1
        rules_for_2 = ruleset if ruleset_2 is None else ruleset_2

        self.m1 = fm1.BallFilter(lottoHistoryFileName, ruleset_path=path_for_1, ruleset=rules_for_1)
        self.m2 = fm2.BallFilter(lottoHistoryFileName, ruleset_path=path_for_2, ruleset=rules_for_2)

    # ------------------------------------------------------------------
    # Helper lookups: answered by model 1 only.
    # ------------------------------------------------------------------
    def getBall(self, no):
        """Return model 1's ``getBall(no)``."""
        return self.m1.getBall(no)

    def getLastNo(self, YMD):
        """Return model 1's ``getLastNo(YMD)``."""
        return self.m1.getLastNo(YMD)

    def getNextNo(self, YMD):
        """Return model 1's ``getNextNo(YMD)``."""
        return self.m1.getNextNo(YMD)

    def getYMD(self, no):
        """Return model 1's ``getYMD(no)``."""
        return self.m1.getYMD(no)

    def _prefixed(self, prefix: str, reasons: set) -> set:
        """Return a new set with ``prefix`` put in front of every reason."""
        tagged = set()
        for r in reasons:
            tagged.add(f"{prefix}{r}")
        return tagged

    def extract_final_candidates(self, ball, no=None, until_end: bool = False, df=None):
        """
        Evaluate the ball with model 1, then (only if needed) model 2.

        Returns an empty set when either model reports no failure reasons.
        Model 2 is not consulted when model 1 already passes.  If both
        reject, the tagged union of their reasons is returned.
        """
        first = self.m1.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)
        if len(first) == 0:
            return set()

        second = self.m2.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)
        if len(second) == 0:
            return set()

        # Neither model accepted the ball: report every reason, tagged by origin.
        tagged_first = self._prefixed("m1:", set(first))
        tagged_second = self._prefixed("m2:", set(second))
        return tagged_first | tagged_second

    def filter(self, ball, no, until_end: bool = False, df=None, filter_ball=None):
        """
        Entry point with the signature existing callers use.

        ``filter_ball`` is accepted but not used; the call is forwarded to
        ``extract_final_candidates`` with the remaining arguments.
        """
        return self.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)

File diff suppressed because it is too large Load Diff

View File

@@ -1,50 +1,33 @@
# -*- coding: utf-8 -*-
"""
학습(1~800) / 검증(801~1000) / 테스트(1001~) 구간별 필터 통과(당첨번호가 필터를 통과하는지) 분석.
1_FilterTest_25.py 와 동일한 흐름이며 BallFilter 대신 final_BallFilter.BallFilter 를 사용합니다.
실행: miniconda 환경 ncue 에서 `python final_filterTest.py` (README 참고).
"""
from __future__ import annotations
import datetime
import os
import time
import pandas as pd
import itertools
from final_BallFilter import BallFilter
# PROMPT.txt 기준 구간
TRAIN_NO = (1, 800)
VALID_NO = (801, 1000)
TEST_NO = (1001, 10**9)
import time
import datetime
class FilterTest:
def __init__(self, resources_path: str):
lotto_json = os.path.join(resources_path, "lotto_history.json")
self.ballFilter = BallFilter(lotto_json)
def find_filter_method(self, df_ball, filter_ball=None, no_min=None, no_max=None):
"""no_min~no_max 회차만 역순으로 검사 (None 이면 전체)."""
def __init__(self, resources_path):
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
self.ballFilter = BallFilter(lottoHistoryFileName)
return
def find_filter_method(self, df_ball, filter_ball=None):
win_count = 0
no_filter_ball = {}
printLog = True
filter_dic = {}
filter_dic_len = {}
filter_dic_1 = {}
filter_dic_2 = {}
for i in range(len(df_ball)-1, 19, -1):
idx_list = list(range(len(df_ball) - 1, 19, -1))
for i in idx_list:
no = int(df_ball["no"].iloc[i])
if no_min is not None and no < no_min:
continue
if no_max is not None and no > no_max:
continue
answer = df_ball[df_ball["no"] == no].values.tolist()[0]
answer = answer[1:7]
no = df_ball['no'].iloc[i]
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = sorted(answer[1:7])
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
filter_type = list(filter_type)
@@ -53,60 +36,180 @@ class FilterTest:
if size == 0:
win_count += 1
no_filter_ball[no] = answer
print("\t", no)
elif size == 1:
key = filter_type[0]
filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
if key not in filter_dic_1:
filter_dic_1[key] = 1
else:
filter_dic_1[key] += 1
if printLog:
print("\t", no, filter_type)
elif size == 2:
key = ",".join(filter_type)
filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
key = ','.join(filter_type)
if key not in filter_dic_2:
filter_dic_2[key] = 1
else:
filter_dic_2[key] += 1
if printLog:
print("\t", no, filter_type)
else:
if printLog:
print("\t", no, filter_type)
# 회차별 필터개수가 적은 것을 정렬하기 위함
if size not in filter_dic_len:
filter_dic_len[size] = []
filter_dic_len[size].append(filter_type)
for f_t in filter_type:
filter_dic[f_t] = filter_dic.get(f_t, 0) + 1
if f_t not in filter_dic:
filter_dic[f_t] = 1
else:
filter_dic[f_t] += 1
print("\n\t[구간 {}~{}] 필터에 걸리지 않은 회차 (당첨 조합 통과)]".format(no_min, no_max))
print("\tcount: {:,} (통과)".format(len(no_filter_ball)))
for no in sorted(no_filter_ball.keys()):
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
sorted_filter_dic_len = sorted(filter_dic_len.keys())
for filter_count in sorted_filter_dic_len:
for filter_type in filter_dic_len[filter_count]:
print("\t\t>{} > {}".format(filter_count, filter_type))
print("\n\t[걸러진 유일 필터]")
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_1)):
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
print("\n\t[2개 필터에 걸린 경우]")
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_2)):
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
print("\n\t[Filter 유형 별 걸린 개수]")
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic)):
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
for no in no_filter_ball:
print("\t\t>", no, no_filter_ball[no])
print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
return win_count, no_filter_ball
return win_count
def report_split(self, df_ball, name: str, lo: int, hi: int):
    """Run the filter analysis for rounds ``lo``..``hi`` and print a summary.

    Prints a banner with the split name, calls ``find_filter_method``
    restricted to the round range, then reports the elapsed time and the
    number / percentage of rounds whose winning combination passed.  An
    extra reference line is printed when the range lies inside the train
    range, and another when it lies inside the validation range.

    Returns:
        The pass count reported by ``find_filter_method``.
    """
    banner = "=" * 60
    print("\n" + banner)
    print(" {} | 회차 {} ~ {}".format(name, lo, hi))
    print(banner)

    started = time.time()
    outcome = self.find_filter_method(df_ball, no_min=lo, no_max=hi)
    wc, _ = outcome
    elapsed = datetime.timedelta(seconds=time.time() - started)

    span = hi - lo + 1
    if span:
        rate = wc / span * 100
    else:
        rate = 0
    print("\t처리 시간: {}".format(elapsed))
    print("\t통과 회차 수: {} / {} ({:.2f}%)".format(wc, span, rate))

    train_lo, train_hi = TRAIN_NO[0], TRAIN_NO[1]
    if train_lo <= lo and hi <= train_hi:
        # Reference threshold: at least one passing round per 100 rounds.
        need = max(1, span // 100)
        print("\t(참고) 100회당 최소 1회 기준 대략 {}회 이상이면 충족".format(need))

    valid_lo, valid_hi = VALID_NO[0], VALID_NO[1]
    if valid_lo <= lo and hi <= valid_hi:
        print("\t(참고) 검증 200회 구간에서 최소 3회 이상이면 요구사항 예시 충족")
    return wc
def find_final_candidates(self, no, df_ball, filter_ball=None, number_pool=None):
    """Enumerate 6-number combinations and keep those no filter rejects.

    Args:
        no: Draw number passed to ``self.ballFilter.filter``.
        df_ball: Draw-history table passed to ``self.ballFilter.filter``.
        filter_ball: Optional iterable of numbers; any combination containing
            at least one of them is skipped without being filtered.
        number_pool: Optional iterable of numbers to draw combinations from.
            Defaults to 1..45, which reproduces the original behaviour.

    Returns:
        List of sorted 6-number lists for which the filter returned nothing.
    """
    pool = list(range(1, 46)) if number_pool is None else list(number_pool)
    # Build the exclusion set once instead of once per combination.
    excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()

    final_candidates = []
    # Iterate lazily: the default pool yields C(45, 6) = 8,145,060 tuples and
    # there is no need to hold them all in memory at the same time.
    for idx, combo in enumerate(itertools.combinations(pool, 6)):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))
        if not excluded.isdisjoint(combo):
            continue
        ball = sorted(combo)
        filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if len(filter_type):
            continue
        final_candidates.append(ball)
    return final_candidates
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
    """Count historical draws whose winning numbers pass the final filter.

    Rows are visited from the last one back to position 1; the row at
    position 0 is never checked. Each passing draw is printed, followed by a
    summary line.

    Args:
        df_ball: Draw-history table with a ``no`` column followed by the numbers.
        p_win_count: Reference count, only echoed in the summary line.
        filter_ball: Optional iterable of numbers; draws containing any of
            them are skipped.

    Returns:
        None. Results are reported on stdout only.
    """
    passed = 0
    draw_numbers = df_ball['no']
    for position in reversed(range(1, len(df_ball))):
        no = draw_numbers.iloc[position]
        row = df_ball[draw_numbers == no].values.tolist()[0]
        answer = sorted(row[1:7])
        if filter_ball is not None and set(answer).intersection(filter_ball):
            continue
        reasons = self.ballFilter.extract_final_candidates(
            answer, no=no, until_end=True, df=df_ball
        )
        if len(reasons) != 0:
            continue
        passed += 1
        print("\t\t>{}. {}".format(no, answer))
    print("\n\t> {} / {} p_win_count, {} total".format(passed, p_win_count, len(df_ball) - 1))
    return
def validate(self, df_ball, nos=None):
    """Replay the exhaustive candidate search for the given draws.

    For every draw number in ``nos`` all 6-number combinations of 1..45 are
    run through ``self.ballFilter.extract_final_candidates`` until the
    combination equal to the winning numbers is reached and passes.

    Args:
        df_ball: Draw-history table with a ``no`` column followed by the numbers.
        nos: Iterable of draw numbers to replay. ``None`` means "no draws"
            (the original code raised ``TypeError`` for the default value).

    Returns:
        Dict mapping each draw number whose winning combination survived the
        filter to its sorted winning numbers.
    """
    win_history = {}
    if nos is None:
        return win_history

    number_pool = range(1, 46)
    for no in nos:
        print(no, "processing...")
        row = df_ball[df_ball['no'] == no].values.tolist()[0]
        answer = sorted(row[1:7])
        answer_set = set(answer)
        # Stream the C(45, 6) = 8,145,060 combinations instead of rebuilding a
        # full list of them for every draw.
        for idx, combo in enumerate(itertools.combinations(number_pool, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            filter_type = self.ballFilter.extract_final_candidates(
                ball, no=no, until_end=True, df=df_ball
            )
            if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
                break
    return win_history
# Script entry point.
# NOTE(review): this section looks like two revisions of the script interleaved
# by the compare view (two ``__main__`` guards; ``resources_path`` and
# ``df_ball`` are assigned twice) — confirm against the actual file.
if __name__ == "__main__":
    # Load the draw history (no header row) from the ``resources`` directory
    # located next to this file.
    resources_path = os.path.join(os.path.dirname(__file__), "resources")
    csv_path = os.path.join(resources_path, "lotto_history.txt")
    df_ball = pd.read_csv(csv_path, header=None)
    df_ball.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]
if __name__ == '__main__':
    ft = FilterTest(resources_path)
    # From here on the resources directory is relative to the working directory.
    resources_path = 'resources'
    # Report the TRAIN range, then the VALID range capped at the newest draw.
    ft.report_split(df_ball, "학습 TRAIN", TRAIN_NO[0], TRAIN_NO[1])
    ft.report_split(df_ball, "검증 VALID", VALID_NO[0], min(VALID_NO[1], int(df_ball["no"].max())))
    # The TEST range is reported only when the history reaches it.
    if int(df_ball["no"].max()) >= TEST_NO[0]:
        ft.report_split(
            df_ball,
            "테스트 TEST",
            TEST_NO[0],
            int(df_ball["no"].max()),
        )
    # Reload the history and run the filter search once more, timing it.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball=[]
    filterTest = FilterTest(resources_path)
    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    # The string literal below is a disabled step ("final candidate selection");
    # it is evaluated as a no-op expression and never executed.
    """
print("\n\n")
no = df_ball['no'].values[-1]
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = ball[1:7]
print("STEP #0. 최종 후보 선정")
start = time.time()
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print(" > size: {}".format(len(final_candidates)))
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
with open(file_name, 'w+') as outFp:
for ball in final_candidates:
ball_str = [str(b) for b in answer]
outFp.write("{}\n".format(','.join(ball_str)))
print('{}회, 정답: {}\n'.format(no, str(answer)))
"""
    # Disabled: STEP #2, checking the number of wins.
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned in the feature file): 22 wins

File diff suppressed because one or more lines are too long

409
final_practice.py Normal file
View File

@@ -0,0 +1,409 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import random
import pandas as pd
import itertools
from collections import Counter
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from final_BallFilter import BallFilter
COST_PER_GAME = 1000  # price of a single game, in KRW
MAX_BUDGET_KRW = 70000  # spending limit per draw, in KRW
MAX_GAMES_PER_DRAW = MAX_BUDGET_KRW // COST_PER_GAME  # games the budget allows


class Practice:
    """Builds the weekly recommendation: fixed combinations plus a filtered portfolio."""

    bot = None
    preprocessor = None
    predictor = None
    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram client used for reporting.

        ``resources_path`` is accepted for interface compatibility; it is not stored.
        """
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """Fetch draw results from dhlottery.co.kr and store them on disk.

        Every successful response is written as one JSON line to
        ``<lottoHistoryFile>.json`` and as one ``no,b1,...,b6,bonus`` line to
        ``<lottoHistoryFile>.txt``; the same text line is echoed to stdout.

        Args:
            lottoHistoryFile: Path prefix (without extension) of the history files.
            drwNo: Draw number to fetch and append to the existing files. When
                ``None`` both files are truncated and draws are fetched from 1
                upwards, pausing 0.5 s between requests, until the service
                stops answering with ``returnValue == 'success'``.

        Returns:
            ``[b1, ..., b6, bonus]`` of the last stored draw, or ``None`` when
            no draw was stored.
        """
        url_prefix = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
        number_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # The context managers close both files on every exit path, including
        # the "not success" exit and any network or JSON error.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            idx = drwNo if single_draw else 1
            while True:
                res = requests.post(url_prefix + str(idx))
                result = res.json()
                if result['returnValue'] != 'success':
                    break
                numbers = [result[key] for key in number_keys]
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([idx] + numbers)
                textFp.write(line + "\n")
                print(line)
                ball = numbers
                if single_draw:
                    break
                idx += 1
                time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the eleven fixed weekly combinations to ``result_json`` in place."""
        result_json.append([6, 7, 10, 11, 20, 45])
        result_json.append([5, 12, 16, 27, 39, 45])
        result_json.append([5, 15, 18, 29, 36, 41])
        result_json.append([1, 17, 20, 25, 36, 45])
        result_json.append([6, 15, 20, 23, 37, 43])
        result_json.append([8, 15, 19, 23, 38, 41])
        result_json.append([3, 14, 20, 27, 35, 45])
        result_json.append([5, 11, 19, 24, 40, 45])
        result_json.append([5, 9, 20, 25, 32, 37])
        result_json.append([2, 13, 19, 27, 40, 43])
        result_json.append([4, 13, 17, 28, 39, 43])
        return

    def validate_fixed_balls(self, resources_path, ymd, fixed_balls):
        """Check whether each fixed combination passes ``BallFilter``.

        The filter is evaluated for the draw number returned by
        ``BallFilter.getNextNo(ymd)``, using the previous draw's six numbers
        as ``p_ball``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Draw date string handed to ``getNextNo``.
            fixed_balls: List of 6-number combinations to check.

        Returns:
            dict with keys ``draw_no``, ``total``, ``passed_count``,
            ``failed_count`` and ``details`` (one entry per combination with
            ``index``, ``ball``, ``passed`` and sorted ``filter_reasons``).
        """
        lotto_history_json = os.path.join(resources_path, 'lotto_history.json')
        ball_filter = BallFilter(lotto_history_json)
        draw_no = ball_filter.getNextNo(ymd)
        lotto_history_txt = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lotto_history_txt, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        prev_row = df_ball[df_ball['no'] == draw_no - 1].values.tolist()[0]
        p_ball = prev_row[1:7]
        details = []
        passed_count = 0
        for index, ball in enumerate(fixed_balls):
            filter_type = ball_filter.filter(
                ball=ball, no=draw_no, until_end=False, df=df_ball, p_ball=p_ball
            )
            passed = len(filter_type) == 0
            if passed:
                passed_count += 1
            details.append({
                'index': index + 1,
                'ball': ball,
                'passed': passed,
                'filter_reasons': sorted(filter_type),
            })
        return {
            'draw_no': draw_no,
            'total': len(fixed_balls),
            'passed_count': passed_count,
            'failed_count': len(fixed_balls) - passed_count,
            'details': details,
        }

    @staticmethod
    def format_fixed_validation_summary(validation):
        """Render a fixed-combination validation result as text for Telegram/logs.

        Failed combinations are listed with their first filter reason (or
        ``unknown`` when no reason was recorded).
        """
        lines = [
            " - 고정수 필터 검증: {}/{} 통과".format(
                validation['passed_count'], validation['total']
            )
        ]
        if validation['failed_count'] > 0:
            lines.append(
                " - 필터 예외 포함: {}개 (고정수 유지)".format(
                    validation['failed_count']
                )
            )
            for item in validation['details']:
                if item['passed']:
                    continue
                reason = item['filter_reasons'][0] if item['filter_reasons'] else 'unknown'
                lines.append(
                    " * #{} {} -> {}".format(item['index'], item['ball'], reason)
                )
        return "\n".join(lines)

    def _can_add_ball(self, ball, fixed_balls, selected_balls, max_overlap):
        """Return True when ``ball`` shares at most ``max_overlap`` numbers with
        every fixed and every already-selected combination."""
        ball_set = set(ball)
        for fixed_ball in fixed_balls:
            if len(ball_set & set(fixed_ball)) > max_overlap:
                return False
        for selected_ball in selected_balls:
            if len(ball_set & set(selected_ball)) > max_overlap:
                return False
        return True

    @staticmethod
    def _portfolio_number_counts(fixed_balls, selected_balls):
        """Count how often each number appears across the portfolio."""
        counts = Counter()
        for ball in fixed_balls + selected_balls:
            counts.update(ball)
        return counts

    @staticmethod
    def _coverage_priority(ball, number_counts):
        """Sum of portfolio appearances of the numbers in ``ball``.

        Lower means the combination is built from numbers the portfolio uses less.
        """
        return sum(number_counts.get(number, 0) for number in ball)

    def _pick_best_candidate(self, unique_candidates, selected_keys, fixed_balls, selected, max_overlap):
        """Pick the unselected candidate that satisfies the overlap limit and
        has the lowest coverage score; ties go to the smallest number tuple.

        Returns:
            ``(candidate, key)`` or ``(None, None)`` when nothing qualifies.
        """
        number_counts = self._portfolio_number_counts(fixed_balls, selected)
        best_candidate = None
        best_score = None
        best_key = None
        for candidate in unique_candidates:
            key = tuple(candidate)
            if key in selected_keys:
                continue
            if not self._can_add_ball(candidate, fixed_balls, selected, max_overlap):
                continue
            score = self._coverage_priority(candidate, number_counts)
            if best_candidate is None or score < best_score or (score == best_score and key < best_key):
                best_candidate = candidate
                best_score = score
                best_key = key
        return best_candidate, best_key

    def select_portfolio(self, fixed_balls, candidates, target_count, shuffle_seed=None):
        """Second-stage portfolio selection.

        Steps:
            - drop duplicates and combinations equal to a fixed combination;
            - optionally shuffle the remainder with ``random.Random(int(shuffle_seed))``;
            - greedily pick combinations while relaxing the allowed overlap
              with fixed/selected combinations step by step (2, 3, 4, 5, then 6);
            - among admissible candidates prefer the lowest coverage score.

        NOTE(review): the greedy phase breaks ties by the smallest number
        tuple, so the shuffle only changes the result when all unique
        candidates are returned directly (``len(unique) <= target_count``).

        Args:
            fixed_balls: Combinations that are always part of the portfolio.
            candidates: Iterable of candidate combinations.
            target_count: Number of combinations to select.
            shuffle_seed: Optional seed (anything ``int()`` accepts).

        Returns:
            List of selected combinations (each a sorted list).
        """
        unique_candidates = []
        seen = set()
        fixed_keys = {tuple(sorted(fixed_ball)) for fixed_ball in fixed_balls}
        for candidate in candidates:
            key = tuple(sorted(candidate))
            if key in seen or key in fixed_keys:
                continue
            seen.add(key)
            unique_candidates.append(list(key))
        if shuffle_seed is not None:
            rng = random.Random(int(shuffle_seed))
            rng.shuffle(unique_candidates)
        if target_count <= 0:
            return []
        if len(unique_candidates) <= target_count:
            return unique_candidates
        selected = []
        selected_keys = set()
        overlap_stages = [2, 3, 4, 5]
        for max_overlap in overlap_stages:
            while len(selected) < target_count:
                best_candidate, best_key = self._pick_best_candidate(
                    unique_candidates, selected_keys, fixed_balls, selected, max_overlap
                )
                if best_candidate is None:
                    break
                selected.append(best_candidate)
                selected_keys.add(best_key)
            if len(selected) >= target_count:
                return selected
        # Last resort: no overlap restriction at all.
        while len(selected) < target_count:
            best_candidate, best_key = self._pick_best_candidate(
                unique_candidates, selected_keys, fixed_balls, selected, max_overlap=6
            )
            if best_candidate is None:
                break
            selected.append(best_candidate)
            selected_keys.add(best_key)
        return selected

    def predict2(self, resources_path, ymd, fixed_balls, max_games_per_draw=MAX_GAMES_PER_DRAW):
        """Filter all 6-number combinations of 1..45 and select a portfolio.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Draw date string; also used as the portfolio shuffle seed.
            fixed_balls: Fixed combinations that already occupy game slots.
            max_games_per_draw: Total number of games allowed for the draw.

        Returns:
            ``(p_no, p_ball, selected_candidates, passed_count, variable_target_count)``:
            previous draw number, its six numbers, the selected combinations,
            the number of combinations that passed the filter, and the number
            of slots left after the fixed combinations.
        """
        candidates = [i for i in range(1, 46)]
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_ball = prev_row[1:7]
        passed_candidates = []
        for idx, ball in enumerate(itertools.combinations(candidates, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed, pass: {}".format(idx, len(passed_candidates)))
            ball = list(ball)
            filter_type = ballFilter.filter(
                ball=ball, no=no, until_end=False, df=df_ball, p_ball=p_ball
            )
            filter_size = len(filter_type)
            if 0 < filter_size:
                continue
            passed_candidates.append(ball)
        variable_target_count = max(0, max_games_per_draw - len(fixed_balls))
        selected_candidates = self.select_portfolio(
            fixed_balls=fixed_balls,
            candidates=passed_candidates,
            target_count=variable_target_count,
            shuffle_seed=ymd,
        )
        p_no = prev_row[0]
        return p_no, p_ball, selected_candidates, len(passed_candidates), variable_target_count
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Data collection (currently disabled)
    #dataCrawler = DataCrawler()
    #dataCrawler.excute(resources_path)

    # Target the coming Saturday; on Saturday from 21:00 and on Sunday the
    # target moves to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    rolls_over = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=(12 if rolls_over else 5) - weekday)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Start from the stored recommendations when present and reset this draw's entry.
    recommend_result_file = os.path.join(resources_path, "recommend_ball.biz_25.json")
    result_json = {}
    if os.path.isfile(recommend_result_file):
        with open(recommend_result_file, "r", encoding="utf-8") as result_fp:
            result_json = json.load(result_fp)
    result_json[ymd] = []

    # Fixed every week
    fixed_balls = []
    practice.predict1(fixed_balls)
    fixed_validation = practice.validate_fixed_balls(
        resources_path=resources_path,
        ymd=ymd,
        fixed_balls=fixed_balls,
    )
    fixed_summary = Practice.format_fixed_validation_summary(fixed_validation)
    print(fixed_summary)
    result_json[ymd].extend(fixed_balls)

    # Filter-based prediction
    p_no, p_ball, selected_candidates, passed_count, variable_target_count = practice.predict2(
        resources_path=resources_path,
        ymd=ymd,
        fixed_balls=fixed_balls,
        max_games_per_draw=MAX_GAMES_PER_DRAW
    )
    result_json[ymd].extend(selected_candidates)

    result_json.setdefault('_meta', {})[ymd] = {
        'fixed_validation': fixed_validation,
        'passed_count': passed_count,
        'selected_count': len(selected_candidates),
        'portfolio_shuffle_seed': ymd,
    }
    with open(recommend_result_file, 'w', encoding='utf-8') as outFp:
        json.dump(result_json, outFp, ensure_ascii=False)

    recommended = result_json[ymd]
    total_games = len(recommended)
    total_cost = total_games * COST_PER_GAME

    p_str = "".join([
        "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1)),
        " - 고정수: {}\n".format(len(fixed_balls)),
        fixed_summary + "\n",
        " - 필터 통과 후보: {}\n".format(passed_count),
        " - 추가 선정: {}개 (목표 {}개)\n".format(len(selected_candidates), variable_target_count),
        " - 총 추천: {}개, 총 금액: {:,}원 (한도 {:,}원)\n".format(total_games, total_cost, MAX_BUDGET_KRW),
    ])
    # Send in batches of 100 lines; the header travels with the first batch.
    for rank, ball in enumerate(recommended, start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    if total_games % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    print("size: {}".format(total_games))
    print("cost: {:,} KRW / limit: {:,} KRW".format(total_cost, MAX_BUDGET_KRW))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

File diff suppressed because it is too large Load Diff

View File

@@ -1,179 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_1 import BallFilter
class Practice:
    """Weekly recommendation run: one fixed combination plus every filter survivor."""

    bot = None
    preprocessor = None
    predictor = None
    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram client; ``resources_path`` is accepted but not stored."""
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """Fetch draw results from dhlottery.co.kr and store them on disk.

        Every successful response is written as one JSON line to
        ``<lottoHistoryFile>.json`` and as one ``no,b1,...,b6,bonus`` line to
        ``<lottoHistoryFile>.txt``; the same text line is echoed to stdout.

        Args:
            lottoHistoryFile: Path prefix (without extension) of the history files.
            drwNo: Draw number to fetch and append to the existing files. When
                ``None`` both files are truncated and draws are fetched from 1
                upwards, pausing 0.5 s between requests, until the service
                stops answering with ``returnValue == 'success'``.

        Returns:
            ``[b1, ..., b6, bonus]`` of the last stored draw, or ``None`` when
            no draw was stored.
        """
        url_prefix = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
        number_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # The context managers close both files on every exit path, including
        # the "not success" exit and any network or JSON error.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            idx = drwNo if single_draw else 1
            while True:
                res = requests.post(url_prefix + str(idx))
                result = res.json()
                if result['returnValue'] != 'success':
                    break
                numbers = [result[key] for key in number_keys]
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([idx] + numbers)
                textFp.write(line + "\n")
                print(line)
                ball = numbers
                if single_draw:
                    break
                idx += 1
                time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combination to ``result_json`` in place."""
        result_json.append([6,7,10,11,20,45])
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every 6-number combination of 1..45 that passes ``BallFilter``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Draw date string handed to ``BallFilter.getNextNo``.
            result_json: List extended in place with the surviving combinations.

        Returns:
            ``(p_no, p_ball)``: the previous draw number and its six numbers.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        #filter_ball=[1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44]
        # Iterate lazily instead of first building a list of all
        # C(45, 6) = 8,145,060 tuples.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if 0 < len(filter_type):
                continue
            result_json.append(ball)
        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = prev_row[0]
        p_ball = prev_row[1:7]
        return p_no, p_ball
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the coming Saturday; on Saturday from 21:00 and on Sunday the
    # target moves to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    rolls_over = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=(12 if rolls_over else 5) - weekday)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: read the stored history to find the last recorded draw.
    # The crawl call that would use it is currently disabled.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    # Fixed every week
    practice.predict1(result_json[ymd])
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])

    recommended = result_json[ymd]
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    # Send in batches of 100 lines; the header travels with the first batch.
    for rank, ball in enumerate(recommended, start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    size = len(recommended)
    if size % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

View File

@@ -1,179 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_2 import BallFilter
class Practice:
    """Weekly recommendation run: one fixed combination plus every filter survivor."""

    bot = None
    preprocessor = None
    predictor = None
    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram client; ``resources_path`` is accepted but not stored."""
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """Fetch draw results from dhlottery.co.kr and store them on disk.

        Every successful response is written as one JSON line to
        ``<lottoHistoryFile>.json`` and as one ``no,b1,...,b6,bonus`` line to
        ``<lottoHistoryFile>.txt``; the same text line is echoed to stdout.

        Args:
            lottoHistoryFile: Path prefix (without extension) of the history files.
            drwNo: Draw number to fetch and append to the existing files. When
                ``None`` both files are truncated and draws are fetched from 1
                upwards, pausing 0.5 s between requests, until the service
                stops answering with ``returnValue == 'success'``.

        Returns:
            ``[b1, ..., b6, bonus]`` of the last stored draw, or ``None`` when
            no draw was stored.
        """
        url_prefix = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
        number_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # The context managers close both files on every exit path, including
        # the "not success" exit and any network or JSON error.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            idx = drwNo if single_draw else 1
            while True:
                res = requests.post(url_prefix + str(idx))
                result = res.json()
                if result['returnValue'] != 'success':
                    break
                numbers = [result[key] for key in number_keys]
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([idx] + numbers)
                textFp.write(line + "\n")
                print(line)
                ball = numbers
                if single_draw:
                    break
                idx += 1
                time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combination to ``result_json`` in place."""
        result_json.append([6,7,10,11,20,45])
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every 6-number combination of 1..45 that passes ``BallFilter``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Draw date string handed to ``BallFilter.getNextNo``.
            result_json: List extended in place with the surviving combinations.

        Returns:
            ``(p_no, p_ball)``: the previous draw number and its six numbers.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        #filter_ball=[1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44]
        # Iterate lazily instead of first building a list of all
        # C(45, 6) = 8,145,060 tuples.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if 0 < len(filter_type):
                continue
            result_json.append(ball)
        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = prev_row[0]
        p_ball = prev_row[1:7]
        return p_no, p_ball
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the coming Saturday; on Saturday from 21:00 and on Sunday the
    # target moves to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    rolls_over = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=(12 if rolls_over else 5) - weekday)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: read the stored history to find the last recorded draw.
    # The crawl call that would use it is currently disabled.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    # Fixed every week
    practice.predict1(result_json[ymd])
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])

    recommended = result_json[ymd]
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    # Send in batches of 100 lines; the header travels with the first batch.
    for rank, ball in enumerate(recommended, start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    size = len(recommended)
    if size % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

View File

@@ -1,546 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import copy
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_3 import BallFilter
class Practice:
bot = None
preprocessor = None
predictor = None
extract_count = None
TARGET_MIN_SURVIVORS = 30
TARGET_MAX_SURVIVORS = 150
PREDICT_TIMEOUT_SECONDS = 180
def __init__(self, resources_path):
    """Remember the resources directory and create the Telegram client."""
    self.resources_path = resources_path
    self.bot = TelegramBot()
# 로또 당첨 데이터를 수집해서 파일로 저장합니다.
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일
def craw(self, lottoHistoryFile, drwNo=None):
    """Fetch draw results from dhlottery.co.kr and store them on disk.

    Every successful response is written as one JSON line to
    ``<lottoHistoryFile>.json`` and as one ``no,b1,...,b6,bonus`` line to
    ``<lottoHistoryFile>.txt``; the same text line is echoed to stdout.

    Args:
        lottoHistoryFile: Path prefix (without extension) of the history files.
        drwNo: Draw number to fetch and append to the existing files. When
            ``None`` both files are truncated and draws are fetched from 1
            upwards, pausing 0.5 s between requests, until the service stops
            answering with ``returnValue == 'success'``.

    Returns:
        ``[b1, ..., b6, bonus]`` of the last stored draw, or ``None`` when no
        draw was stored.
    """
    url_prefix = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    number_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')
    single_draw = drwNo is not None
    mode = 'a' if single_draw else 'w'
    ball = None
    # The context managers close both files on every exit path, including the
    # "not success" exit and any network or JSON error.
    with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
            open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
        idx = drwNo if single_draw else 1
        while True:
            res = requests.post(url_prefix + str(idx))
            result = res.json()
            if result['returnValue'] != 'success':
                break
            numbers = [result[key] for key in number_keys]
            jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
            line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([idx] + numbers)
            textFp.write(line + "\n")
            print(line)
            ball = numbers
            if single_draw:
                break
            idx += 1
            time.sleep(0.5)
    return ball
def predict1(self, result_json):
    """Append the eleven fixed weekly combinations to ``result_json`` in place."""
    # Literals are created inside the call so every invocation appends fresh lists.
    fixed_combinations = (
        [6, 7, 10, 11, 20, 45],
        [2, 7, 17, 28, 35, 39],
        [6, 10, 19, 25, 33, 35],
        [3, 17, 20, 24, 35, 45],
        [5, 15, 18, 29, 36, 41],
        [6, 15, 20, 23, 37, 43],
        [8, 15, 19, 23, 38, 41],
        [5, 11, 19, 24, 40, 45],
        [9, 16, 18, 23, 35, 43],
        [7, 13, 19, 28, 33, 44],
        [7, 11, 18, 29, 37, 42],
    )
    for combination in fixed_combinations:
        result_json.append(combination)
    print("회차(predict1)")
    return
def predict2(self, resources_path, ymd, result_json):
    """Append every 6-number combination of 1..45 that passes ``BallFilter``.

    Args:
        resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
        ymd: Draw date string handed to ``BallFilter.getNextNo``.
        result_json: List extended in place with the surviving combinations.

    Returns:
        ``(p_no, p_ball)``: the previous draw number and its six numbers.
    """
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
    ballFilter = BallFilter(lottoHistoryFileName)
    no = ballFilter.getNextNo(ymd)
    print("회차(predict2): {}".format(no))
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    #filter_ball=[1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44]
    # Iterate lazily instead of first building a list of all
    # C(45, 6) = 8,145,060 tuples.
    for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))
        ball = list(combo)
        filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if 0 < len(filter_type):
            continue
        result_json.append(ball)
    prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
    p_no = prev_row[0]
    p_ball = prev_row[1:7]
    return p_no, p_ball
def predict3(self, resources_path, ymd, result_json):
    """Pick a bounded candidate set by tightening or relaxing filter rulesets.

    The base ruleset is scanned first.  If it yields more than
    ``TARGET_MAX_SURVIVORS`` combinations, stricter rulesets are tried; if it
    yields fewer than ``TARGET_MIN_SURVIVORS``, looser ones are tried.  All
    scans share one deadline of ``PREDICT_TIMEOUT_SECONDS`` counted from the
    start of this call.

    Args:
        resources_path: Directory containing ``lotto_history.json`` and
            ``lotto_history.txt``.
        ymd: Draw date string handed to ``BallFilter.getNextNo``.
        result_json: List that receives the chosen combinations (appended
            in place).

    Returns:
        Tuple ``(p_no, p_ball)``: the ``no`` column and the sorted six main
        numbers of the history row whose ``no`` is one less than the draw
        being predicted.
    """
    pool = list(range(1, 46))
    history_json = os.path.join(resources_path, 'lotto_history.json')
    no = BallFilter(history_json).getNextNo(ymd)
    print("회차(predict3): {}".format(no))

    started_at = time.time()
    deadline = started_at + self.PREDICT_TIMEOUT_SECONDS

    history_txt = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(history_txt, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
    p_no = prev_row[0]
    p_ball = sorted(prev_row[1:7])

    base_ruleset = self._get_base_ruleset()

    # Stricter rulesets: both enable the same filters; the second also pins
    # the allowed sums and sum differences.
    strict_enabled = {
        "paper_patterns": True,
        "ban_triples_legacy": True,
        "all_in_previous7": True,
        "previous_neighbors": True,
    }
    strict_allowed = {
        "ac_value": [8, 9],
        "uniq_last_digit_count": [4, 5],
        "even_count": [2, 3, 4],
    }
    stricter_allowed = dict(strict_allowed)
    stricter_allowed["sum"] = [112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148]
    stricter_allowed["sum_prev_diff"] = [13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40]
    tighten_rulesets = [
        self._build_ruleset(
            base_ruleset=base_ruleset,
            enabled_overrides=strict_enabled,
            allowed_overrides=allowed,
        )
        for allowed in (strict_allowed, stricter_allowed)
    ]

    # Looser rulesets: each step disables everything the previous step did
    # plus one more group of filters.
    relax_steps = (
        ("paper_patterns", "ban_triples_legacy"),
        ("previous_neighbors", "all_in_previous7"),
        ("weeks_8_count", "weeks_12_count", "weeks_16_count", "weeks_20_count"),
    )
    relax_rulesets = []
    disabled = {}
    for step in relax_steps:
        for filter_name in step:
            disabled[filter_name] = False
        relax_rulesets.append(
            self._build_ruleset(base_ruleset=base_ruleset, enabled_overrides=dict(disabled))
        )

    min_survivors = self.TARGET_MIN_SURVIVORS
    max_survivors = self.TARGET_MAX_SURVIVORS

    def scan(ruleset, label, **stop_rules):
        # Every stage scans the same pool against the same shared deadline.
        return self._collect_candidates(
            candidates=pool,
            no=no,
            df_ball=df_ball,
            ruleset=ruleset,
            stage_name=label,
            predict_start_ts=started_at,
            deadline_ts=deadline,
            **stop_rules
        )

    def publish(stage, balls):
        print("predict3 stage: {}, survivors: {}".format(stage, len(balls)))
        for ball in balls:
            result_json.append(ball)
        return p_no, p_ball

    base_info = scan(base_ruleset, "base", stop_when_gt=max_survivors)
    current = base_info["candidates"]
    if base_info["timed_out"]:
        # The timeout path returns straight away and bypasses the guards below.
        return publish(
            "base_timeout_fallback",
            self._finalize_on_timeout(current, p_ball, min_survivors, max_survivors),
        )

    chosen = current
    stage_name = "base"
    if len(current) > max_survivors:
        stage_name = "base_overflow"
        for idx, rs in enumerate(tighten_rulesets, start=1):
            label = "tighten_{}".format(idx)
            info = scan(rs, label, stop_when_gt=max_survivors)
            tightened = info["candidates"]
            if info["timed_out"]:
                chosen = self._finalize_on_timeout(tightened, p_ball, min_survivors, max_survivors)
                stage_name = "tighten_{}_timeout_fallback".format(idx)
                break
            if len(tightened) > max_survivors:
                # Still too many: keep the previous choice, try the next ruleset.
                continue
            chosen = tightened
            stage_name = label
            if len(tightened) >= min_survivors:
                break
        if len(chosen) > max_survivors:
            # No ruleset got under the cap: collect the full survivor list of
            # the strictest ruleset and keep the best-scoring entries.
            full_info = scan(tighten_rulesets[-1], "tighten_full_rank", stop_when_gt=None)
            ranked_pool = full_info["candidates"]
            if full_info["timed_out"]:
                chosen = self._finalize_on_timeout(ranked_pool, p_ball, min_survivors, max_survivors)
                stage_name = "tighten_rank_timeout_fallback"
            else:
                chosen = self._rank_and_trim(ranked_pool, p_ball, max_survivors)
                stage_name = "tighten_rank_trim"
    elif len(current) < min_survivors:
        stage_name = "base_underflow"
        for idx, rs in enumerate(relax_rulesets, start=1):
            # Relaxing only needs to reach the lower bound, so stop early.
            info = scan(rs, "relax_{}".format(idx), stop_when_gt=None, stop_when_gte=min_survivors)
            relaxed = info["candidates"]
            chosen = relaxed
            stage_name = "relax_{}".format(idx)
            if info["timed_out"]:
                chosen = self._finalize_on_timeout(relaxed, p_ball, min_survivors, max_survivors)
                stage_name = "relax_{}_timeout_fallback".format(idx)
                break
            if len(relaxed) >= min_survivors:
                break

    if len(chosen) == 0:
        # Nothing survived: derive candidates from the previous draw instead.
        stage_name = "relax_zero_fallback"
        chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
    elif len(chosen) < min_survivors:
        # Too few survived: pad with candidates derived from the previous draw.
        stage_name = "{}_fill".format(stage_name)
        padding = self._fallback_candidates_from_prev(
            p_ball,
            min_survivors - len(chosen),
            exclude=set(tuple(x) for x in chosen),
        )
        chosen.extend(padding)

    return publish(stage_name, chosen)
def _get_base_ruleset(self):
    """Return a deep copy of the ruleset of a default-constructed BallFilter.

    The filter is built from ``lotto_history.json`` under
    ``self.resources_path``; copying lets callers edit the result freely.
    """
    default_filter = BallFilter(os.path.join(self.resources_path, "lotto_history.json"))
    ruleset = default_filter.m1.ruleset
    return copy.deepcopy(ruleset)
def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
    """Derive a new ruleset from *base_ruleset* without modifying it.

    Args:
        base_ruleset: Mapping that may contain a ``"filters"`` mapping.
        enabled_overrides: ``{filter_name: flag}``; each flag is coerced to
            ``bool`` and stored as that filter's ``"enabled"`` value.
        allowed_overrides: ``{filter_name: values}``; the filter is enabled
            and its ``"allowed"`` entry becomes ``list(values)``.

    Returns:
        A deep copy of *base_ruleset* with the overrides applied.
    """
    derived = copy.deepcopy(base_ruleset)
    filters = derived.setdefault("filters", {})

    for name, flag in (enabled_overrides or {}).items():
        filters.setdefault(name, {})["enabled"] = bool(flag)

    for name, allowed in (allowed_overrides or {}).items():
        entry = filters.setdefault(name, {})
        entry["enabled"] = True
        entry["allowed"] = list(allowed)

    return derived
def _collect_candidates(
    self,
    candidates,
    no,
    df_ball,
    ruleset,
    stop_when_gt=None,
    stop_when_gte=None,
    stage_name="base",
    predict_start_ts=None,
    deadline_ts=None,
):
    """Scan 6-number combinations of *candidates* and keep the unfiltered ones.

    Args:
        candidates: Numbers to combine six at a time.
        no: Draw number passed to ``BallFilter.filter``.
        df_ball: History frame passed to ``BallFilter.filter``.
        ruleset: Ruleset used to construct the ``BallFilter``.
        stop_when_gt: Stop as soon as the survivor count exceeds this value.
        stop_when_gte: Stop as soon as the survivor count reaches this value.
        stage_name: Label used in progress output.
        predict_start_ts: Start timestamp used for the elapsed-time output.
        deadline_ts: ``time.time()`` value at which scanning is abandoned.

    Returns:
        Dict with ``"candidates"`` (survivor list), ``"timed_out"`` (bool)
        and ``"processed"`` (1-based index of the last combination reached).
    """
    history_json = os.path.join(self.resources_path, "lotto_history.json")
    ball_filter = BallFilter(history_json, ruleset=ruleset)

    def elapsed():
        if predict_start_ts is None:
            return 0.0
        return time.time() - predict_start_ts

    def outcome(timed_out, processed):
        return {"candidates": survivors, "timed_out": timed_out, "processed": processed}

    survivors = []
    processed = 0
    for processed, combo in enumerate(itertools.combinations(candidates, 6), start=1):
        if deadline_ts is not None and deadline_ts <= time.time():
            print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, elapsed(), len(survivors)))
            return outcome(True, processed)
        if processed % 1000000 == 0:
            print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, elapsed(), len(survivors)))

        picked = list(combo)
        if len(ball_filter.filter(ball=picked, no=no, until_end=False, df=df_ball)) != 0:
            continue
        survivors.append(picked)

        # The stop thresholds are only re-checked when the survivor list grew.
        if stop_when_gt is not None and len(survivors) > stop_when_gt:
            return outcome(False, processed)
        if stop_when_gte is not None and len(survivors) >= stop_when_gte:
            return outcome(False, processed)

    return outcome(False, processed)
def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
    """Turn a partial scan result into a list bounded by the survivor limits.

    More than *max_survivors* entries are ranked and trimmed; fewer than
    *min_survivors* are padded with candidates derived from *prev_ball*.
    The input list itself is never modified.
    """
    survivors = list(partial_candidates)
    count = len(survivors)

    if count > max_survivors:
        return self._rank_and_trim(survivors, prev_ball, max_survivors)

    if count < min_survivors:
        already_present = set(tuple(ball) for ball in survivors)
        padding = self._fallback_candidates_from_prev(
            prev_ball,
            min_survivors - count,
            exclude=already_present,
        )
        survivors.extend(padding)

    return survivors
def _rank_and_trim(self, candidates, prev_ball, limit):
    """Return the *limit* lowest-scoring candidates, ties kept in input order.

    Scores come from ``_score_candidate`` (lower is better).
    """
    ranked = sorted(candidates, key=lambda ball: self._score_candidate(ball, prev_ball))
    return ranked[:limit]
def _score_candidate(self, ball, prev_ball):
    """Score a combination; lower is better.

    The score is the sum of:
      * the absolute difference between ``sum(ball)`` and ``sum(prev_ball)``,
      * twice the distance of the even-number count from 3,
      * twice the distance of the distinct last-digit count from 5,
      * one point per pair of consecutive numbers.
    """
    ordered = sorted(ball)
    evens = sum(1 for n in ball if n % 2 == 0)
    distinct_last_digits = len({n % 10 for n in ball})
    consecutive_pairs = sum(1 for low, high in zip(ordered, ordered[1:]) if high - low == 1)

    return (
        abs(sum(ball) - sum(prev_ball))
        + 2 * abs(evens - 3)
        + 2 * abs(distinct_last_digits - 5)
        + consecutive_pairs
    )
def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
    """Generate up to *need_count* combinations by perturbing *prev_ball*.

    The sorted previous draw is combined with a fixed list of per-position
    offsets, then shifted upward as a whole by 0..8.  Values are clamped to
    1..45; results that no longer contain six distinct numbers, or that
    were already produced or excluded, are skipped.

    Args:
        prev_ball: Numbers of the previous draw; the six smallest are used.
        need_count: Maximum number of combinations to return.
        exclude: Optional set of sorted 6-tuples that must not be returned.
            It is copied, so the caller's set is never modified.

    Returns:
        List of sorted six-number lists; may be shorter than *need_count*
        when the patterns are exhausted.
    """
    # Work on a private copy: previously a non-empty caller set was mutated
    # while an empty one was silently replaced.
    seen = set(exclude) if exclude else set()
    seed = sorted(prev_ball)
    delta_patterns = (
        (0, 0, 0, 0, 0, 0),
        (-1, 0, 0, 0, 0, 1),
        (0, -1, 0, 0, 1, 0),
        (0, 0, -1, 1, 0, 0),
        (-2, 0, 0, 0, 0, 2),
        (0, -2, 0, 0, 2, 0),
        (0, 0, -2, 2, 0, 0),
        (-1, -1, 0, 0, 1, 1),
        (1, 0, -1, 0, 0, 0),
        (0, 1, 0, -1, 0, 0),
        (1, -1, 1, -1, 1, -1),
        (-1, 1, -1, 1, -1, 1),
    )

    out = []
    shift = 0
    while len(out) < need_count and shift <= 8:
        for delta in delta_patterns:
            cand = sorted(
                min(45, max(1, base + step + shift))
                for base, step in zip(seed, delta)
            )
            # Clamping or offsets can collapse two positions onto one number.
            if len(set(cand)) != 6:
                continue
            key = tuple(cand)
            if key in seen:
                continue
            seen.add(key)
            out.append(cand)
            if len(out) >= need_count:
                break
        shift += 1
    return out
def _merge_unique_balls(self, base_balls, extra_balls):
    """Append to *base_balls* every ball of *extra_balls* not already present.

    Two balls are the same when they hold the same numbers in any order.
    *base_balls* is extended in place and also returned; new entries are
    stored as list copies in their original order.
    """
    known = {tuple(sorted(ball)) for ball in base_balls}
    for ball in extra_balls:
        signature = tuple(sorted(ball))
        if signature in known:
            continue
        known.add(signature)
        base_balls.append(list(ball))
    return base_balls
def _sorted_unique_balls(self, balls):
    """Return the distinct balls of *balls*, each sorted, in lexicographic order.

    Balls that differ only in the order of their numbers count as one.
    The result is a new ``List[List[int]]``.
    """
    distinct = {tuple(sorted(ball)) for ball in balls}
    return [list(combo) for combo in sorted(distinct)]
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target date: the coming Saturday (weekday 5).  On Saturday after hour 20
    # and on Sunday, the Saturday of the following week is used instead.
    today = datetime.today()
    weekday = today.weekday()
    if weekday == 6 or (weekday == 5 and today.hour > 20):
        days_ahead = 12 - weekday
    else:
        days_ahead = 5 - weekday
    this_weekend = today + timedelta(days=days_ahead)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: remember the last non-blank record of the history file.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}

    # Fixed weekly picks
    practice.predict1(result_json[ymd])

    # Filter-based predictions (existing/new): merge both results, sort them,
    # then append them after the predict1 picks.
    predict2_json = []
    p_no, p_ball = practice.predict2(resources_path, ymd, predict2_json)
    predict3_json = []
    p_no3, p_ball3 = practice.predict3(resources_path, ymd, predict3_json)

    merged_predict = []
    for partial in (predict2_json, predict3_json):
        practice._merge_unique_balls(merged_predict, partial)
    merged_predict = practice._sorted_unique_balls(merged_predict)

    # Append merged_predict to the predict1 picks in sorted order (de-duplicated).
    practice._merge_unique_balls(result_json[ymd], merged_predict)

    if p_no3 == p_no:
        p_ball = p_ball3

    header = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    rows = [
        " {}. {}\n".format(rank, str(ball))
        for rank, ball in enumerate(result_json[ymd], start=1)
    ]
    # Send 100 rows per message; only the first message carries the header.
    for offset in range(0, len(rows), 100):
        body = "".join(rows[offset:offset + 100])
        if offset == 0:
            body = header + body
        practice.bot.sendMsg("{}".format(body))

    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

View File

@@ -1,216 +0,0 @@
import os
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
    """Back-testing helpers that run ``BallFilter`` against past draws."""

    # Shared default; every instance gets its own filter in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``lotto_history.json`` under *resources_path*."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    @staticmethod
    def _bump(counter, key):
        """Increment ``counter[key]``, starting from zero."""
        counter[key] = counter.get(key, 0) + 1

    @staticmethod
    def _print_counts(title, counter):
        """Print *title* followed by the counter entries, highest count first."""
        print(title)
        for key, count in sorted(counter.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, filter_ball=None):
        """Report which filters reject each past winning combination.

        Rows are visited from the last one down to index 20.  For each row
        the winning numbers are passed to ``BallFilter.filter`` with
        ``until_end=True`` and the triggered filters are tallied and printed.

        Args:
            df_ball: History table with a ``no`` column followed by the six
                main numbers.
            filter_ball: Accepted for interface compatibility; not used.

        Returns:
            Number of draws whose winning numbers triggered no filter.
        """
        win_count = 0
        no_filter_ball = {}
        print_log = True
        per_filter = {}
        by_filter_count = {}
        single_filter = {}
        filter_pairs = {}

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    self._bump(single_filter, filter_type[0])
                elif size == 2:
                    self._bump(filter_pairs, ','.join(filter_type))
                if print_log:
                    print("\t", no, filter_type)

            # Grouped by filter count so the least-filtered draws print first.
            by_filter_count.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                self._bump(per_filter, f_t)

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(by_filter_count):
            for filter_type in by_filter_count[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_counts("\n\t[걸러진 유일 필터]", single_filter)
        self._print_counts("\n\t[2개 필터에 걸린 경우]", filter_pairs)
        self._print_counts("\n\t[Filter 유형 별 걸린 개수]", per_filter)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        for no, answer in no_filter_ball.items():
            print("\t\t>", no, answer)
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that passes the filter for draw *no*.

        Args:
            no: Draw number passed to ``BallFilter.filter``.
            df_ball: History table passed to ``BallFilter.filter``.
            filter_ball: Optional numbers; any combination containing one of
                them is skipped without consulting the filter.

        Returns:
            List of surviving combinations as tuples.
        """
        banned = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []
        # Streamed rather than materialized: 8,145,060 tuples are visited once.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if banned and not banned.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the past draws whose winning numbers pass ``extract_final_candidates``.

        Args:
            df_ball: History table; rows are visited from the last one down
                to index 1.
            p_win_count: Reference count shown in the summary line.
            filter_ball: Optional numbers; draws containing any are skipped.
        """
        banned = set(filter_ball) if filter_ball is not None else set()
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if banned and not banned.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
        return

    def validate(self, df_ball, nos=None):
        """Find, per draw in *nos*, whether the winning combination survives.

        Args:
            df_ball: History table with a ``no`` column followed by the six
                main numbers.
            nos: Draw numbers to check; ``None`` is treated as no draws
                (it used to raise ``TypeError``).

        Returns:
            Dict mapping each draw number whose winning numbers passed the
            filter to those numbers.
        """
        win_history = {}
        if nos is None:
            return win_history
        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    resources_path = 'resources'
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')

    # History table: draw number, six main numbers, bonus number.
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball = []
    filterTest = FilterTest(resources_path)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, filter_ball)
    elapsed_seconds = time.time() - start
    process_time = datetime.timedelta(seconds=elapsed_seconds)
    print("process_time: ", process_time)

    # Disabled step kept as a string literal (final candidate selection).
    """
print("\n\n")
no = df_ball['no'].values[-1]
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = ball[1:7]
print("STEP #0. 최종 후보 선정")
start = time.time()
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print(" > size: {}".format(len(final_candidates)))
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
with open(file_name, 'w+') as outFp:
for ball in final_candidates:
ball_str = [str(b) for b in answer]
outFp.write("{}\n".format(','.join(ball_str)))
print('{}회, 정답: {}\n'.format(no, str(answer)))
"""
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned in the feature file): 22 wins

View File

@@ -1,490 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
import json
import os
import copy
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_3 import BallFilter
class Practice:
    """Weekly lotto recommendation run: crawl history, filter, pick candidates."""

    # Telegram client; set per instance in __init__.
    bot = None
    # Not referenced by any method of this class.
    preprocessor = None
    predictor = None
    extract_count = None
    # Survivor-count window predict2 aims for, and its overall time budget.
    TARGET_MIN_SURVIVORS = 30
    TARGET_MAX_SURVIVORS = 150
    PREDICT_TIMEOUT_SECONDS = 180

    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    _BALL_KEYS = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')

    def __init__(self, resources_path):
        """Create the Telegram client and remember the resources directory."""
        self.bot = TelegramBot()
        self.resources_path = resources_path

    def _fetch_draw(self, drw_no):
        """Request one draw; return its JSON dict, or None unless it reports success."""
        res = requests.post(self._DRAW_URL + str(drw_no))
        result = res.json()
        if result['returnValue'] != 'success':
            return None
        return result

    def _record_draw(self, json_fp, text_fp, drw_no, result):
        """Write one draw to both history files, echo it, and return its 7 numbers."""
        ball = [result[key] for key in self._BALL_KEYS]
        row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([drw_no] + ball)
        json_fp.write(json.dumps(result, ensure_ascii=False) + "\n")
        text_fp.write(row + "\n")
        print(row)
        return ball

    def craw(self, lottoHistoryFile, drwNo=None):
        """Collect winning numbers and store them in the history files.

        Args:
            lottoHistoryFile: Path prefix; ``.json`` and ``.txt`` are appended.
            drwNo: Draw to fetch and append.  When ``None`` both files are
                rewritten from draw 1 onward until a request fails.

        Returns:
            ``[n1, ..., n6, bonus]`` of the last draw stored, or ``None`` when
            nothing was stored.
        """
        single = drwNo is not None
        mode = 'a' if single else 'w'
        ball = None
        # Context managers close both files on every path, including the
        # early return and exceptions (the handles used to leak there).
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if single:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                return self._record_draw(jsonFp, textFp, drwNo, result)

            idx = 1
            while True:
                result = self._fetch_draw(idx)
                if result is None:
                    break
                ball = self._record_draw(jsonFp, textFp, idx, result)
                idx += 1
                # Pause between consecutive requests.
                time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combinations to *result_json* in place."""
        fixed_picks = (
            (6, 7, 10, 11, 20, 45),
            (2, 7, 17, 28, 35, 39),
            (6, 10, 19, 25, 33, 35),
            (3, 17, 20, 24, 35, 45),
            (5, 15, 18, 29, 36, 41),
            (6, 15, 20, 23, 37, 43),
            (8, 15, 19, 23, 38, 41),
            (5, 11, 19, 24, 40, 45),
            (9, 16, 18, 23, 35, 43),
            (7, 13, 19, 28, 33, 44),
            (7, 11, 18, 29, 37, 42),
        )
        for pick in fixed_picks:
            result_json.append(list(pick))

    def predict2(self, resources_path, ymd, result_json):
        """Pick a bounded candidate set by tightening or relaxing filter rulesets.

        The base ruleset is scanned first.  More than
        ``TARGET_MAX_SURVIVORS`` survivors leads to stricter rulesets; fewer
        than ``TARGET_MIN_SURVIVORS`` leads to looser ones.  All scans share
        one deadline of ``PREDICT_TIMEOUT_SECONDS`` from the start of the call.

        Args:
            resources_path: Directory containing ``lotto_history.json`` and
                ``lotto_history.txt``.
            ymd: Draw date string handed to ``BallFilter.getNextNo``.
            result_json: List that receives the chosen combinations.

        Returns:
            Tuple ``(p_no, p_ball)``: the ``no`` column and the sorted six
            main numbers of the history row preceding the predicted draw.
        """
        pool = list(range(1, 46))
        history_json = os.path.join(resources_path, 'lotto_history.json')
        no = BallFilter(history_json).getNextNo(ymd)
        print("회차: {}".format(no))

        started_at = time.time()
        deadline = started_at + self.PREDICT_TIMEOUT_SECONDS

        history_txt = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(history_txt, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = prev_row[0]
        p_ball = sorted(prev_row[1:7])

        # Rulesets for the base / tighten / relax stages.
        base_ruleset = self._get_base_ruleset()
        strict_enabled = {
            "paper_patterns": True,
            "ban_triples_legacy": True,
            "all_in_previous7": True,
            "previous_neighbors": True,
        }
        strict_allowed = {
            "ac_value": [8, 9],
            "uniq_last_digit_count": [4, 5],
            "even_count": [2, 3, 4],
        }
        stricter_allowed = dict(strict_allowed)
        stricter_allowed["sum"] = [112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148]
        stricter_allowed["sum_prev_diff"] = [13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40]
        tighten_rulesets = [
            self._build_ruleset(
                base_ruleset=base_ruleset,
                enabled_overrides=strict_enabled,
                allowed_overrides=allowed,
            )
            for allowed in (strict_allowed, stricter_allowed)
        ]

        # Each relax step disables one more group of filters than the last.
        relax_steps = (
            ("paper_patterns", "ban_triples_legacy"),
            ("previous_neighbors", "all_in_previous7"),
            ("weeks_8_count", "weeks_12_count", "weeks_16_count", "weeks_20_count"),
        )
        relax_rulesets = []
        disabled = {}
        for step in relax_steps:
            for filter_name in step:
                disabled[filter_name] = False
            relax_rulesets.append(
                self._build_ruleset(base_ruleset=base_ruleset, enabled_overrides=dict(disabled))
            )

        min_survivors = self.TARGET_MIN_SURVIVORS
        max_survivors = self.TARGET_MAX_SURVIVORS

        def scan(ruleset, label, **stop_rules):
            # Every stage scans the same pool against the same deadline.
            return self._collect_candidates(
                candidates=pool,
                no=no,
                df_ball=df_ball,
                ruleset=ruleset,
                stage_name=label,
                predict_start_ts=started_at,
                deadline_ts=deadline,
                **stop_rules
            )

        def publish(stage, balls):
            print("candidate_stage: {}, survivors: {}".format(stage, len(balls)))
            for ball in balls:
                result_json.append(ball)
            return p_no, p_ball

        base_info = scan(base_ruleset, "base", stop_when_gt=max_survivors)
        current = base_info["candidates"]
        if base_info["timed_out"]:
            return publish(
                "base_timeout_fallback",
                self._finalize_on_timeout(current, p_ball, min_survivors, max_survivors),
            )

        chosen = current
        stage_name = "base"
        if len(current) > max_survivors:
            stage_name = "base_overflow"
            for idx, rs in enumerate(tighten_rulesets, start=1):
                label = "tighten_{}".format(idx)
                info = scan(rs, label, stop_when_gt=max_survivors)
                tightened = info["candidates"]
                if info["timed_out"]:
                    chosen = self._finalize_on_timeout(tightened, p_ball, min_survivors, max_survivors)
                    stage_name = "tighten_{}_timeout_fallback".format(idx)
                    break
                if len(tightened) > max_survivors:
                    continue
                chosen = tightened
                stage_name = label
                if len(tightened) >= min_survivors:
                    break
            if len(chosen) > max_survivors:
                # Enforce the upper bound: keep only the best-scoring entries.
                full_info = scan(tighten_rulesets[-1], "tighten_full_rank", stop_when_gt=None)
                ranked_pool = full_info["candidates"]
                if full_info["timed_out"]:
                    chosen = self._finalize_on_timeout(ranked_pool, p_ball, min_survivors, max_survivors)
                    stage_name = "tighten_rank_timeout_fallback"
                else:
                    chosen = self._rank_and_trim(ranked_pool, p_ball, max_survivors)
                    stage_name = "tighten_rank_trim"
        elif len(current) < min_survivors:
            stage_name = "base_underflow"
            for idx, rs in enumerate(relax_rulesets, start=1):
                # Relaxing only needs to reach the lower bound, so stop early.
                info = scan(rs, "relax_{}".format(idx), stop_when_gt=None, stop_when_gte=min_survivors)
                relaxed = info["candidates"]
                chosen = relaxed
                stage_name = "relax_{}".format(idx)
                if info["timed_out"]:
                    chosen = self._finalize_on_timeout(relaxed, p_ball, min_survivors, max_survivors)
                    stage_name = "relax_{}_timeout_fallback".format(idx)
                    break
                if len(relaxed) >= min_survivors:
                    break

        if len(chosen) == 0:
            # Avoid zero survivors: fall back to combinations close to the
            # previous draw to reach the minimum count.
            stage_name = "relax_zero_fallback"
            chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
        elif len(chosen) < min_survivors:
            # Lower-bound guard: pad the shortfall from the previous draw.
            stage_name = "{}_fill".format(stage_name)
            padding = self._fallback_candidates_from_prev(
                p_ball,
                min_survivors - len(chosen),
                exclude=set(tuple(x) for x in chosen),
            )
            chosen.extend(padding)

        return publish(stage_name, chosen)

    def _get_base_ruleset(self):
        """Return a deep copy of the ruleset of a default-constructed BallFilter."""
        default_filter = BallFilter(os.path.join(self.resources_path, "lotto_history.json"))
        return copy.deepcopy(default_filter.m1.ruleset)

    def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
        """Return a deep copy of *base_ruleset* with filter overrides applied.

        ``enabled_overrides`` maps filter names to flags (coerced to bool);
        ``allowed_overrides`` maps filter names to allowed values and also
        enables those filters.
        """
        derived = copy.deepcopy(base_ruleset)
        filters = derived.setdefault("filters", {})
        for name, flag in (enabled_overrides or {}).items():
            filters.setdefault(name, {})["enabled"] = bool(flag)
        for name, allowed in (allowed_overrides or {}).items():
            entry = filters.setdefault(name, {})
            entry["enabled"] = True
            entry["allowed"] = list(allowed)
        return derived

    def _collect_candidates(
        self,
        candidates,
        no,
        df_ball,
        ruleset,
        stop_when_gt=None,
        stop_when_gte=None,
        stage_name="base",
        predict_start_ts=None,
        deadline_ts=None,
    ):
        """Scan 6-number combinations of *candidates*, keeping the unfiltered ones.

        Scanning stops early when the survivor count exceeds *stop_when_gt*,
        reaches *stop_when_gte*, or when ``time.time()`` passes *deadline_ts*.

        Returns:
            Dict with ``"candidates"``, ``"timed_out"`` and ``"processed"``
            (1-based index of the last combination reached).
        """
        history_json = os.path.join(self.resources_path, "lotto_history.json")
        ball_filter = BallFilter(history_json, ruleset=ruleset)

        def elapsed():
            if predict_start_ts is None:
                return 0.0
            return time.time() - predict_start_ts

        def outcome(timed_out, processed):
            return {"candidates": survivors, "timed_out": timed_out, "processed": processed}

        survivors = []
        processed = 0
        for processed, combo in enumerate(itertools.combinations(candidates, 6), start=1):
            if deadline_ts is not None and deadline_ts <= time.time():
                print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, elapsed(), len(survivors)))
                return outcome(True, processed)
            if processed % 1000000 == 0:
                print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, elapsed(), len(survivors)))

            picked = list(combo)
            if len(ball_filter.filter(ball=picked, no=no, until_end=False, df=df_ball)) != 0:
                continue
            survivors.append(picked)
            if stop_when_gt is not None and len(survivors) > stop_when_gt:
                return outcome(False, processed)
            if stop_when_gte is not None and len(survivors) >= stop_when_gte:
                return outcome(False, processed)
        return outcome(False, processed)

    def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
        """Trim or pad a partial scan result to the survivor limits (input untouched)."""
        survivors = list(partial_candidates)
        count = len(survivors)
        if count > max_survivors:
            return self._rank_and_trim(survivors, prev_ball, max_survivors)
        if count < min_survivors:
            padding = self._fallback_candidates_from_prev(
                prev_ball,
                min_survivors - count,
                exclude=set(tuple(ball) for ball in survivors),
            )
            survivors.extend(padding)
        return survivors

    def _rank_and_trim(self, candidates, prev_ball, limit):
        """Return the *limit* lowest-scoring candidates, ties kept in input order."""
        ranked = sorted(candidates, key=lambda ball: self._score_candidate(ball, prev_ball))
        return ranked[:limit]

    def _score_candidate(self, ball, prev_ball):
        """Score a combination (lower is better).

        Adds the sum difference to *prev_ball*, twice the distance of the
        even count from 3, twice the distance of the distinct last-digit
        count from 5, and one point per pair of consecutive numbers.
        """
        ordered = sorted(ball)
        evens = sum(1 for n in ball if n % 2 == 0)
        distinct_last_digits = len({n % 10 for n in ball})
        consecutive_pairs = sum(1 for low, high in zip(ordered, ordered[1:]) if high - low == 1)
        return (
            abs(sum(ball) - sum(prev_ball))
            + 2 * abs(evens - 3)
            + 2 * abs(distinct_last_digits - 5)
            + consecutive_pairs
        )

    def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
        """Generate up to *need_count* combinations by perturbing *prev_ball*.

        Fixed per-position offsets are applied to the sorted previous draw,
        then the whole combination is shifted upward by 0..8.  Values are
        clamped to 1..45; results without six distinct numbers, or already
        produced or excluded, are skipped.

        Args:
            prev_ball: Numbers of the previous draw; the six smallest are used.
            need_count: Maximum number of combinations to return.
            exclude: Optional set of sorted 6-tuples to avoid.  It is copied,
                so the caller's set is never modified.

        Returns:
            List of sorted six-number lists, possibly shorter than requested.
        """
        seen = set(exclude) if exclude else set()
        seed = sorted(prev_ball)
        delta_patterns = (
            (0, 0, 0, 0, 0, 0),
            (-1, 0, 0, 0, 0, 1),
            (0, -1, 0, 0, 1, 0),
            (0, 0, -1, 1, 0, 0),
            (-2, 0, 0, 0, 0, 2),
            (0, -2, 0, 0, 2, 0),
            (0, 0, -2, 2, 0, 0),
            (-1, -1, 0, 0, 1, 1),
            (1, 0, -1, 0, 0, 0),
            (0, 1, 0, -1, 0, 0),
            (1, -1, 1, -1, 1, -1),
            (-1, 1, -1, 1, -1, 1),
        )
        out = []
        shift = 0
        while len(out) < need_count and shift <= 8:
            for delta in delta_patterns:
                cand = sorted(
                    min(45, max(1, base + step + shift))
                    for base, step in zip(seed, delta)
                )
                if len(set(cand)) != 6:
                    continue
                key = tuple(cand)
                if key in seen:
                    continue
                seen.add(key)
                out.append(cand)
                if len(out) >= need_count:
                    break
            shift += 1
        return out
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target date: the coming Saturday (weekday 5).  On Saturday after hour 20
    # and on Sunday, the Saturday of the following week is used instead.
    today = datetime.today()
    weekday = today.weekday()
    if weekday == 6 or (weekday == 5 and today.hour > 20):
        days_ahead = 12 - weekday
    else:
        days_ahead = 5 - weekday
    this_weekend = today + timedelta(days=days_ahead)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: remember the last non-blank record of the history file.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}

    # Fixed weekly picks
    practice.predict1(result_json[ymd])

    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])

    header = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    rows = [
        " {}. {}\n".format(rank, str(ball))
        for rank, ball in enumerate(result_json[ymd], start=1)
    ]
    # Send 100 rows per message; only the first message carries the header.
    for offset in range(0, len(rows), 100):
        body = "".join(rows[offset:offset + 100])
        if offset == 0:
            body = header + body
        practice.bot.sendMsg("{}".format(body))

    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

View File

@@ -1,189 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_3 import BallFilter
class Practice:
    """Collect lotto draw history and build the weekly recommendation list.

    ``craw`` downloads draw results into ``<lottoHistoryFile>.json`` and
    ``<lottoHistoryFile>.txt``; ``predict1`` appends the fixed weekly picks;
    ``predict2`` appends every 6-number combination that ``BallFilter``
    does not reject.
    """

    bot = None
    preprocessor = None
    predictor = None
    extract_count = None

    # Draw-result endpoint; the draw number is appended to it.
    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='

    def __init__(self, resources_path):
        """Create the messaging bot. ``resources_path`` is accepted but unused here."""
        self.bot = TelegramBot()

    @staticmethod
    def _fetch_draw(drwNo):
        """Request one draw; return its JSON dict, or None when not 'success'."""
        res = requests.post(Practice._DRAW_URL + str(drwNo))
        result = res.json()
        if result['returnValue'] != 'success':
            return None
        return result

    @staticmethod
    def _record_draw(jsonFp, textFp, drwNo, result):
        """Append one draw to both history files, echo it, return the 7 numbers."""
        numbers = [result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
                   result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
                   result['bnusNo']]
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        row = "%d,%d,%d,%d,%d,%d,%d,%d" % (drwNo, *numbers)
        textFp.write(row + "\n")
        print(row)
        return numbers

    def craw(self, lottoHistoryFile, drwNo=None):
        """Download draw results and store them in the history files.

        Args:
            lottoHistoryFile: path prefix; ``.json`` and ``.txt`` are appended.
            drwNo: when given, only that draw is fetched and appended. When
                None, both files are rewritten from draw 1 until the first
                non-success response.

        Returns:
            ``[n1..n6, bonus]`` of the last stored draw, or None if nothing
            was stored.
        """
        ball = None
        single = drwNo is not None
        mode = 'a' if single else 'w'
        # ``with`` closes both files on every exit path, including the early
        # return on a failed lookup, which previously leaked the handles.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if single:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                ball = self._record_draw(jsonFp, textFp, drwNo, result)
            else:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result is None:
                        break
                    ball = self._record_draw(jsonFp, textFp, idx, result)
                    idx += 1
                    # Pause between requests.
                    time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the 11 fixed weekly combinations to ``result_json`` in place."""
        fixed_picks = (
            (6, 7, 10, 11, 20, 45),
            (2, 7, 17, 28, 35, 39),
            (6, 10, 19, 25, 33, 35),
            (3, 17, 20, 24, 35, 45),
            (5, 15, 18, 29, 36, 41),
            (6, 15, 20, 23, 37, 43),
            (8, 15, 19, 23, 38, 41),
            (5, 11, 19, 24, 40, 45),
            (9, 16, 18, 23, 35, 43),
            (7, 13, 19, 28, 33, 44),
            (7, 11, 18, 29, 37, 42),
        )
        # Fresh list objects on every call so callers may mutate them freely.
        result_json.extend(list(pick) for pick in fixed_picks)
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every unfiltered 6-number combination to ``result_json``.

        Args:
            resources_path: directory holding ``lotto_history.json``/``.txt``.
            ymd: target draw date string passed to ``BallFilter.getNextNo``.
            result_json: list extended in place with accepted combinations.

        Returns:
            ``(p_no, p_ball)``: number and six main balls of draw ``no - 1``.

        Raises:
            IndexError: if draw ``no - 1`` is missing from the txt history.
        """
        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Stream the 8,145,060 combinations instead of materializing them.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if 0 < len(filter_type):
                continue
            result_json.append(ball)

        p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = p_ball[0]
        p_ball = p_ball[1:7]
        return p_no, p_ball
if __name__ == '__main__':
    # Script entry point: choose the target Saturday, build this week's
    # recommendation list and push it to the bot in chunks of 100 lines.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target date: the coming Saturday (weekday() == 5). On Saturday after
    # 20:59 and on Sunday the target rolls over to the following Saturday.
    today = datetime.today()
    days_ahead = 5 - today.weekday()
    if today.weekday() == 6 or (today.weekday() == 5 and today.hour > 20):
        days_ahead += 7
    this_weekend = today + timedelta(days=days_ahead)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: read the last non-empty record of the history file.
    # NOTE(review): last_json is only consumed by the disabled crawl call
    # below; the read is kept so a missing history file still fails early.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    # Fixed picks that are added every week
    practice.predict1(result_json[ymd])
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])

    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for i, ball in enumerate(result_json[ymd], start=1):
        p_str += " {}. {}\n".format(i, str(ball))
        # Flush every 100 entries so one message never grows unbounded.
        if i % 100 == 0:
            practice.bot.sendMsg(p_str)
            p_str = ''
    # Send whatever is still buffered. Testing the buffer (instead of
    # len % 100) also delivers the header when there are no entries at all.
    if p_str:
        practice.bot.sendMsg(p_str)

    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

View File

@@ -1216,3 +1216,14 @@
{"returnValue": "success", "drwNoDate": "2026-03-21", "drwNo": 1216, "drwtNo1": 3, "drwtNo2": 10, "drwtNo3": 14, "drwtNo4": 15, "drwtNo5": 23, "drwtNo6": 24, "bnusNo": 25}
{"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41}
{"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25}
{"returnValue": "success", "drwNoDate": "2026-04-11", "drwNo": 1219, "drwtNo1": 1, "drwtNo2": 2, "drwtNo3": 15, "drwtNo4": 28, "drwtNo5": 39, "drwtNo6": 45, "bnusNo": 31}
{"returnValue": "success", "drwNoDate": "2026-04-18", "drwNo": 1220, "drwtNo1": 2, "drwtNo2": 22, "drwtNo3": 25, "drwtNo4": 28, "drwtNo5": 34, "drwtNo6": 43, "bnusNo": 16}
{"returnValue": "success", "drwNoDate": "2026-04-25", "drwNo": 1221, "drwtNo1": 6, "drwtNo2": 13, "drwtNo3": 18, "drwtNo4": 28, "drwtNo5": 30, "drwtNo6": 36, "bnusNo": 9}
{"returnValue": "success", "drwNoDate": "2026-05-02", "drwNo": 1222, "drwtNo1": 4, "drwtNo2": 11, "drwtNo3": 17, "drwtNo4": 22, "drwtNo5": 32, "drwtNo6": 41, "bnusNo": 34}
{"returnValue": "success", "drwNoDate": "2026-05-09", "drwNo": 1223, "drwtNo1": 16, "drwtNo2": 18, "drwtNo3": 20, "drwtNo4": 32, "drwtNo5": 33, "drwtNo6": 39, "bnusNo": 26}
{"returnValue": "success", "drwNoDate": "2026-05-16", "drwNo": 1224, "drwtNo1": 9, "drwtNo2": 18, "drwtNo3": 21, "drwtNo4": 27, "drwtNo5": 44, "drwtNo6": 45, "bnusNo": 28}
{"returnValue": "success", "drwNoDate": "2026-05-23", "drwNo": 1225, "drwtNo1": 8, "drwtNo2": 9, "drwtNo3": 19, "drwtNo4": 25, "drwtNo5": 41, "drwtNo6": 42, "bnusNo": 33}
{"returnValue": "success", "drwNoDate": "2026-05-30", "drwNo": 1226, "drwtNo1": 4, "drwtNo2": 6, "drwtNo3": 13, "drwtNo4": 17, "drwtNo5": 26, "drwtNo6": 28, "bnusNo": 41}
{"returnValue": "success", "drwNoDate": "2026-06-06", "drwNo": 1227, "drwtNo1": 1, "drwtNo2": 14, "drwtNo3": 16, "drwtNo4": 34, "drwtNo5": 41, "drwtNo6": 44, "bnusNo": 13}
{"returnValue": "success", "drwNoDate": "2026-06-13", "drwNo": 1228, "drwtNo1": 24, "drwtNo2": 29, "drwtNo3": 30, "drwtNo4": 31, "drwtNo5": 35, "drwtNo6": 44, "bnusNo": 1}
{"returnValue": "success", "drwNoDate": "2026-06-20", "drwNo": 1229, "drwtNo1": 12, "drwtNo2": 13, "drwtNo3": 29, "drwtNo4": 34, "drwtNo5": 37, "drwtNo6": 42, "bnusNo": 16}

View File

@@ -1204,7 +1204,7 @@
1204,8,16,28,30,31,44,27
1205,1,4,16,23,31,41,2
1206,1,3,17,26,27,42,23
1207,10,22,24,27,38,45,11
1207,10,22,24,27,38,45,21
1208,6,27,30,36,38,42,25
1209,2,17,20,35,37,39,24
1210,1,7,9,17,27,38,31
@@ -1216,3 +1216,14 @@
1216,3,10,14,15,23,24,25
1217,8,10,15,20,29,31,41
1218,3,28,31,32,42,45,25
1219,1,2,15,28,39,45,31
1220,2,22,25,28,34,43,16
1221,6,13,18,28,30,36,9
1222,4,11,17,22,32,41,34
1223,16,18,20,32,33,39,26
1224,9,18,21,27,44,45,28
1225,8,9,19,25,41,42,33
1226,4,6,13,17,26,28,41
1227,1,14,16,34,41,44,13
1228,24,29,30,31,35,44,1
1229,12,13,29,34,37,42,16

View File

@@ -1,99 +0,0 @@
import os
import time
import datetime
import pandas as pd
import itertools
from filter_model_1 import BallFilter
class FilterTestReview:
    """Replay past draws through ``BallFilter`` and tally prize tiers."""

    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Enumerate all combinations per draw and score the unfiltered ones.

        Args:
            df_ball: history frame with columns no, b1..b6, bn.
            nos: iterable of draw numbers to replay; None means no draws.

        Returns:
            ``(win_history, win_history_size)``: draws whose winning numbers
            survived the filter (no -> answer) and, for every replayed draw,
            the count of surviving combinations (no -> size).

        Raises:
            IndexError: if a requested draw number is absent from ``df_ball``.
        """
        win_history = {}
        win_history_size = {}
        # The declared default is None; iterating it directly raised TypeError.
        for no in (nos if nos is not None else ()):
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            answer_set = set(answer)
            bonus = balls[7]
            # Only the number of survivors is reported, so count, don't store.
            survivor_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}
            # Stream the combinations rather than building an 8M-item list
            # again for every draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                survivor_count += 1
                match = len(answer_set.intersection(ball))
                if match == 6:
                    if no not in win_history:
                        win_history[no] = answer.copy()
                    win_dic[1].append(ball)
                elif match == 5:
                    # 2nd prize: five matches plus the bonus number.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)
            win_history_size[no] = survivor_count
            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history, win_history_size
if __name__ == '__main__':
    # Replay a hand-picked list of past draws and report which of them the
    # filter would have let through, with the surviving candidate count.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filterTestReview = FilterTestReview(resources_path)

    start = time.time()
    # To sweep a contiguous range instead, pass e.g. nos=range(1126, 21, -1).
    win_history, win_history_size = filterTestReview.validate(
        df_ball,
        nos=[1057, 1046, 1022, 900, 841, 816, 696, 593, 574, 426, 356, 324, 303, 245, 147, 139, 71])
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print("{} 회 당첨".format(len(win_history)))
    # Report in ascending draw order.
    for no in sorted(win_history):
        print("\t>{} > {} ({})".format(no, win_history[no], win_history_size[no]))

View File

@@ -1,99 +0,0 @@
import os
import time
import datetime
import pandas as pd
import itertools
from filter_model_2 import BallFilter
class FilterTestReview:
    """Replay past draws through ``BallFilter`` and tally prize tiers."""

    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Enumerate all combinations per draw and score the unfiltered ones.

        Args:
            df_ball: history frame with columns no, b1..b6, bn.
            nos: iterable of draw numbers to replay; None means no draws.

        Returns:
            ``(win_history, win_history_size)``: draws whose winning numbers
            survived the filter (no -> answer) and, for every replayed draw,
            the count of surviving combinations (no -> size).

        Raises:
            IndexError: if a requested draw number is absent from ``df_ball``.
        """
        win_history = {}
        win_history_size = {}
        # The declared default is None; iterating it directly raised TypeError.
        for no in (nos if nos is not None else ()):
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            answer_set = set(answer)
            bonus = balls[7]
            # Only the number of survivors is reported, so count, don't store.
            survivor_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}
            # Stream the combinations rather than building an 8M-item list
            # again for every draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                survivor_count += 1
                match = len(answer_set.intersection(ball))
                if match == 6:
                    if no not in win_history:
                        win_history[no] = answer.copy()
                    win_dic[1].append(ball)
                elif match == 5:
                    # 2nd prize: five matches plus the bonus number.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)
            win_history_size[no] = survivor_count
            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history, win_history_size
if __name__ == '__main__':
    # Replay a hand-picked list of past draws and report which of them the
    # filter would have let through, with the surviving candidate count.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filterTestReview = FilterTestReview(resources_path)

    start = time.time()
    # To sweep a contiguous range instead, pass e.g. nos=range(1126, 21, -1).
    win_history, win_history_size = filterTestReview.validate(
        df_ball,
        nos=[1057, 1046, 1022, 900, 841, 816, 696, 593, 574, 426, 356, 324, 303, 245, 147, 139, 71])
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print("{} 회 당첨".format(len(win_history)))
    # Report in ascending draw order.
    for no in sorted(win_history):
        print("\t>{} > {} ({})".format(no, win_history[no], win_history_size[no]))

View File

@@ -1,99 +0,0 @@
import os
import time
import datetime
import pandas as pd
import itertools
from filter_model_3 import BallFilter
class FilterTestReview:
    """Replay past draws through ``BallFilter`` and tally prize tiers."""

    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Enumerate all combinations per draw and score the unfiltered ones.

        Args:
            df_ball: history frame with columns no, b1..b6, bn.
            nos: iterable of draw numbers to replay; None means no draws.

        Returns:
            ``(win_history, win_history_size)``: draws whose winning numbers
            survived the filter (no -> answer) and, for every replayed draw,
            the count of surviving combinations (no -> size).

        Raises:
            IndexError: if a requested draw number is absent from ``df_ball``.
        """
        win_history = {}
        win_history_size = {}
        # The declared default is None; iterating it directly raised TypeError.
        for no in (nos if nos is not None else ()):
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            answer_set = set(answer)
            bonus = balls[7]
            # Only the number of survivors is reported, so count, don't store.
            survivor_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}
            # Stream the combinations rather than building an 8M-item list
            # again for every draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                survivor_count += 1
                match = len(answer_set.intersection(ball))
                if match == 6:
                    if no not in win_history:
                        win_history[no] = answer.copy()
                    win_dic[1].append(ball)
                elif match == 5:
                    # 2nd prize: five matches plus the bonus number.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)
            win_history_size[no] = survivor_count
            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history, win_history_size
if __name__ == '__main__':
    # Replay a hand-picked list of past draws and report which of them the
    # filter would have let through, with the surviving candidate count.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filterTestReview = FilterTestReview(resources_path)

    start = time.time()
    # To sweep a contiguous range instead, pass e.g. nos=range(1126, 21, -1).
    win_history, win_history_size = filterTestReview.validate(
        df_ball,
        nos=[1057, 1046, 1022, 900, 841, 816, 696, 593, 574, 426, 356, 324, 303, 245, 147, 139, 71])
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print("{} 회 당첨".format(len(win_history)))
    # Report in ascending draw order.
    for no in sorted(win_history):
        print("\t>{} > {} ({})".format(no, win_history[no], win_history_size[no]))

View File

@@ -1,17 +1,9 @@
#!/usr/bin/env bash
# Run Python with the given arguments inside the miniconda "ncue" environment, e.g.: ./scripts/run_with_ncue.sh final_filterTest.py
# Abort on any command failure, on use of an unset variable, and on a failure anywhere in a pipeline.
set -euo pipefail
# Resolve the directory one level above this script and make it the working directory.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"
# Probe candidate conda install prefixes in order; $MINICONDA_HOME is tried first when set.
for base in "${MINICONDA_HOME:-}" "$HOME/miniconda3" "$HOME/miniforge3" "$HOME/anaconda3" "$HOME/mambaforge"; do
# Skip the empty entry produced when MINICONDA_HOME is unset.
[ -n "$base" ] || continue
c="$base/bin/conda"
if [ -x "$c" ]; then
# Replace this shell with `conda run`, forwarding all script arguments to python.
exec "$c" run -n ncue -- python "$@"
fi
done
# Fall back to the conda binary named by $CONDA_EXE, if it is set and executable.
if [ -n "${CONDA_EXE:-}" ] && [ -x "$CONDA_EXE" ]; then
exec "$CONDA_EXE" run -n ncue -- python "$@"
fi
# No conda found: print a hint (in Korean) to stderr and exit with a failure status.
echo "conda ncue 환경을 찾지 못했습니다. 터미널에서: conda activate ncue && python \"\$@\"" >&2
exit 1
# Resolve this script's own directory and the repository root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Put the usual conda install locations in front of PATH so `conda` can be found.
export PATH="${HOME}/miniconda3/bin:${HOME}/anaconda3/bin:/opt/anaconda3/bin:${PATH}"
# Load conda's shell integration from its base prefix, then activate the "ncue" environment.
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate ncue
# Run from the repository root and replace this shell with python, forwarding all arguments.
cd "${REPO_ROOT}"
exec python "$@"

236
test_1.py
View File

@@ -1,236 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_1 import BallFilter
import time
import datetime
class FilterTest:
    """Evaluate ``BallFilter`` rules against historical lotto draws."""

    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from ``<resources_path>/<history_json>``.

        The test needs previous-draw and recent-N-week window features, so the
        full history (json) must be used.
        """
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Report which filters reject the real winning numbers per draw.

        Args:
            df_ball: history frame (columns no, b1..b6, bn). It may hold the
                whole history; only draws in ``[start_no, end_no]`` are scored.
            start_no: first draw number to score (inclusive).
            end_no: last draw number to score (inclusive).
            filter_ball: accepted for interface compatibility; not used.

        Returns:
            Number of scored draws whose winning numbers hit no filter.
        """
        win_count = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # number of filters hit -> list of filter lists
        filter_dic_1 = {}     # filter that was the only one hit -> count
        filter_dic_2 = {}     # "a,b" pair hit together -> count

        # Walk the frame from the newest row to the oldest.
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group per-draw results by how many filters were hit.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # '{:.2f}' gives two decimals; the former '{:.2}' meant two significant
        # digits and rendered e.g. 100.0 as '1e+02'.
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-number combination (as tuples) that passes the filter.

        Args:
            no: draw number the candidates are generated for.
            df_ball: history frame passed through to the filter.
            filter_ball: optional numbers; a combination containing any of
                them is skipped before filtering.
        """
        excluded = set(filter_ball) if filter_ball is not None else None
        final_candidates = []
        # Stream the combinations instead of materializing ~8.1M tuples.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded is not None and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited newest first; the row at position 0 is not visited.
        ``p_win_count`` is only echoed in the summary line for comparison.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Return ``{no: answer}`` for draws whose winning combination survives.

        Args:
            df_ball: history frame with columns no, b1..b6, bn.
            nos: iterable of draw numbers to check; None means no draws.
        """
        win_history = {}
        # The declared default is None; iterating it directly raised TypeError.
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(set(answer) & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # CLI entry point: score the filter against draws in
    # [--start-no, --end-no] and print the elapsed time.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument("--ruleset", default=None,
                        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용")
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()
    resources_path = args.resources

    # Load the full txt history so previous/window features are computed
    # correctly; scoring itself is limited to the test range.
    df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball = []
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    started_at = time.time()
    win_count = filterTest.find_filter_method(
        df_ball,
        start_no=args.start_no,
        end_no=args.end_no,
        filter_ball=filter_ball,
    )
    elapsed = datetime.timedelta(seconds=time.time() - started_at)
    print("process_time: ", elapsed)

    # Disabled step kept as an inert string literal (never executed).
    """
print("\n\n")
no = df_ball['no'].values[-1]
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = ball[1:7]
print("STEP #0. 최종 후보 선정")
start = time.time()
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print(" > size: {}".format(len(final_candidates)))
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
with open(file_name, 'w+') as outFp:
for ball in final_candidates:
ball_str = [str(b) for b in answer]
outFp.write("{}\n".format(','.join(ball_str)))
print('{}회, 정답: {}\n'.format(no, str(answer)))
"""
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned in the feature file): 22 wins

236
test_2.py
View File

@@ -1,236 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_2 import BallFilter
import time
import datetime
class FilterTest:
    """Evaluate ``BallFilter`` rules against historical lotto draws."""

    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from ``<resources_path>/<history_json>``.

        The test needs previous-draw and recent-N-week window features, so the
        full history (json) must be used.
        """
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Report which filters reject the real winning numbers per draw.

        Args:
            df_ball: history frame (columns no, b1..b6, bn). It may hold the
                whole history; only draws in ``[start_no, end_no]`` are scored.
            start_no: first draw number to score (inclusive).
            end_no: last draw number to score (inclusive).
            filter_ball: accepted for interface compatibility; not used.

        Returns:
            Number of scored draws whose winning numbers hit no filter.
        """
        win_count = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # number of filters hit -> list of filter lists
        filter_dic_1 = {}     # filter that was the only one hit -> count
        filter_dic_2 = {}     # "a,b" pair hit together -> count

        # Walk the frame from the newest row to the oldest.
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group per-draw results by how many filters were hit.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # '{:.2f}' gives two decimals; the former '{:.2}' meant two significant
        # digits and rendered e.g. 100.0 as '1e+02'.
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-number combination (as tuples) that passes the filter.

        Args:
            no: draw number the candidates are generated for.
            df_ball: history frame passed through to the filter.
            filter_ball: optional numbers; a combination containing any of
                them is skipped before filtering.
        """
        excluded = set(filter_ball) if filter_ball is not None else None
        final_candidates = []
        # Stream the combinations instead of materializing ~8.1M tuples.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded is not None and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited newest first; the row at position 0 is not visited.
        ``p_win_count`` is only echoed in the summary line for comparison.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Return ``{no: answer}`` for draws whose winning combination survives.

        Args:
            df_ball: history frame with columns no, b1..b6, bn.
            nos: iterable of draw numbers to check; None means no draws.
        """
        win_history = {}
        # The declared default is None; iterating it directly raised TypeError.
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(set(answer) & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # CLI entry point: score the filter against draws in
    # [--start-no, --end-no] and print the elapsed time.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        # This script imports BallFilter from filter_model_2; the help text
        # previously named filter_model_1.py.
        help="Ruleset JSON path (optional). Default: filter_model_2.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()
    resources_path = args.resources

    # Load the full txt history so previous/window features are computed
    # correctly; scoring itself is limited to the test range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball = []
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step kept as an inert string literal (never executed).
    """
print("\n\n")
no = df_ball['no'].values[-1]
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = ball[1:7]
print("STEP #0. 최종 후보 선정")
start = time.time()
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print(" > size: {}".format(len(final_candidates)))
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
with open(file_name, 'w+') as outFp:
for ball in final_candidates:
ball_str = [str(b) for b in answer]
outFp.write("{}\n".format(','.join(ball_str)))
print('{}회, 정답: {}\n'.format(no, str(answer)))
"""
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned in the feature file): 22 wins

236
test_3.py
View File

@@ -1,236 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test helper that runs historical draws through a ``BallFilter``.

    All methods report their progress and results on stdout.
    """

    # Filter engine; assigned per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from ``<resources_path>/<history_json>``.

        The test relies on previous-draw and recent-N-week window features, so
        the full history (JSON) has to be used.
        """
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _answer_of(df_ball, no):
        """Return columns 1..6 (the six main numbers) of the row whose 'no' equals ``no``."""
        return df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Score the winning numbers of draws ``start_no..end_no`` against the filter.

        Args:
            df_ball: Draw history; may be the whole history, only rows whose
                'no' lies in the inclusive range are scored.
            start_no: First draw number to score.
            end_no: Last draw number to score.
            filter_ball: Accepted for call compatibility; not used here.

        Returns:
            Number of scored draws for which the filter reported nothing.
        """
        win_count = 0
        no_filter_ball = {}   # draw no -> winning numbers that passed every filter
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # count of triggered filters -> list of filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = self._answer_of(df_ball, no)
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group draws by how many filters they triggered so the ones that
            # are closest to passing can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        reports = (
            ("\n\t[걸러진 유일 필터]", filter_dic_1),
            ("\n\t[2개 필터에 걸린 경우]", filter_dic_2),
            ("\n\t[Filter 유형 별 걸린 개수]", filter_dic),
        )
        for title, counts in reports:
            print(title)
            for name, hits in sorted(counts.items(), key=lambda x: x[1], reverse=True):
                print("\t\t>", name, "->", hits)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # '.2f' (fixed point); the former '.2' meant two significant digits and
        # rendered e.g. 50.0 as '5e+01'.
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination for which the filter reports nothing.

        Args:
            no: Draw number passed through to the filter.
            df_ball: Draw history passed through to the filter.
            filter_ball: Optional numbers to exclude; a combination containing
                any of them is skipped without being filtered.

        Returns:
            List of 6-tuples in ``itertools.combinations`` order.
        """
        final_candidates = []
        blocked = set(filter_ball) if filter_ball is not None else set()
        # Iterate lazily: materialising all 8,145,060 tuples is unnecessary.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if blocked and not blocked.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited from the last one down to index 1 (row 0 is not
        visited). Draws sharing a number with ``filter_ball`` are skipped.
        ``p_win_count`` is only echoed in the summary line.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = self._answer_of(df_ball, no)
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Brute-force check whether each draw in ``nos`` survives the filter.

        For every draw number, all 6-of-45 combinations are passed to
        ``extract_final_candidates`` in order until the combination equal to
        the winning numbers is found to pass.

        Args:
            df_ball: Draw history.
            nos: Iterable of draw numbers; ``None`` is treated as empty.

        Returns:
            Dict mapping draw number to its winning numbers for each hit.
        """
        win_history = {}
        if nos is None:
            return win_history
        for no in nos:
            print(no, "processing...")
            answer = self._answer_of(df_ball, no)
            answer_set = set(answer)
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line entry point: score draws [--start-no, --end-no] against the
    # filter and print the statistics produced by FilterTest.find_filter_method.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        # This script imports BallFilter from filter_model_3, so the built-in
        # ruleset is that module's (the text used to name filter_model_1.py).
        help="Ruleset JSON path (optional). Default: filter_model_3.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()
    resources_path = args.resources

    # Load the full history txt so previous-draw/window features are computed
    # normally; scoring itself is limited to the test range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball = []

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')
    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(
        df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball
    )
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled STEP #0 (final candidate export): it called
    # filterTest.find_final_candidates(no, df_ball, filter_ball=None) for the
    # latest draw and wrote the result to
    # <resources>/final_candidates.biz_a1.txt. The removed snippet wrote
    # `answer` on every line instead of the candidate `ball`; write `ball`
    # if this step is ever restored.

    # Disabled STEP #2 (win-count verification):
    #   filterTest.check_filter_method(df_ball, win_count)
    # Original version (fixed in the feature file): 22 wins

View File

@@ -1,405 +0,0 @@
#!/usr/bin/env python3
"""
학습 구간(1~800회) 당첨번호로 final_BallFilter.extract_final_candidates 에 쓸 허용 집합을 계산합니다.
표준 라이브러리 + pandas(df 호환)만 사용합니다.
"""
from __future__ import annotations
import csv
import re
from collections import defaultdict
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
HISTORY = ROOT / "resources" / "lotto_history.txt"
BALLFILTER_SRC = ROOT / "BallFilter_25.py"
OUT = ROOT / "final_filter_params.py"
TRAIN_LO = 1
TRAIN_HI = 800
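# To avoid an overly wide union over the training distribution, each feature's
# unique values are trimmed to the percentile band [PCT_LO, PCT_HI].
# NOTE(review): the original note said values falling outside the band are
# re-added per draw (100% training coverage), but main() in this script only
# applies pct_band_unique and shows no re-inclusion step. Confirm the intent.
# A narrower band makes the filter stronger. Balance training vs. validation by
# tuning these values against the results of final_filterTest.py.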
PCT_LO = 8
PCT_HI = 92
PRIME = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43}
COMPOSITE = {4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45}
def load_draws():
    """Read HISTORY (CSV) and return ``{draw_no: sorted six numbers}``.

    Column 0 is the draw number and columns 1..6 are the balls; empty rows are
    ignored. The dict is built in ascending draw-number order.
    """
    with open(HISTORY, newline="", encoding="utf-8") as fh:
        records = [row for row in csv.reader(fh) if row]
    entries = [
        (int(row[0]), sorted(int(cell) for cell in row[1:7]))
        for row in records
    ]
    return dict(sorted(entries, key=lambda entry: entry[0]))
def get_ac(ball):
ac = set()
for i in range(5, -1, -1):
for j in range(i - 1, -1, -1):
ac.add(ball[i] - ball[j])
return len(ac) - (6 - 1)
def interval_sum(ball):
    """Return the sum of the five gaps between neighbouring balls (indices 0..5)."""
    total = 0
    for idx in range(1, 6):
        total += ball[idx] - ball[idx - 1]
    return total
def first_letter_sum(ball):
    """Return the sum of the leading digits of the balls whose decimal text has two characters."""
    total = 0
    for number in ball:
        text = str(number)
        if len(text) == 2:
            total += int(text[0])
    return total
def last_letter_sum(ball):
    """Return the sum of the final digits of the one- and two-character balls."""
    total = 0
    for number in ball:
        text = str(number)
        if len(text) == 2:
            total += int(text[1])
        elif len(text) == 1:
            total += int(text)
    return total
def uniq_end_digits(ball):
    """Return how many different values ``b % 10`` takes over the balls."""
    endings = set()
    for number in ball:
        endings.add(number % 10)
    return len(endings)
def high_low(ball):
    """Return ``(low, high)``: how many balls are below 23 and how many are above 23.

    A ball equal to 23 is counted in neither group.
    """
    low = 0
    high = 0
    for number in ball:
        if number < 23:
            low += 1
        elif number > 23:
            high += 1
    return low, high
def section10_count(ball):
    """Return how many different values ``int(b / 10)`` takes over the balls."""
    return len({int(number / 10) for number in ball})
def count_mult(ball, m):
    """Return how many balls are divisible by ``m``."""
    multiples = [number for number in ball if number % m == 0]
    return len(multiples)
def continus_max(ball):
    """Return the length of the longest run of +1 steps among the first six balls.

    The result is at least 1, even when no two neighbours are consecutive.
    """
    longest = 1
    current = 1
    idx = 1
    while idx < 6:
        if ball[idx] - 1 == ball[idx - 1]:
            current += 1
            if current > longest:
                longest = current
        else:
            current = 1
        idx += 1
    return longest
def weeks_freq(draws_map, answer, no, week):
    """Count the balls in ``answer`` that appeared in draws ``no-1`` .. ``no-week``.

    Draw numbers missing from ``draws_map`` are ignored.
    """
    seen = set()
    for offset in range(1, week + 1):
        seen.update(draws_map.get(no - offset, ()))
    return len([number for number in answer if number in seen])
def pct_band_unique(values, lo=PCT_LO, hi=PCT_HI):
    """Keep only the unique values lying inside a percentile band.

    The unique values are sorted and the band limits are taken at positions
    ``int(lo/100 * (n-1))`` and ``int(hi/100 * (n-1))`` of that sorted list.
    When there are six or fewer unique values, all of them are kept.
    """
    if not values:
        return set()
    ordered = sorted(set(values))
    count = len(ordered)
    if count <= 6:
        return set(ordered)
    lower_idx = int((lo / 100.0) * (count - 1))
    upper_idx = int((hi / 100.0) * (count - 1))
    floor_value = ordered[lower_idx]
    ceil_value = ordered[upper_idx]
    kept = set()
    for value in ordered:
        if floor_value <= value <= ceil_value:
            kept.add(value)
    return kept
def parse_pair_triple_rules():
    """Extract pair/triple rules from the text of BALLFILTER_SRC.

    The source is scanned for ``len(set_ball & {...}) == 2`` and ``== 3``
    expressions; a match is kept only when the braces hold exactly that many
    integers.

    Returns:
        ``(pairs, triples)``, two lists of frozensets in source order.
    """
    text = BALLFILTER_SRC.read_text(encoding="utf-8")

    def collect(pattern, size):
        # One frozenset per match whose brace content has `size` members.
        found = []
        for match in re.finditer(pattern, text):
            members = [int(token.strip()) for token in match.group(1).split(",")]
            if len(members) == size:
                found.append(frozenset(members))
        return found

    pairs = collect(r"len\(set_ball & \{([^}]+)\}\) == 2", 2)
    triples = collect(r"len\(set_ball & \{([^}]+)\}\) == 3", 3)
    return pairs, triples
def main():
    """Compute the allow-sets from the training draws and write them to OUT.

    Steps:
      1. Load the draw history and the pair/triple rules parsed from the
         BallFilter source.
      2. Drop every pair/triple rule that already occurred together in a
         training draw, so the block lists never reject a training winner.
      3. For each training draw that has a predecessor, collect the feature
         values and their absolute difference to the previous draw.
      4. Trim every numeric feature set with ``pct_band_unique``.
      5. Emit the result as Python source and print a short summary.
    """
    multiples = (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23)
    windows = (8, 12, 16, 20)

    draws = load_draws()
    pair_rules, triple_rules = parse_pair_triple_rules()
    train_draws = {n: draws[n] for n in range(TRAIN_LO, TRAIN_HI + 1) if n in draws}

    # Block lists: pairs/triples seen together in a training draw are excluded
    # from the rules (they must not block a winning combination).
    train_pairs_seen = set()
    train_triples_seen = set()
    for b in train_draws.values():
        for i in range(6):
            for j in range(i + 1, 6):
                train_pairs_seen.add(frozenset((b[i], b[j])))
                for k in range(j + 1, 6):
                    train_triples_seen.add(frozenset((b[i], b[j], b[k])))
    pair_block = [p for p in pair_rules if p not in train_pairs_seen]
    triple_block = [t for t in triple_rules if t not in train_triples_seen]

    sets = defaultdict(set)
    flags_prev = {"need_relax_previous": False, "need_relax_prev7": False}

    def record(key, current, previous, diff_key=None):
        # Store a feature value and its absolute change versus the previous draw.
        sets[key].add(current)
        sets[diff_key or key + "_diff"].add(abs(current - previous))

    def even_count(numbers):
        return sum(1 for b in numbers if b % 2 == 0)

    # Start at TRAIN_LO (not a hard-coded 2): a draw without a predecessor in
    # the history is skipped by the check below, so the result is unchanged
    # for TRAIN_LO == 1.
    for no in range(TRAIN_LO, TRAIN_HI + 1):
        if no not in draws or (no - 1) not in draws:
            continue
        ball = draws[no]
        p_ball = draws[no - 1]

        s = sum(ball)
        ps = sum(p_ball)
        record("sum6", s, ps)
        record("avg6", s // 6, ps // 6)
        record("sum3f", ball[0] + ball[1] + ball[2], p_ball[0] + p_ball[1] + p_ball[2])
        record("sum3b", ball[3] + ball[4] + ball[5], p_ball[3] + p_ball[4] + p_ball[5])
        sets["hl_allowed"].add(high_low(ball))
        record("go_sum", ball[0] + ball[5], p_ball[0] + p_ball[5])
        record("interval", interval_sum(ball), interval_sum(p_ball))
        record("first_letter", first_letter_sum(ball), first_letter_sum(p_ball))
        record("last_letter", last_letter_sum(ball), last_letter_sum(p_ball))
        record("b0", ball[0], p_ball[0])
        record("b5", ball[5], p_ball[5])
        record("uniq_end", uniq_end_digits(ball), uniq_end_digits(p_ball))
        record("ac", get_ac(ball), get_ac(p_ball))
        for m in multiples:
            record(f"mul{m}", count_mult(ball, m), count_mult(p_ball, m))
        sets["prime_n"].add(len(set(ball) & PRIME))
        record(
            "composite_n",
            len(set(ball) & COMPOSITE),
            len(set(p_ball) & COMPOSITE),
            diff_key="composite_diff",
        )
        record("even_n", even_count(ball), even_count(p_ball), diff_key="even_diff")
        record("sec10", section10_count(ball), section10_count(p_ball))
        for wk in windows:
            # NOTE(review): the previous draw is scored against the window of
            # `no` (which contains draw no-1 itself), exactly as the original
            # code did. Confirm this matches the filter being mirrored.
            record(f"w{wk}", weeks_freq(draws, ball, no, wk), weeks_freq(draws, p_ball, no, wk))
        sets["continus_max"].add(continus_max(ball))

        # filterPreviousNumber (same as the original): flag when no ball equals
        # or neighbours (+/-1) a ball of the previous draw.
        pb_set = set(p_ball)
        if not any(b in pb_set or b - 1 in pb_set or b + 1 in pb_set for b in ball):
            flags_prev["need_relax_previous"] = True

        # filterAllPreivous7: flag when all six balls appeared in draws
        # no-1 .. no-7.
        pb7 = set()
        for i in range(no - 1, no - 8, -1):
            if i in draws:
                pb7.update(draws[i])
        if len(set(ball) & pb7) == 6:
            flags_prev["need_relax_prev7"] = True

    def with_diff(keys):
        # Expand each key into [key, key + "_diff"], preserving order.
        expanded = []
        for key in keys:
            expanded.extend([key, key + "_diff"])
        return expanded

    # Key groups, in the order in which they are emitted.
    head_keys = with_diff(["sum6", "avg6", "sum3f", "sum3b"])
    body_keys = (
        with_diff(["go_sum", "interval", "first_letter", "last_letter", "b0", "b5", "uniq_end", "ac"])
        + with_diff([f"mul{m}" for m in multiples])
        + ["prime_n", "composite_n", "composite_diff", "even_n", "even_diff", "sec10", "sec10_diff"]
        + with_diff([f"w{wk}" for wk in windows])
        + ["continus_max"]
    )

    # Tighten every numeric set to its percentile band.
    for key in head_keys + body_keys:
        sets[key] = pct_band_unique(sets[key])

    # High/low pairs skipped by the existing logic.
    # NOTE(review): this yields (0,0), (0,1), (1,0) and (1,1), while the
    # original comment mentioned only (0,1)/(1,0). Verify against the consumer.
    hl_skip = {(lo_n, hi_n) for lo_n in (0, 1) for hi_n in (0, 1)}

    def emit():
        # Render the generated module as text.
        lines = [
            "# -*- coding: utf-8 -*-",
            '"""학습 구간 {}~{}회 기준 자동 생성 — tools/compute_final_filter_params.py"""'.format(
                TRAIN_LO, TRAIN_HI
            ),
            "",
            "TRAIN_RANGE = ({}, {})".format(TRAIN_LO, TRAIN_HI),
            "DISABLE_FILTER_PREVIOUS_NUMBER = {}".format(
                str(flags_prev["need_relax_previous"])
            ),
            "DISABLE_FILTER_ALL_PREVIOUS_7 = {}".format(str(flags_prev["need_relax_prev7"])),
            "",
        ]
        allow_names = []

        def sset(key):
            # ALLOW_<KEY> = sorted list of the allowed values.
            name = "ALLOW_" + key.upper()
            allow_names.append(name)
            lines.append("{} = {}".format(name, repr(sorted(sets[key]))))

        for key in head_keys:
            sset(key)
        lines.append("HL_SKIP = {}".format(repr(sorted(hl_skip))))
        lines.append("HL_SEEN = {}".format(repr(sorted(sets['hl_allowed']))))
        for key in body_keys:
            sset(key)
        lines.append("PAIR_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in pair_block])))
        lines.append("TRIPLE_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in triple_block])))
        lines.extend(["", "# frozenset 캐시", ""])
        for name in allow_names:
            short = name.replace("ALLOW_", "", 1)
            lines.append("_F_{} = frozenset({})".format(short, name))
        lines.append("_F_HL_SEEN = frozenset(HL_SEEN)")
        lines.append("")
        return "\n".join(lines) + "\n"

    OUT.write_text(emit(), encoding="utf-8")
    print("Wrote", OUT)
    print("pair rules:", len(pair_rules), "-> block", len(pair_block))
    print("triple rules:", len(triple_rules), "-> block", len(triple_block))
    print("DISABLE_FILTER_PREVIOUS_NUMBER", flags_prev["need_relax_previous"])
    print("DISABLE_FILTER_ALL_PREVIOUS_7", flags_prev["need_relax_prev7"])


if __name__ == "__main__":
    main()

View File

@@ -1,231 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_1 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test helper that runs historical draws through a ``BallFilter``.

    All methods report their progress and results on stdout.
    """

    # Filter engine; assigned per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _answer_of(df_ball, no):
        """Return columns 1..6 (the six main numbers) of the row whose 'no' equals ``no``."""
        return df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

    def find_filter_method(self, df_ball, start_no, end_no):
        """Score the winning numbers of draws ``start_no..end_no`` against the filter.

        Rows are visited from the last one down to index 20; the first 20 rows
        of ``df_ball`` are never scored (presumably a warm-up for the window
        features; confirm). The printed rate is relative to the number of
        draws actually scored.

        Returns:
            Number of scored draws for which the filter reported nothing.
        """
        win_count = 0
        scored = 0            # draws that were actually passed to the filter
        no_filter_ball = {}   # draw no -> winning numbers that passed every filter
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # count of triggered filters -> list of filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue
            scored += 1
            answer = self._answer_of(df_ball, no)
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group draws by how many filters they triggered so the ones that
            # are closest to passing can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        reports = (
            ("\n\t[걸러진 유일 필터]", filter_dic_1),
            ("\n\t[2개 필터에 걸린 경우]", filter_dic_2),
            ("\n\t[Filter 유형 별 걸린 개수]", filter_dic),
        )
        for title, counts in reports:
            print(title)
            for name, hits in sorted(counts.items(), key=lambda x: x[1], reverse=True):
                print("\t\t>", name, "->", hits)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The rate uses the scored draws as denominator (the whole history
        # length understated it) and '.2f' instead of '.2', which printed two
        # significant digits such as '5e+01'.
        rate = (100 * len(no_filter_ball) / scored) if scored else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), scored, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination for which the filter reports nothing.

        A combination containing any number of ``filter_ball`` is skipped
        without being filtered. Results are 6-tuples in
        ``itertools.combinations`` order.
        """
        final_candidates = []
        blocked = set(filter_ball) if filter_ball is not None else set()
        # Iterate lazily: materialising all 8,145,060 tuples is unnecessary.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if blocked and not blocked.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited from the last one down to index 1 (row 0 is not
        visited). Draws sharing a number with ``filter_ball`` are skipped.
        ``p_win_count`` is only echoed in the summary line.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = self._answer_of(df_ball, no)
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Brute-force check whether each draw in ``nos`` survives the filter.

        For every draw number, all 6-of-45 combinations are passed to
        ``extract_final_candidates`` in order until the combination equal to
        the winning numbers is found to pass. ``nos=None`` is treated as empty.

        Returns:
            Dict mapping draw number to its winning numbers for each hit.
        """
        win_history = {}
        if nos is None:
            return win_history
        for no in nos:
            print(no, "processing...")
            answer = self._answer_of(df_ball, no)
            answer_set = set(answer)
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line entry point: score draws [--start-no, --end-no] against the
    # filter and print the statistics produced by FilterTest.find_filter_method.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()
    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only
    # score [start_no, end_no].
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)
    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled STEP #0 (final candidate export): it called
    # filterTest.find_final_candidates(no, df_ball, filter_ball=None) for the
    # latest draw and wrote the result to
    # <resources>/final_candidates.biz_a1.txt. The removed snippet wrote
    # `answer` on every line instead of the candidate `ball`; write `ball`
    # if this step is ever restored.

    # Disabled STEP #2 (win-count verification):
    #   filterTest.check_filter_method(df_ball, win_count)
    # Original version (fixed in the feature file): 22 wins

View File

@@ -1,231 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_2 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test helper that runs historical draws through a ``BallFilter``.

    All methods report their progress and results on stdout.
    """

    # Filter engine; assigned per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _answer_of(df_ball, no):
        """Return columns 1..6 (the six main numbers) of the row whose 'no' equals ``no``."""
        return df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

    def find_filter_method(self, df_ball, start_no, end_no):
        """Score the winning numbers of draws ``start_no..end_no`` against the filter.

        Rows are visited from the last one down to index 20; the first 20 rows
        of ``df_ball`` are never scored (presumably a warm-up for the window
        features; confirm). The printed rate is relative to the number of
        draws actually scored.

        Returns:
            Number of scored draws for which the filter reported nothing.
        """
        win_count = 0
        scored = 0            # draws that were actually passed to the filter
        no_filter_ball = {}   # draw no -> winning numbers that passed every filter
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # count of triggered filters -> list of filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue
            scored += 1
            answer = self._answer_of(df_ball, no)
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group draws by how many filters they triggered so the ones that
            # are closest to passing can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        reports = (
            ("\n\t[걸러진 유일 필터]", filter_dic_1),
            ("\n\t[2개 필터에 걸린 경우]", filter_dic_2),
            ("\n\t[Filter 유형 별 걸린 개수]", filter_dic),
        )
        for title, counts in reports:
            print(title)
            for name, hits in sorted(counts.items(), key=lambda x: x[1], reverse=True):
                print("\t\t>", name, "->", hits)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The rate uses the scored draws as denominator (the whole history
        # length understated it) and '.2f' instead of '.2', which printed two
        # significant digits such as '5e+01'.
        rate = (100 * len(no_filter_ball) / scored) if scored else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), scored, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination for which the filter reports nothing.

        A combination containing any number of ``filter_ball`` is skipped
        without being filtered. Results are 6-tuples in
        ``itertools.combinations`` order.
        """
        final_candidates = []
        blocked = set(filter_ball) if filter_ball is not None else set()
        # Iterate lazily: materialising all 8,145,060 tuples is unnecessary.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if blocked and not blocked.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited from the last one down to index 1 (row 0 is not
        visited). Draws sharing a number with ``filter_ball`` are skipped.
        ``p_win_count`` is only echoed in the summary line.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = self._answer_of(df_ball, no)
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Brute-force check whether each draw in ``nos`` survives the filter.

        For every draw number, all 6-of-45 combinations are passed to
        ``extract_final_candidates`` in order until the combination equal to
        the winning numbers is found to pass. ``nos=None`` is treated as empty.

        Returns:
            Dict mapping draw number to its winning numbers for each hit.
        """
        win_history = {}
        if nos is None:
            return win_history
        for no in nos:
            print(no, "processing...")
            answer = self._answer_of(df_ball, no)
            answer_set = set(answer)
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line entry point: score draws [--start-no, --end-no] against the
    # filter and print the statistics produced by FilterTest.find_filter_method.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()
    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only
    # score [start_no, end_no].
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)
    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled STEP #0 (final candidate export): it called
    # filterTest.find_final_candidates(no, df_ball, filter_ball=None) for the
    # latest draw and wrote the result to
    # <resources>/final_candidates.biz_a1.txt. The removed snippet wrote
    # `answer` on every line instead of the candidate `ball`; write `ball`
    # if this step is ever restored.

    # Disabled STEP #2 (win-count verification):
    #   filterTest.check_filter_method(df_ball, win_count)
    # Original version (fixed in the feature file): 22 wins

View File

@@ -1,231 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
ballFilter = None
def __init__(self, resources_path, ruleset_path=None):
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)
return
def find_filter_method(self, df_ball, start_no, end_no):
win_count = 0
no_filter_ball = {}
printLog = True
filter_dic = {}
filter_dic_len = {}
filter_dic_1 = {}
filter_dic_2 = {}
for i in range(len(df_ball)-1, 19, -1):
no = df_ball['no'].iloc[i]
if no < start_no or end_no < no:
continue
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = answer[1:7]
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
filter_type = list(filter_type)
size = len(filter_type)
if size == 0:
win_count += 1
no_filter_ball[no] = answer
print("\t", no)
elif size == 1:
key = filter_type[0]
if key not in filter_dic_1:
filter_dic_1[key] = 1
else:
filter_dic_1[key] += 1
if printLog:
print("\t", no, filter_type)
elif size == 2:
key = ','.join(filter_type)
if key not in filter_dic_2:
filter_dic_2[key] = 1
else:
filter_dic_2[key] += 1
if printLog:
print("\t", no, filter_type)
else:
if printLog:
print("\t", no, filter_type)
# 회차별 필터개수가 적은 것을 정렬하기 위함
if size not in filter_dic_len:
filter_dic_len[size] = []
filter_dic_len[size].append(filter_type)
for f_t in filter_type:
if f_t not in filter_dic:
filter_dic[f_t] = 1
else:
filter_dic[f_t] += 1
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
sorted_filter_dic_len = sorted(filter_dic_len.keys())
for filter_count in sorted_filter_dic_len:
for filter_type in filter_dic_len[filter_count]:
print("\t\t>{} > {}".format(filter_count, filter_type))
print("\n\t[걸러진 유일 필터]")
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_1)):
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
print("\n\t[2개 필터에 걸린 경우]")
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_2)):
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
print("\n\t[Filter 유형 별 걸린 개수]")
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic)):
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), len(df_ball), 100 * len(no_filter_ball) / len(df_ball)))
for no in no_filter_ball:
print("\t\t>", no, no_filter_ball[no])
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), len(df_ball), 100 * len(no_filter_ball) / len(df_ball)))
return win_count
def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
    """Enumerate every combination that triggers no filter for draw ``no``.

    Args:
        no: Draw number forwarded to ``self.ballFilter.filter``.
        df_ball: Draw history forwarded to ``self.ballFilter.filter``.
        filter_ball: Optional iterable of ball numbers; any combination that
            contains one of them is skipped without consulting the filter.
        max_ball: Highest ball number in the pool (balls are ``1..max_ball``).
        pick: Number of balls per combination.

    Returns:
        list[tuple]: Surviving combinations, in ``itertools.combinations`` order.
    """
    # Build the exclusion set once instead of once per combination.
    excluded = set(filter_ball) if filter_ball is not None else set()
    final_candidates = []
    # Iterate the combinations lazily; materialising all of them in a list
    # first only costs memory.
    for idx, ball in enumerate(itertools.combinations(range(1, max_ball + 1), pick)):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))
        if excluded and not excluded.isdisjoint(ball):
            continue
        filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if len(filter_type):
            continue
        final_candidates.append(ball)
    return final_candidates
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
    """Count the past draws whose winning balls pass ``extract_final_candidates``.

    Rows are visited from the last one down to index 1, so the row at index 0
    is never evaluated. A draw sharing any ball with ``filter_ball`` is
    skipped. Each passing draw is printed, followed by a summary line that
    also echoes ``p_win_count``.

    Returns:
        None
    """
    passed = 0
    last_index = len(df_ball) - 1
    for row_idx in range(last_index, 0, -1):
        no = df_ball['no'].iloc[row_idx]
        row = df_ball[df_ball['no'] == no].values.tolist()[0]
        answer = row[1:7]
        if filter_ball is not None:
            shared = set(answer) & set(filter_ball)
            if shared:
                continue
        rejected_by = self.ballFilter.extract_final_candidates(
            answer, no=no, until_end=True, df=df_ball
        )
        if len(rejected_by) != 0:
            continue
        passed += 1
        print("\t\t>{}. {}".format(no, answer))
    print("\n\t> {} / {} p_win_count, {} total".format(passed, p_win_count, last_index))
    return
def validate(self, df_ball, nos=None):
    """Check, per draw, whether the winning combination survives the filter.

    For each draw number in ``nos`` the 6-of-45 combinations are scanned in
    ``itertools.combinations`` order; the scan stops at the first combination
    that both triggers no filter and equals the draw's winning balls.

    Args:
        df_ball: Draw history with a ``'no'`` column; the winning balls are
            read from positions 1-6 of the matching row.
        nos: Iterable of draw numbers to check. ``None`` checks nothing.

    Returns:
        dict: Draw number -> winning balls, for every draw that survived.
    """
    win_history = {}
    # The default ``None`` used to raise TypeError; treat it as "no draws".
    for no in (nos if nos is not None else ()):
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
        answer_set = set(answer)
        # Iterate lazily instead of rebuilding an 8-million-element list for
        # every draw.
        # NOTE(review): the filter still runs on every combination that
        # precedes the answer; if the filter is stateless, evaluating the
        # answer directly would give the same result far faster.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(ball)
            filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0 and len(answer_set & set(ball)) == 6:
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
                break
    return win_history
if __name__ == '__main__':
    # Command-line entry point: run the filter back-test over a range of draws.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()
    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # STEP #0 (disabled): select the final candidates for the latest draw via
    # filterTest.find_final_candidates(no, df_ball, filter_ball=None) and write
    # one comma-separated combination per line to
    # <resources>/final_candidates.biz_a1.txt.
    # NOTE(review): the parked snippet wrote the draw's answer on every line
    # instead of the candidate itself; fix that if the step is revived.
    #
    # STEP #2 (disabled): confirm the win count with
    # filterTest.check_filter_method(df_ball, win_count).
    # Original version (pinned in the feature file): 22 wins

View File

@@ -1,234 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_1 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test helper that runs lotto draws through a ``BallFilter`` rule set.

    Every method expects ``df_ball`` to be a DataFrame with a ``'no'`` column
    (draw number) whose rows hold the six winning balls in positions 1-6.
    """

    # Placeholder only; each instance gets its own filter in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the ``BallFilter`` from ``<resources_path>/<history_json>``.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the draw history inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print ``title`` and then ``counts`` from most to least frequent."""
        print(title)
        for label, hits in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", label, "->", hits)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Report which filters reject the actual winning balls of past draws.

        Draws numbered within ``[start_no, end_no]`` are evaluated from the
        last row of ``df_ball`` to the first; rows outside the range are
        skipped. Per-draw results and several summary tables are printed.

        Args:
            df_ball: Draw history; may contain draws outside the range.
            start_no: First draw number to evaluate (inclusive).
            end_no: Last draw number to evaluate (inclusive).

        Returns:
            int: Number of evaluated draws that triggered no filter.
        """
        win_count = 0
        printLog = True
        no_filter_ball = {}  # draw no -> winning balls that triggered no filter
        filter_dic = {}      # filter label -> draws on which it triggered
        filter_dic_len = {}  # number of triggered filters -> list of filter lists
        filter_dic_1 = {}    # filter label -> draws where it was the only one
        filter_dic_2 = {}    # "a,b" -> draws where exactly that pair triggered

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group by number of triggered filters so that the draws with the
            # fewest can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ``{:.2f}`` gives two decimals; the former ``{:.2}`` meant two
        # significant digits and rendered e.g. 50.0 as "5e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no, balls in no_filter_ball.items():
            print("\t\t>", no, balls)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
        """Enumerate every combination that triggers no filter for draw ``no``.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw history forwarded to ``self.ballFilter.filter``.
            filter_ball: Optional iterable of ball numbers; any combination
                containing one of them is skipped without consulting the filter.
            max_ball: Highest ball number in the pool (balls are ``1..max_ball``).
            pick: Number of balls per combination.

        Returns:
            list[tuple]: Surviving combinations, in ``itertools.combinations`` order.
        """
        # Build the exclusion set once instead of once per combination.
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []
        # Iterate lazily; materialising every combination first only costs memory.
        for idx, ball in enumerate(itertools.combinations(range(1, max_ball + 1), pick)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count the past draws whose winning balls pass ``extract_final_candidates``.

        Rows are visited from the last one down to index 1, so the row at
        index 0 is never evaluated. A draw sharing any ball with
        ``filter_ball`` is skipped. Each passing draw is printed, followed by
        a summary line that also echoes ``p_win_count``.

        Returns:
            None
        """
        win_count = 0
        last_index = len(df_ball) - 1
        for i in range(last_index, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if filter_ball is not None and set(answer) & set(filter_ball):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, last_index))
        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        For each draw number in ``nos`` the 6-of-45 combinations are scanned
        in ``itertools.combinations`` order; the scan stops at the first
        combination that both triggers no filter and equals the winning balls.

        Args:
            df_ball: Draw history used to look up each draw's winning balls.
            nos: Iterable of draw numbers to check. ``None`` checks nothing.

        Returns:
            dict: Draw number -> winning balls, for every draw that survived.
        """
        win_history = {}
        # The default ``None`` used to raise TypeError; treat it as "no draws".
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)
            # Iterate lazily instead of rebuilding an 8-million-element list
            # for every draw.
            # NOTE(review): the filter still runs on every combination that
            # precedes the answer; if the filter is stateless, evaluating the
            # answer directly would give the same result far faster.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if len(filter_type) == 0 and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line entry point: run the filter back-test over a range of draws.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()
    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # STEP #0 (disabled): select the final candidates for the latest draw via
    # filterTest.find_final_candidates(no, df_ball, filter_ball=None) and write
    # one comma-separated combination per line to
    # <resources>/final_candidates.biz_a1.txt.
    # NOTE(review): the parked snippet wrote the draw's answer on every line
    # instead of the candidate itself; fix that if the step is revived.
    #
    # STEP #2 (disabled): confirm the win count with
    # filterTest.check_filter_method(df_ball, win_count).
    # Original version (pinned in the feature file): 22 wins

View File

@@ -1,234 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_2 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test helper that runs lotto draws through a ``BallFilter`` rule set.

    Every method expects ``df_ball`` to be a DataFrame with a ``'no'`` column
    (draw number) whose rows hold the six winning balls in positions 1-6.
    """

    # Placeholder only; each instance gets its own filter in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the ``BallFilter`` from ``<resources_path>/<history_json>``.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the draw history inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print ``title`` and then ``counts`` from most to least frequent."""
        print(title)
        for label, hits in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", label, "->", hits)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Report which filters reject the actual winning balls of past draws.

        Draws numbered within ``[start_no, end_no]`` are evaluated from the
        last row of ``df_ball`` to the first; rows outside the range are
        skipped. Per-draw results and several summary tables are printed.

        Args:
            df_ball: Draw history; may contain draws outside the range.
            start_no: First draw number to evaluate (inclusive).
            end_no: Last draw number to evaluate (inclusive).

        Returns:
            int: Number of evaluated draws that triggered no filter.
        """
        win_count = 0
        printLog = True
        no_filter_ball = {}  # draw no -> winning balls that triggered no filter
        filter_dic = {}      # filter label -> draws on which it triggered
        filter_dic_len = {}  # number of triggered filters -> list of filter lists
        filter_dic_1 = {}    # filter label -> draws where it was the only one
        filter_dic_2 = {}    # "a,b" -> draws where exactly that pair triggered

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group by number of triggered filters so that the draws with the
            # fewest can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ``{:.2f}`` gives two decimals; the former ``{:.2}`` meant two
        # significant digits and rendered e.g. 50.0 as "5e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no, balls in no_filter_ball.items():
            print("\t\t>", no, balls)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
        """Enumerate every combination that triggers no filter for draw ``no``.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw history forwarded to ``self.ballFilter.filter``.
            filter_ball: Optional iterable of ball numbers; any combination
                containing one of them is skipped without consulting the filter.
            max_ball: Highest ball number in the pool (balls are ``1..max_ball``).
            pick: Number of balls per combination.

        Returns:
            list[tuple]: Surviving combinations, in ``itertools.combinations`` order.
        """
        # Build the exclusion set once instead of once per combination.
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []
        # Iterate lazily; materialising every combination first only costs memory.
        for idx, ball in enumerate(itertools.combinations(range(1, max_ball + 1), pick)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count the past draws whose winning balls pass ``extract_final_candidates``.

        Rows are visited from the last one down to index 1, so the row at
        index 0 is never evaluated. A draw sharing any ball with
        ``filter_ball`` is skipped. Each passing draw is printed, followed by
        a summary line that also echoes ``p_win_count``.

        Returns:
            None
        """
        win_count = 0
        last_index = len(df_ball) - 1
        for i in range(last_index, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if filter_ball is not None and set(answer) & set(filter_ball):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, last_index))
        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        For each draw number in ``nos`` the 6-of-45 combinations are scanned
        in ``itertools.combinations`` order; the scan stops at the first
        combination that both triggers no filter and equals the winning balls.

        Args:
            df_ball: Draw history used to look up each draw's winning balls.
            nos: Iterable of draw numbers to check. ``None`` checks nothing.

        Returns:
            dict: Draw number -> winning balls, for every draw that survived.
        """
        win_history = {}
        # The default ``None`` used to raise TypeError; treat it as "no draws".
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)
            # Iterate lazily instead of rebuilding an 8-million-element list
            # for every draw.
            # NOTE(review): the filter still runs on every combination that
            # precedes the answer; if the filter is stateless, evaluating the
            # answer directly would give the same result far faster.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if len(filter_type) == 0 and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line entry point: run the filter back-test over a range of draws.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()
    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # STEP #0 (disabled): select the final candidates for the latest draw via
    # filterTest.find_final_candidates(no, df_ball, filter_ball=None) and write
    # one comma-separated combination per line to
    # <resources>/final_candidates.biz_a1.txt.
    # NOTE(review): the parked snippet wrote the draw's answer on every line
    # instead of the candidate itself; fix that if the step is revived.
    #
    # STEP #2 (disabled): confirm the win count with
    # filterTest.check_filter_method(df_ball, win_count).
    # Original version (pinned in the feature file): 22 wins

View File

@@ -1,234 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test helper that runs lotto draws through a ``BallFilter`` rule set.

    Every method expects ``df_ball`` to be a DataFrame with a ``'no'`` column
    (draw number) whose rows hold the six winning balls in positions 1-6.
    """

    # Placeholder only; each instance gets its own filter in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the ``BallFilter`` from ``<resources_path>/<history_json>``.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the draw history inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print ``title`` and then ``counts`` from most to least frequent."""
        print(title)
        for label, hits in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", label, "->", hits)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Report which filters reject the actual winning balls of past draws.

        Draws numbered within ``[start_no, end_no]`` are evaluated from the
        last row of ``df_ball`` to the first; rows outside the range are
        skipped. Per-draw results and several summary tables are printed.

        Args:
            df_ball: Draw history; may contain draws outside the range.
            start_no: First draw number to evaluate (inclusive).
            end_no: Last draw number to evaluate (inclusive).

        Returns:
            int: Number of evaluated draws that triggered no filter.
        """
        win_count = 0
        printLog = True
        no_filter_ball = {}  # draw no -> winning balls that triggered no filter
        filter_dic = {}      # filter label -> draws on which it triggered
        filter_dic_len = {}  # number of triggered filters -> list of filter lists
        filter_dic_1 = {}    # filter label -> draws where it was the only one
        filter_dic_2 = {}    # "a,b" -> draws where exactly that pair triggered

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group by number of triggered filters so that the draws with the
            # fewest can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ``{:.2f}`` gives two decimals; the former ``{:.2}`` meant two
        # significant digits and rendered e.g. 50.0 as "5e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no, balls in no_filter_ball.items():
            print("\t\t>", no, balls)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
        """Enumerate every combination that triggers no filter for draw ``no``.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw history forwarded to ``self.ballFilter.filter``.
            filter_ball: Optional iterable of ball numbers; any combination
                containing one of them is skipped without consulting the filter.
            max_ball: Highest ball number in the pool (balls are ``1..max_ball``).
            pick: Number of balls per combination.

        Returns:
            list[tuple]: Surviving combinations, in ``itertools.combinations`` order.
        """
        # Build the exclusion set once instead of once per combination.
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []
        # Iterate lazily; materialising every combination first only costs memory.
        for idx, ball in enumerate(itertools.combinations(range(1, max_ball + 1), pick)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count the past draws whose winning balls pass ``extract_final_candidates``.

        Rows are visited from the last one down to index 1, so the row at
        index 0 is never evaluated. A draw sharing any ball with
        ``filter_ball`` is skipped. Each passing draw is printed, followed by
        a summary line that also echoes ``p_win_count``.

        Returns:
            None
        """
        win_count = 0
        last_index = len(df_ball) - 1
        for i in range(last_index, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if filter_ball is not None and set(answer) & set(filter_ball):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, last_index))
        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        For each draw number in ``nos`` the 6-of-45 combinations are scanned
        in ``itertools.combinations`` order; the scan stops at the first
        combination that both triggers no filter and equals the winning balls.

        Args:
            df_ball: Draw history used to look up each draw's winning balls.
            nos: Iterable of draw numbers to check. ``None`` checks nothing.

        Returns:
            dict: Draw number -> winning balls, for every draw that survived.
        """
        win_history = {}
        # The default ``None`` used to raise TypeError; treat it as "no draws".
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)
            # Iterate lazily instead of rebuilding an 8-million-element list
            # for every draw.
            # NOTE(review): the filter still runs on every combination that
            # precedes the answer; if the filter is stateless, evaluating the
            # answer directly would give the same result far faster.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if len(filter_type) == 0 and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line entry point: run the filter back-test over a range of draws.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()
    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # STEP #0 (disabled): select the final candidates for the latest draw via
    # filterTest.find_final_candidates(no, df_ball, filter_ball=None) and write
    # one comma-separated combination per line to
    # <resources>/final_candidates.biz_a1.txt.
    # NOTE(review): the parked snippet wrote the draw's answer on every line
    # instead of the candidate itself; fix that if the step is revived.
    #
    # STEP #2 (disabled): confirm the win count with
    # filterTest.check_filter_method(df_ball, win_count).
    # Original version (pinned in the feature file): 22 wins