Compare commits
15 Commits
013206ef67
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
79121dafdb | ||
| af6b96fc12 | |||
|
|
b82b5a58ee | ||
|
|
aa0f925d4e | ||
|
|
0d27ee88e2 | ||
|
|
d3da7346cd | ||
|
|
c329c44643 | ||
| ae47258ed1 | |||
| 919f2e19bb | |||
| bd9eea2aee | |||
| a6b170fefa | |||
| e31eefef09 | |||
| b440ec96c9 | |||
| d08e906066 | |||
| 52e8495148 |
6
.gitignore
vendored
6
.gitignore
vendored
@@ -82,9 +82,6 @@ celerybeat-schedule
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# dotenv
|
||||
.env
|
||||
|
||||
# virtualenv
|
||||
.venv
|
||||
venv/
|
||||
@@ -99,3 +96,6 @@ ENV/
|
||||
# macOS metadata
|
||||
.DS_Store
|
||||
|
||||
# 로컬 추천 번호 산출물 (Git 추적 제외)
|
||||
resources/recommend_ball.biz_25.json
|
||||
|
||||
|
||||
275
DataCrawler.py
275
DataCrawler.py
@@ -4,6 +4,9 @@ import time
|
||||
import requests
|
||||
# JSON 포맷을 다루기 위한 라이브러리를 호출합니다.
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
import random
|
||||
import socket
|
||||
|
||||
import urllib3
|
||||
|
||||
@@ -19,8 +22,15 @@ except ModuleNotFoundError:
|
||||
def sendMsg(self, msg):
|
||||
pass
|
||||
|
||||
_LOTTO_URL = "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}"
|
||||
_REQUEST_TIMEOUT = 15
|
||||
_LOTTO_URLS = (
|
||||
"https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}",
|
||||
"https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}",
|
||||
)
|
||||
_REQUEST_TIMEOUT = float(os.environ.get("LOTTO_REQUEST_TIMEOUT", "12"))
|
||||
_FETCH_RETRIES_PER_DRAW = int(os.environ.get("LOTTO_FETCH_RETRIES", "3"))
|
||||
_BACKOFF_BASE_SECONDS = float(os.environ.get("LOTTO_BACKOFF_BASE", "0.7"))
|
||||
_MAX_CONSECUTIVE_FETCH_FAILURES = int(os.environ.get("LOTTO_MAX_CONSEC_FAIL", "8"))
|
||||
_CONNECTION_PROBE_TIMEOUT = float(os.environ.get("LOTTO_PROBE_TIMEOUT", "3"))
|
||||
_BROWSER_HEADERS = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
|
||||
@@ -51,33 +61,62 @@ class DataCrawler:
|
||||
self.bot = TelegramBot()
|
||||
self._session = requests.Session()
|
||||
self._session.headers.update(_BROWSER_HEADERS)
|
||||
self._last_fetch_error = ""
|
||||
|
||||
def _can_reach_lottery_host(self):
    """
    Quickly check whether a TCP connection to the lottery API host can be opened.

    This is not a full guarantee that the API works; it only detects a
    completely blocked network early so the caller can avoid long,
    pointless retry waits.

    Returns:
        True as soon as one of the known hostnames accepts a connection on
        port 443, False when every attempt raises OSError.
    """
    probe_targets = [
        (hostname, 443)
        for hostname in ("www.dhlottery.co.kr", "dhlottery.co.kr")
    ]
    for address in probe_targets:
        try:
            # The context manager closes the probe socket right away; only
            # the fact that the connection could be opened matters here.
            with socket.create_connection(address, timeout=_CONNECTION_PROBE_TIMEOUT):
                return True
        except OSError:
            # Unreachable / refused / timed out: try the next hostname.
            pass
    return False
|
||||
|
||||
def _fetch_draw(self, drw_no):
|
||||
"""동행복권 API에서 단일 회차 결과를 가져옵니다. 실패 시 None."""
|
||||
url = _LOTTO_URL.format(int(drw_no))
|
||||
for verify in (_ssl_verify_arg(), False):
|
||||
for method in ("POST", "GET"):
|
||||
try:
|
||||
res = self._session.request(
|
||||
method,
|
||||
url,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
verify=verify,
|
||||
)
|
||||
if res.status_code != 200:
|
||||
continue
|
||||
text = res.text.strip()
|
||||
if not text.startswith("{"):
|
||||
continue
|
||||
result = json.loads(text)
|
||||
except (
|
||||
requests.RequestException,
|
||||
ValueError,
|
||||
json.JSONDecodeError,
|
||||
):
|
||||
continue
|
||||
if isinstance(result, dict) and result.get("returnValue") == "success":
|
||||
return result
|
||||
self._last_fetch_error = ""
|
||||
verify_options = (_ssl_verify_arg(), False)
|
||||
last_error = "unknown"
|
||||
for attempt in range(1, _FETCH_RETRIES_PER_DRAW + 1):
|
||||
for raw_url in _LOTTO_URLS:
|
||||
url = raw_url.format(int(drw_no))
|
||||
for verify in verify_options:
|
||||
for method in ("POST", "GET"):
|
||||
try:
|
||||
res = self._session.request(
|
||||
method,
|
||||
url,
|
||||
timeout=_REQUEST_TIMEOUT,
|
||||
verify=verify,
|
||||
)
|
||||
if res.status_code != 200:
|
||||
last_error = "http {}".format(res.status_code)
|
||||
continue
|
||||
text = res.text.strip()
|
||||
if not text.startswith("{"):
|
||||
last_error = "non-json response"
|
||||
continue
|
||||
result = json.loads(text)
|
||||
except (
|
||||
requests.RequestException,
|
||||
ValueError,
|
||||
json.JSONDecodeError,
|
||||
) as ex:
|
||||
last_error = str(ex)
|
||||
continue
|
||||
if isinstance(result, dict) and result.get("returnValue") == "success":
|
||||
return result
|
||||
rv = result.get("returnValue") if isinstance(result, dict) else "unknown"
|
||||
last_error = "api returnValue={}".format(rv)
|
||||
if attempt < _FETCH_RETRIES_PER_DRAW:
|
||||
# 지수 백오프 + 지터로 일시적 네트워크 혼잡 완화
|
||||
delay = _BACKOFF_BASE_SECONDS * (2 ** (attempt - 1)) + random.uniform(0, 0.25)
|
||||
time.sleep(delay)
|
||||
self._last_fetch_error = last_error
|
||||
return None
|
||||
|
||||
def _append_draw_files(self, lottoHistoryFile, result):
|
||||
@@ -120,6 +159,99 @@ class DataCrawler:
|
||||
return None
|
||||
return last_json.get("drwNo")
|
||||
|
||||
def _read_draw_map_from_json(self, json_path):
    """
    Read the whole JSONL history file into a ``drwNo -> record`` dict.

    Blank lines, lines that are not valid JSON, and records that are not
    usable draw results are skipped (they are the clean-up targets). When a
    draw number appears on several lines, the last valid record wins.

    A record is usable only when it is a dict with
    ``returnValue == "success"`` and every number field (``drwNo``,
    ``drwtNo1``..``drwtNo6``, ``bnusNo``) is a real integer. Checking the
    ball fields here matters because ``_write_draw_map_files`` formats them
    with ``%d`` and would otherwise fail on a half-written record.

    Args:
        json_path: Path of the JSON-lines history file.

    Returns:
        dict mapping the integer draw number to its decoded record; empty
        when the file does not exist or has zero size.
    """
    draw_map = {}
    if not os.path.isfile(json_path) or os.path.getsize(json_path) == 0:
        return draw_map

    number_fields = (
        "drwNo",
        "drwtNo1",
        "drwtNo2",
        "drwtNo3",
        "drwtNo4",
        "drwtNo5",
        "drwtNo6",
        "bnusNo",
    )

    def _is_plain_int(value):
        # bool is a subclass of int; True/False is never a valid number here.
        return isinstance(value, int) and not isinstance(value, bool)

    with open(json_path, "r", encoding="utf-8") as fp:
        for line in fp:
            line = line.strip()
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(data, dict) or data.get("returnValue") != "success":
                continue
            if not all(_is_plain_int(data.get(name)) for name in number_fields):
                continue
            # Later lines overwrite earlier ones for the same draw number.
            draw_map[data["drwNo"]] = data

    return draw_map
|
||||
|
||||
def _write_draw_map_files(self, lottoHistoryFile, draw_map):
    """
    Regenerate the ``.json`` and ``.txt`` history files in ascending drwNo order.

    Rewriting both files from the in-memory map is what guarantees a
    consistent state after missing draws were filled in and duplicate or
    broken lines were dropped.

    All output is rendered in memory first and each file is then swapped in
    with ``os.replace``. A malformed record therefore raises before any
    existing history file is touched, instead of leaving a truncated file.

    Args:
        lottoHistoryFile: Output path without extension.
        draw_map: dict mapping draw number to its record; every record must
            provide ``drwtNo1``..``drwtNo6`` and ``bnusNo``.

    Raises:
        KeyError: A record lacks one of the number fields.
        TypeError: A number field cannot be formatted with ``%d``.
        OSError: A file could not be written or replaced.
    """
    json_path = lottoHistoryFile + ".json"
    txt_path = lottoHistoryFile + ".txt"
    ball_keys = (
        "drwtNo1",
        "drwtNo2",
        "drwtNo3",
        "drwtNo4",
        "drwtNo5",
        "drwtNo6",
        "bnusNo",
    )

    # Render everything before opening any file: formatting errors must not
    # be able to destroy the history that is already on disk.
    json_lines = []
    text_lines = []
    for drw_no in sorted(draw_map):
        result = draw_map[drw_no]
        json_lines.append(json.dumps(result, ensure_ascii=False) + "\n")
        text_lines.append(
            "%d,%d,%d,%d,%d,%d,%d,%d\n"
            % ((drw_no,) + tuple(result[key] for key in ball_keys))
        )

    for final_path, lines in ((json_path, json_lines), (txt_path, text_lines)):
        tmp_path = final_path + ".tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as fp:
                fp.writelines(lines)
            os.replace(tmp_path, final_path)
        finally:
            # After a successful replace the temp file no longer exists;
            # this only cleans up after a failed write.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
|
||||
|
||||
def _get_last_week_draw_date(self, now=None):
    """
    Return the date of the most recent Saturday whose draw should already exist.

    Example: when run on Friday 2026-05-08 the result is the previous
    Saturday, 2026-05-02.

    Args:
        now: Optional ``datetime`` to evaluate against. Defaults to
            ``datetime.now()``; passing it explicitly makes the result
            deterministic (useful for tests and re-runs).

    Returns:
        ``datetime.date`` of the target Saturday.
    """
    if now is None:
        now = datetime.now()

    # weekday(): Monday == 0 ... Saturday == 5, so this is 0 on a Saturday
    # and 1..6 on the following Sunday..Friday.
    days_since_saturday = (now.weekday() - 5) % 7
    latest_saturday = now.date() - timedelta(days=days_since_saturday)

    # On a Saturday before 20:00 the draw has not taken place yet, so the
    # target is the Saturday one week earlier.
    if now.weekday() == 5 and now.hour < 20:
        latest_saturday -= timedelta(days=7)
    return latest_saturday
|
||||
|
||||
def _estimate_target_draw_no(self, draw_map):
    """
    Estimate the draw number this run should end up having on disk.

    The ``drwNoDate`` of the newest stored draw is compared with the target
    Saturday from ``_get_last_week_draw_date``; every full week between
    them counts as one additional draw.

    Args:
        draw_map: dict mapping draw number to its record.

    Returns:
        None when ``draw_map`` is empty. Otherwise the newest stored draw
        number, increased by the number of whole weeks the data lags behind
        the target Saturday. The newest stored number is returned unchanged
        when its date is missing or unparsable, or when the data is already
        up to date.
    """
    if not draw_map:
        return None

    last_no = max(draw_map)
    last_date_str = draw_map[last_no].get("drwNoDate", "")
    try:
        last_date = datetime.strptime(last_date_str, "%Y-%m-%d").date()
    except (TypeError, ValueError):
        # TypeError covers a non-string value such as JSON null; without a
        # usable date no gap can be computed, so fall back to what we have.
        return last_no

    target_date = self._get_last_week_draw_date()
    if target_date <= last_date:
        return last_no

    # A gap shorter than seven days floors to zero, leaving last_no as is.
    return last_no + (target_date - last_date).days // 7
|
||||
|
||||
# 로또 당첨 데이터를 수집해서 파일로 저장합니다.
|
||||
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일 (확장자 제외)
|
||||
def craw(self, lottoHistoryFile, drwNo=None):
|
||||
@@ -161,44 +293,97 @@ class DataCrawler:
|
||||
|
||||
def excute(self, resource_path):
|
||||
"""
|
||||
resources/lotto_history.* 를 마지막 회차 이후까지 연속으로 갱신합니다.
|
||||
(기존: 날짜 비교 + 1회차만 수집 → 누락·정지가 잦음)
|
||||
resources/lotto_history.* 를 지난 주 기준으로 누락 없이 동기화합니다.
|
||||
- 마지막 회차+1만 확인하지 않고, 1~목표회차 범위에서 누락 회차를 탐지/보강
|
||||
- 중복/깨진 라인을 정리해 json/txt를 일관 상태로 재생성
|
||||
"""
|
||||
lottoHistoryFile = os.path.join(resource_path, "lotto_history")
|
||||
json_path = lottoHistoryFile + ".json"
|
||||
|
||||
last_no = self._read_last_draw_from_json(json_path)
|
||||
if last_no is None:
|
||||
self.craw(lottoHistoryFile)
|
||||
draw_map = self._read_draw_map_from_json(json_path)
|
||||
|
||||
# 기존 이력이 비었거나 깨졌으면 전체 재수집(기존 동작 유지)
|
||||
if not draw_map:
|
||||
try:
|
||||
self.craw(lottoHistoryFile)
|
||||
self.bot.sendMsg("[Lottery Crawler] full history rebuilt (no valid json).")
|
||||
except Exception:
|
||||
pass
|
||||
return True
|
||||
|
||||
added = 0
|
||||
next_no = last_no + 1
|
||||
while True:
|
||||
result = self._fetch_draw(next_no)
|
||||
if result is None:
|
||||
break
|
||||
self._append_draw_files(lottoHistoryFile, result)
|
||||
added += 1
|
||||
next_no += 1
|
||||
time.sleep(0.35)
|
||||
target_no = self._estimate_target_draw_no(draw_map)
|
||||
if target_no is None:
|
||||
target_no = max(draw_map.keys())
|
||||
|
||||
if added == 0:
|
||||
if not self._can_reach_lottery_host():
|
||||
msg = "[Lottery Crawler] network blocked: cannot reach dhlottery host."
|
||||
print(msg)
|
||||
try:
|
||||
self.bot.sendMsg(msg)
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
missing_nos = [no for no in range(1, target_no + 1) if no not in draw_map]
|
||||
added = 0
|
||||
failed = []
|
||||
aborted_missing_nos = []
|
||||
consecutive_failure = 0
|
||||
fail_reasons = {}
|
||||
for no in missing_nos:
|
||||
result = self._fetch_draw(no)
|
||||
if result is None:
|
||||
failed.append(no)
|
||||
reason = self._last_fetch_error or "unknown"
|
||||
fail_reasons[reason] = fail_reasons.get(reason, 0) + 1
|
||||
consecutive_failure += 1
|
||||
if consecutive_failure >= _MAX_CONSECUTIVE_FETCH_FAILURES:
|
||||
aborted_missing_nos = [x for x in missing_nos if x > no]
|
||||
break
|
||||
continue
|
||||
draw_map[no] = result
|
||||
added += 1
|
||||
consecutive_failure = 0
|
||||
time.sleep(0.2)
|
||||
|
||||
# 누락 보강 또는 중복 정리 여지가 있으면 파일을 재생성
|
||||
self._write_draw_map_files(lottoHistoryFile, draw_map)
|
||||
|
||||
last_no = max(draw_map.keys())
|
||||
if added == 0 and not failed:
|
||||
try:
|
||||
self.bot.sendMsg(
|
||||
"[Lottery Crawler] up to date (last drwNo={}).".format(last_no)
|
||||
"[Lottery Crawler] up to date (last drwNo={}, target={}).".format(
|
||||
last_no, target_no
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
elif failed:
|
||||
sample = ",".join(str(x) for x in failed[:10])
|
||||
reason_items = sorted(fail_reasons.items(), key=lambda x: x[1], reverse=True)
|
||||
reason_str = "; ".join("{} x{}".format(reason, count) for reason, count in reason_items[:3])
|
||||
if aborted_missing_nos:
|
||||
reason_str += " | aborted {} pending draws due to consecutive failures".format(
|
||||
len(aborted_missing_nos)
|
||||
)
|
||||
try:
|
||||
self.bot.sendMsg(
|
||||
"[Lottery Crawler] appended {}, failed {} draw(s): {}{} | {}".format(
|
||||
added,
|
||||
len(failed),
|
||||
sample,
|
||||
"..." if len(failed) > 10 else "",
|
||||
reason_str or "no reason",
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
self.bot.sendMsg(
|
||||
"[Lottery Crawler] appended {} draw(s), last drwNo={}.".format(
|
||||
added, last_no + added
|
||||
"[Lottery Crawler] appended {} draw(s), last drwNo={}, target={}.".format(
|
||||
added, last_no, target_no
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
|
||||
14
PROMPT.txt
14
PROMPT.txt
@@ -1,11 +1,12 @@
|
||||
데이터는 다음과 같습니다.
|
||||
(학습 데이터)
|
||||
- train.json, train.txt
|
||||
- 1회차부터 800회차
|
||||
- lotto_history.txt에서 1회차부터 800회차
|
||||
|
||||
(검증 데이터)
|
||||
- valid.json, valid.txt
|
||||
- 801회차부터 1000회차
|
||||
- lotto_history.txt에서 801회차부터 1000회차
|
||||
|
||||
(테스트 데이터)
|
||||
- lotto_history.txt에서 1001회차부터 이후 모두
|
||||
|
||||
파일 구조를 먼저 이해하세요.
|
||||
|
||||
@@ -109,4 +110,7 @@
|
||||
먼저 진행해야할 일에 대해서 생각하고 정리하세요.
|
||||
그리고 요구사항에 대해서 시도 방법을 설계하세요.
|
||||
그리고 반복적으로 실행해서 최적화된 방법을 찾아서 적용해주세요.
|
||||
(최적화는 언제든 학습 데이터로 최적화를 해야 합니다. 그리고 검증 데이터로 테스트만 수행하세요.)
|
||||
(최적화는 언제든 학습 데이터로 최적화를 해야 합니다. 그리고 검증 데이터로 테스트만 수행하세요.)
|
||||
|
||||
당첨번호에 대한 추천 개수가 100개 미만이어야 합니다.
|
||||
1_FilterTest_25.py, BallFilter_25.py를 참고해서 최적의 final_filterTest.py, final_BallFilter.py를 작성해 주세요.
|
||||
421
README.md
421
README.md
@@ -1,343 +1,112 @@
|
||||
# 실행 순서
|
||||
# deeplottery
|
||||
|
||||
## final_BallFilter · `final_filterTest.py` (miniconda **ncue**)
|
||||
`deeplottery`는 로또 번호를 예측하는 프로젝트가 아니라, **전체 조합(45C6)을 규칙 기반으로 필터링해 후보를 줄이는 시스템**입니다.
|
||||
핵심은 `BallFilter` 엔진이며, 운영 실행(`final_practice.py`)과 검증(`final_FilterTest.py`)이 분리되어 있습니다.
|
||||
|
||||
임계값은 `tools/compute_final_filter_params.py`가 학습 구간(1~800회) 분포에서 생성하며, 결과는 `final_filter_params.py`에 기록됩니다.
|
||||
## 목표와 설계 의도
|
||||
|
||||
- 목표: 통계/패턴 기반 규칙으로 비효율 조합을 제거하고 후보군을 관리 가능한 크기로 축소
|
||||
- 설계 의도:
|
||||
- 필터 규칙은 `final_BallFilter.py` 한 곳에서 관리
|
||||
- 운영 추천 생성과 과거 회차 검증을 분리하여 반복 개선
|
||||
- 같은 엔진을 운영/검증에서 공통 사용해 일관성 유지
|
||||
|
||||
## 전체 아키텍처
|
||||
|
||||
1. 데이터 수집/갱신
|
||||
- `DataCrawler.py`가 로또 API를 호출해 `resources/lotto_history.json`, `resources/lotto_history.txt` 갱신
|
||||
2. 필터 엔진 로딩
|
||||
- `final_BallFilter.py`의 `BallFilter`가 과거 당첨 이력을 메모리로 적재
|
||||
3. 운영 후보 생성
|
||||
- `final_practice.py`가 다음 회차 기준 전체 조합을 순회하며 `BallFilter.filter()`로 통과 조합만 저장
|
||||
4. 필터 성능 검증
|
||||
- `final_FilterTest.py`가 과거 당첨번호를 기준으로 어떤 필터가 당첨을 걸렀는지/통과시켰는지 분석
|
||||
|
||||
## 핵심 파일 설명
|
||||
|
||||
- `final_BallFilter.py`
|
||||
- 프로젝트 핵심 엔진
|
||||
- `extract_final_candidates()`에서 규칙 기반 탈락 사유(`set`)를 생성
|
||||
- `filter()`는 실사용 진입점이며, 반환 `set`이 비어 있으면 통과
|
||||
- `final_practice.py`
|
||||
- 운영 실행 스크립트
|
||||
- `predict1()` 고정 11조합을 유지
|
||||
- `predict2()`는 1차 필터 통과 조합을 만든 뒤, 2차 포트폴리오 선별로 최종 추천 수를 제한
|
||||
- 총 추천 개수는 고정수 포함 최대 70게임(70,000원) 상한을 적용
|
||||
- 결과를 `resources/recommend_ball.biz_25.json`에 저장하고 Telegram 전송
|
||||
- `final_FilterTest.py`
|
||||
- 검증/분석 스크립트
|
||||
- `find_filter_method()`로 회차별 필터 적중 통계 확인
|
||||
- `find_final_candidates()`로 특정 회차 후보군 재생성
|
||||
- `DataCrawler.py`
|
||||
- 과거 이력 파일 수집/보강
|
||||
- 네트워크 실패 재시도/백오프 및 누락 회차 보완 처리
|
||||
- `TelegramBot.py`
|
||||
- 추천 결과 메시지 전송
|
||||
- `resources/`
|
||||
- `lotto_history.json`: 회차별 원본 JSON 라인 데이터
|
||||
- `lotto_history.txt`: 회차별 CSV 형태 요약 데이터
|
||||
- `recommend_ball.biz_25.json`: 회차별 추천 결과 저장 파일
|
||||
|
||||
## 실행 방법
|
||||
|
||||
Python 실행은 Miniconda `ncue` 환경을 사용합니다.
|
||||
|
||||
```bash
|
||||
conda activate ncue
|
||||
python tools/compute_final_filter_params.py
|
||||
python final_filterTest.py
|
||||
python DataCrawler.py
|
||||
python final_FilterTest.py
|
||||
python final_practice.py
|
||||
```
|
||||
|
||||
conda 경로를 쓰기 어려우면 프로젝트의 `scripts/run_with_ncue.sh`로 동일하게 실행할 수 있습니다.
|
||||
## 동작 방식 상세
|
||||
|
||||
```bash
|
||||
./scripts/run_with_ncue.sh tools/compute_final_filter_params.py
|
||||
./scripts/run_with_ncue.sh final_filterTest.py
|
||||
```
|
||||
- 입력: 1~45에서 6개 조합 전체
|
||||
- 처리:
|
||||
- 1차: `BallFilter` 규칙 필터 적용
|
||||
- 2차: 고정 11조합을 유지한 채, 겹침도 기반 포트폴리오 선별로 후보 축소
|
||||
- 출력:
|
||||
- 탈락 조합: 탈락 사유 집합 반환
|
||||
- 최종 추천 조합: 예산 상한(최대 70,000원) 내에서 저장/전송
|
||||
|
||||
* FilterFeature.py를 실행한다.
|
||||
* lotto_history.json을 읽어서 all_filter_[1-100].[cluster,csv,feature] 파일을 생성한다.
|
||||
주요 규칙 범주 예시:
|
||||
|
||||
- 합/평균 및 전주 대비 차이
|
||||
- 앞 3개/뒤 3개 합 패턴
|
||||
- 고저 비율, 끝자리 패턴, AC 값
|
||||
- 배수 개수(3/4/5/6 등)
|
||||
- 최근 N주 출현 빈도/중복 관련 규칙
|
||||
- 비선호 2개/3개 조합 제거 규칙
|
||||
|
||||
* FilterFeatureCluster.py를 실행한다.
|
||||
* 첫수는 1~10까지만 허용한다.
|
||||
* random_state 전체 내 각 cluster에 대해서 당첨 회수를 파악하여 ./resources/cluster_win_info.csv 파일을 생성한다.
|
||||
* 생성 파일
|
||||
* filtertest_1.csv: random_state 내 cluster 개수를 파악한다.
|
||||
* filtertest_2.csv: random_state 내 cluster 개수 별 전체 당첨 회수를 파악한다.
|
||||
* filtertest_3.csv: random_state 내 cluster 개수 별 최초 당첨 번호만 파악한다.
|
||||
## 디렉터리 현재 상태 (2026-05-08)
|
||||
|
||||
### 최상위 파일/디렉터리 현황
|
||||
|
||||
* 실행할 random_state와 cluster 번호 파악
|
||||
* filtertest_2.csv과 answer_pattern_analsys.xlsx을 이용하여 선별한다.
|
||||
- 실행/핵심
|
||||
- `DataCrawler.py`
|
||||
- `final_BallFilter.py`
|
||||
- `final_FilterTest.py`
|
||||
- `final_practice.py`
|
||||
- `final_Practice.py` (구버전 스크립트)
|
||||
- `TelegramBot.py`
|
||||
- 레거시 참조 파일
|
||||
- `BallFilter_22.py`, `BallFilter_25.py`
|
||||
- `1_FilterTest_22.py`, `1_FilterTest_25.py`
|
||||
- `2_FilterTestReview_22.py`, `2_FilterTestReview_25.py`
|
||||
- `3_Practice_22.py`, `3_Practice_25.py`
|
||||
- `fixed10.py`
|
||||
- 데이터/설정
|
||||
- `resources/`
|
||||
- `requirements.txt`
|
||||
- `scripts/`
|
||||
|
||||
### Git 작업 트리 상태(요약)
|
||||
|
||||
* cluster_info.json 파일 업데이트
|
||||
* 실행할 random_state와 cluster 번호를 json 형태로 등록한다.
|
||||
- 수정됨: `DataCrawler.py`, `README.md`, `final_BallFilter.py`, `final_practice.py`, `resources/lotto_history.json`, `resources/lotto_history.txt`
|
||||
- 삭제됨: `filter_model.py`, `final_filter_params.py`, `train.py`, `valid.py`
|
||||
- 신규(미추적): `resources/recommend_ball.biz_25.json`
|
||||
|
||||
## 주의 사항
|
||||
|
||||
* Util_filegen.py 실행
|
||||
* m1, amd, intel 컴퓨터에서 실행할 sh, bat 파일을 생성한다.
|
||||
* 파이썬 내에서 아래 두 부분만 수정하면 된다.
|
||||
* m1_file_max, amd_file_max, intel_file_max = 8,12,7
|
||||
* m1_proc_limit, amd_proc_limit, intel_proc_limit = 124,125,110
|
||||
|
||||
|
||||
* 각 장비에서 sh와 bat 파일 실행
|
||||
|
||||
|
||||
## Ruleset(임계값 설정) 기반으로 운영하기
|
||||
|
||||
`filter_model.BallFilter`의 주요 임계값(합/평균/앞3합/뒤3합/간격 등)을 **JSON ruleset**으로 외부화했습니다.
|
||||
이제 “코드 수정 없이” ruleset 파일만 바꿔서 실험/튜닝을 자동화할 수 있습니다.
|
||||
|
||||
- **기본 ruleset 경로**: `resources/rulesets/default.json`
|
||||
- **주의/한계**: 로또는 본질적으로 랜덤(독립/균등 가설)이며, ruleset은 “구매 조합 수를 줄이기 위한 필터”입니다. **당첨 보장/예측을 주장하지 않습니다.**
|
||||
|
||||
### valid 성능 확인 예시
|
||||
|
||||
```bash
|
||||
python scripts/eval_filters.py \
|
||||
--data valid \
|
||||
--resources resources \
|
||||
--ruleset resources/rulesets/default.json \
|
||||
--start-no 801 --end-no 1000 \
|
||||
--survivors-samples 0
|
||||
```
|
||||
|
||||
### survivors(생존 조합 수) 근사 포함 예시
|
||||
|
||||
```bash
|
||||
python scripts/eval_filters.py \
|
||||
--data valid \
|
||||
--resources resources \
|
||||
--ruleset resources/rulesets/default.json \
|
||||
--start-no 801 --end-no 1000 \
|
||||
--survivors-samples 3000
|
||||
```
|
||||
|
||||
## 자동 튜닝 → ruleset 생성 → 일괄 평가 파이프라인
|
||||
|
||||
### 1) train 기반 자동 튜닝(후보 ruleset 생성)
|
||||
|
||||
아래 스크립트는 **train 구간에서만** 임계값을 랜덤 탐색으로 튜닝한 뒤,
|
||||
`resources/rulesets/`에 `Balanced.json`, `Coverage-First.json`을 저장합니다.
|
||||
|
||||
```bash
|
||||
python scripts/tune_ruleset.py \
|
||||
--resources resources \
|
||||
--base-ruleset resources/rulesets/default.json \
|
||||
--out-dir resources/rulesets \
|
||||
--train-start 21 --train-end 800 \
|
||||
--hit-rate-min 0.01 \
|
||||
--iters 200 \
|
||||
--mc-samples 40000
|
||||
```
|
||||
|
||||
- **Coverage-First**: survivors(생존 조합 수) 최소화를 우선
|
||||
- **Balanced**: survivors를 줄이되 hit-rate도 함께 고려
|
||||
|
||||
> 주의: survivors는 전수(8,145,060조합) 대신 **풀링 Monte Carlo**로 근사하므로 오차가 있습니다.
|
||||
|
||||
### 2) valid/train 구간에서 ruleset 일괄 평가
|
||||
|
||||
```bash
|
||||
python scripts/eval_rulesets.py \
|
||||
--resources resources \
|
||||
--rulesets-dir resources/rulesets \
|
||||
--data valid \
|
||||
--start-no 801 --end-no 1000 \
|
||||
--survivors-samples 0
|
||||
```
|
||||
|
||||
# Query
|
||||
```SQL
|
||||
##### 전체 진행 확인 #####
|
||||
|
||||
with source_count as (
|
||||
select source, count(*) as source_count
|
||||
from cluster_info
|
||||
where priority not in (99)
|
||||
and source in (1,3)
|
||||
group by 1
|
||||
),
|
||||
ball_count as (
|
||||
# 1) random_state, cluster 별 추천볼 개수
|
||||
select source, random_state, cluster, ball_cnt
|
||||
from (
|
||||
SELECT source, random_state, cluster, count(*)
|
||||
as ball_cnt
|
||||
from recommend_ball
|
||||
where no=1136
|
||||
and b1 > 0
|
||||
group by 1,2,3
|
||||
union all
|
||||
SELECT source, random_state, cluster, 0 as ball_cnt
|
||||
from recommend_ball
|
||||
where no=1136
|
||||
and b1 = 0
|
||||
group by 1,2,3
|
||||
) lj
|
||||
),
|
||||
source_rc_cluster_list as (
|
||||
select ci.source, ci.random_state, ci.cluster, ci.cluster_count, ci.win_count, ci.priority, rc.source_count, bc.ball_cnt
|
||||
from cluster_info ci
|
||||
left join source_count rc on ci.source = rc.source
|
||||
left join ball_count bc on ci.source = bc.source and ci.random_state = bc.random_state and ci.cluster = bc.cluster
|
||||
where ci.priority not in (99)
|
||||
and ci.source in (1,3)
|
||||
),
|
||||
source_process as (
|
||||
select source, "done" as type, count(*) as cnt from source_rc_cluster_list
|
||||
where ball_cnt is not NULL
|
||||
group by 1,2
|
||||
union all
|
||||
select source, "yet" as type, count(*) as cnt from source_rc_cluster_list
|
||||
where ball_cnt is NULL
|
||||
group by 1,2
|
||||
)
|
||||
select source, type, cnt,
|
||||
case when source=1 then concat(round(100.0 * cnt / (select source_count from source_count where source=1),2), '%')
|
||||
when source=3 then concat(round(100.0 * cnt / (select source_count from source_count where source=3),2), '%')
|
||||
end as rate from source_process order by 1,2
|
||||
;
|
||||
|
||||
### 미진행 클러스터 확인 ###
|
||||
SELECT ci.source, ci.random_state, ci.cluster, lj.cnt
|
||||
from cluster_info ci
|
||||
left join (select source, random_state, cluster, count(*) as cnt from recommend_ball rb where no=1136 group by 1,2,3) lj on ci.source=lj.source and ci.random_state=lj.random_state and ci.cluster=lj.cluster
|
||||
where priority not in (99)
|
||||
and lj.cnt is null
|
||||
order by 1,2,3
|
||||
;
|
||||
|
||||
|
||||
|
||||
|
||||
##### 전체 cluster 확인 #####
|
||||
|
||||
with raw_data as (
|
||||
select rb.source, ci.priority, rb.random_state, rb.cluster, ci.cluster_count, ci.win_count, b1, count(*) as ball_cnt
|
||||
from recommend_ball rb left join cluster_info ci on rb.source=ci.source and rb.random_state = ci.random_state and rb.cluster = ci.cluster
|
||||
where no=1136
|
||||
group by 1,2,3,4,5,6,7
|
||||
),
|
||||
all_cluster as (
|
||||
select source, priority, random_state, cluster, ball_cnt
|
||||
from raw_data
|
||||
where (
|
||||
(source = 1 and priority in (1,2)) or
|
||||
(source = 3 and priority in (1,2))
|
||||
)
|
||||
group by 1,2,3,4
|
||||
),
|
||||
valid_total_cluster as (
|
||||
select source, priority, random_state, cluster, ball_cnt
|
||||
from raw_data
|
||||
where (
|
||||
(source = 1 and priority = 1 and
|
||||
ball_cnt BETWEEN 50 and 80
|
||||
) or
|
||||
(source = 1 and priority = 2 and (
|
||||
win_count = 12 and ball_cnt BETWEEN 50 and 80)
|
||||
) or
|
||||
(source = 3 and priority = 1 and
|
||||
(ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100)
|
||||
) or
|
||||
(source = 3 and priority = 2 and (
|
||||
win_count=13 and (ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100))
|
||||
) or
|
||||
(source = 1 and
|
||||
((win_count between 5 and 10) and ball_cnt BETWEEN 1 and 20)
|
||||
)
|
||||
)
|
||||
group by 1,2,3,4
|
||||
),
|
||||
valid_none_0_cluster as (
|
||||
select source, priority, random_state, cluster, ball_cnt
|
||||
from raw_data
|
||||
where b1 <> 0 AND
|
||||
(
|
||||
(source = 1 and priority = 1 and
|
||||
ball_cnt BETWEEN 50 and 80
|
||||
) or
|
||||
(source = 1 and priority = 2 and (
|
||||
win_count = 12 and ball_cnt BETWEEN 50 and 80)
|
||||
) or
|
||||
(source = 3 and priority = 1 and
|
||||
(ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100)
|
||||
) or
|
||||
(source = 3 and priority = 2 and (
|
||||
win_count=13 and (ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100))
|
||||
) or
|
||||
(source = 1 and
|
||||
((win_count between 5 and 10) and ball_cnt BETWEEN 1 and 20)
|
||||
)
|
||||
)
|
||||
group by 1,2,3,4
|
||||
)
|
||||
# 전체 클러스터 개수
|
||||
select 1 as col, count(*) from all_cluster
|
||||
union all
|
||||
# 조건에 해당하는 클러스터 개수
|
||||
select 2 as col, count(*) from valid_total_cluster
|
||||
union all
|
||||
# 조건에 해당하는 클러스터 중 추천이 0이 아닌 유효한 클러스터 개수
|
||||
select 3 as col, count(*) from valid_none_0_cluster
|
||||
;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
##### 가장 많은 추천볼 #####
|
||||
select b1,b2,b3,b4,b5,b6,count(*) as ball_cnt
|
||||
from recommend_ball
|
||||
where no=1136
|
||||
and b1>0
|
||||
group by 1,2,3,4,5,6
|
||||
order by 7 desc;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
##### 추천볼 분석 #####
|
||||
|
||||
with priority as (
|
||||
select source, random_state, cluster, cluster_count, win_count, priority
|
||||
from cluster_info
|
||||
where priority not in (99)
|
||||
),
|
||||
recommend as (
|
||||
select source, random_state, cluster, b1,b2,b3,b4,b5,b6
|
||||
from recommend_ball
|
||||
where b1 > 0
|
||||
and no=1136
|
||||
),
|
||||
recommend_count as (
|
||||
select source, random_state, cluster, count(*) as ball_cnt
|
||||
from recommend_ball
|
||||
where b1 > 0
|
||||
and no=1136
|
||||
group by 1,2,3
|
||||
),
|
||||
raw_data as (
|
||||
select r.source, r.random_state, r.cluster, p.cluster_count, p.win_count, p.priority, r.b1,r.b2,r.b3,r.b4,r.b5,r.b6, rc.ball_cnt
|
||||
from recommend r
|
||||
left join priority p on r.source=p.source and r.random_state=p.random_state and r.cluster=p.cluster
|
||||
left join recommend_count rc on r.source=rc.source and r.random_state=rc.random_state and r.cluster=rc.cluster
|
||||
),
|
||||
candidate as (
|
||||
select source, random_state, cluster, cluster_count, win_count, priority, b1,b2,b3,b4,b5,b6, ball_cnt
|
||||
from raw_data
|
||||
where (
|
||||
(source = 0 and b1=7)
|
||||
or (source = 1 and priority=-1 and ball_cnt<=140 and (
|
||||
b1 not in (13, 19, 28)
|
||||
and b2 not in (13, 19, 28)
|
||||
and b3 not in (13, 19, 28)
|
||||
and b4 not in (13, 19, 28)
|
||||
and b5 not in (13, 19, 28)
|
||||
and b6 not in (13, 19, 28)
|
||||
)
|
||||
)
|
||||
or (source = 3 and priority=-1 and ball_cnt<=150 and (
|
||||
b1 not in (13, 19, 28)
|
||||
and b2 not in (13, 19, 28)
|
||||
and b3 not in (13, 19, 28)
|
||||
and b4 not in (13, 19, 28)
|
||||
and b5 not in (13, 19, 28)
|
||||
and b6 not in (13, 19, 28)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
#select source, random_state,cluster,b1,b2,b3,b4,b5,b6 from candidate order by 4,5,6,7,8,9;
|
||||
, duplication as (
|
||||
# 리스트업한 추천볼에서 중복을 제거함
|
||||
select source, random_state, cluster, cluster_count, win_count, priority, b1,b2,b3,b4,b5,b6, ball_cnt
|
||||
from (
|
||||
select source, random_state, cluster, cluster_count, win_count, priority, b1,b2,b3,b4,b5,b6, ball_cnt,
|
||||
ROW_NUMBER() OVER(PARTITION BY b1,b2,b3,b4,b5,b6 ORDER BY b1,b2,b3,b4,b5,b6) AS rnk
|
||||
from candidate
|
||||
) a
|
||||
where rnk=1
|
||||
order by source,random_state,cluster,b1,b2,b3,b4,b5,b6
|
||||
)
|
||||
select count(*) as cnt from duplication;
|
||||
#select source, priority, random_state, cluster, win_count, count(*) as cnt from duplication group by 1,2,3;
|
||||
#select b1, count(*) as ball_cnt from duplication group by 1
|
||||
#select b6, count(*) as ball_cnt from duplication group by 1
|
||||
#select source,random_state,cluster,b1,b2,b3,b4,b5,b6 from duplication order by 4,5,6,7,8,9;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
```
|
||||
- 이 프로젝트는 통계적 휴리스틱 기반의 후보 축소 도구이며 당첨을 보장하지 않습니다.
|
||||
- 필터가 강해질수록 후보 수는 줄지만, 실제 당첨 조합을 배제할 위험도 함께 증가합니다.
|
||||
|
||||
4439
filter_model_1.py
4439
filter_model_1.py
File diff suppressed because it is too large
Load Diff
1256
filter_model_2.py
1256
filter_model_2.py
File diff suppressed because it is too large
Load Diff
@@ -1,94 +0,0 @@
|
||||
"""
|
||||
filter_model_3.py
|
||||
|
||||
OR-composed BallFilter:
|
||||
- A candidate ball is ACCEPTED if it passes EITHER filter_model_1 OR filter_model_2.
|
||||
- A candidate ball is REJECTED only if it fails BOTH.
|
||||
|
||||
This keeps the same public interface used across the project:
|
||||
BallFilter(lottoHistoryFileName, ruleset_path=..., ruleset=...)
|
||||
.filter(ball, no, until_end=False, df=None, filter_ball=None) -> set[str]
|
||||
.extract_final_candidates(ball, no=None, until_end=False, df=None) -> set[str]
|
||||
|
||||
Notes:
|
||||
- The underlying filters return a non-empty set of failure reasons when rejected.
|
||||
- Callers treat "len(result) == 0" as PASS.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import filter_model_1 as fm1
|
||||
import filter_model_2 as fm2
|
||||
|
||||
|
||||
class BallFilter:
    """
    Combine filter_model_1.BallFilter and filter_model_2.BallFilter with OR semantics.

    - A ball passes (empty set) when at least one model passes it.
    - A ball fails only when both models fail; the result is then the union
      of both reason sets, each reason prefixed with its model tag.
    """

    def __init__(
        self,
        lottoHistoryFileName: Optional[str] = None,
        # Shared ruleset settings, used for a model when it has no override.
        ruleset_path: Optional[str] = None,
        ruleset: Optional[Dict[str, Any]] = None,
        # Per-model overrides.
        ruleset_path_1: Optional[str] = None,
        ruleset_path_2: Optional[str] = None,
        ruleset_1: Optional[Dict[str, Any]] = None,
        ruleset_2: Optional[Dict[str, Any]] = None,
    ):
        """Build both underlying filters; a per-model argument wins over the shared one."""
        path_for_m1 = ruleset_path if ruleset_path_1 is None else ruleset_path_1
        path_for_m2 = ruleset_path if ruleset_path_2 is None else ruleset_path_2
        rules_for_m1 = ruleset if ruleset_1 is None else ruleset_1
        rules_for_m2 = ruleset if ruleset_2 is None else ruleset_2

        self.m1 = fm1.BallFilter(
            lottoHistoryFileName, ruleset_path=path_for_m1, ruleset=rules_for_m1
        )
        self.m2 = fm2.BallFilter(
            lottoHistoryFileName, ruleset_path=path_for_m2, ruleset=rules_for_m2
        )

    # History look-up helpers: forwarded to model 1.

    def getBall(self, no):
        """Forward to model 1's ``getBall``."""
        return self.m1.getBall(no)

    def getLastNo(self, YMD):
        """Forward to model 1's ``getLastNo``."""
        return self.m1.getLastNo(YMD)

    def getNextNo(self, YMD):
        """Forward to model 1's ``getNextNo``."""
        return self.m1.getNextNo(YMD)

    def getYMD(self, no):
        """Forward to model 1's ``getYMD``."""
        return self.m1.getYMD(no)

    def _prefixed(self, prefix: str, reasons: set) -> set:
        """Return a new set with ``prefix`` put in front of every reason."""
        tagged = set()
        for reason in reasons:
            tagged.add(f"{prefix}{reason}")
        return tagged

    def extract_final_candidates(self, ball, no=None, until_end: bool = False, df=None):
        """
        Evaluate ``ball`` against both models with OR-pass semantics.

        Returns an empty set as soon as one model reports no failure reasons
        (model 2 is not consulted when model 1 already passes). Otherwise
        returns the union of both reason sets, prefixed "m1:" / "m2:".
        """
        failures = []
        for model_name in ("m1", "m2"):
            # Look the model up lazily so model 2 is untouched when model 1 passes.
            model = getattr(self, model_name)
            reasons = model.extract_final_candidates(
                ball=ball, no=no, until_end=until_end, df=df
            )
            if len(reasons) == 0:
                return set()
            failures.append((model_name + ":", reasons))

        # Both models rejected the ball.
        (tag_1, reasons_1), (tag_2, reasons_2) = failures
        return self._prefixed(tag_1, set(reasons_1)) | self._prefixed(tag_2, set(reasons_2))

    def filter(self, ball, no, until_end: bool = False, df=None, filter_ball=None):
        """
        Entry point with the signature existing callers expect.

        ``filter_ball`` is accepted but ignored here.
        """
        return self.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)
|
||||
|
||||
4519
final_BallFilter.py
4519
final_BallFilter.py
File diff suppressed because it is too large
Load Diff
@@ -1,50 +1,33 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
학습(1~800) / 검증(801~1000) / 테스트(1001~) 구간별 필터 통과(당첨번호가 필터를 통과하는지) 분석.
|
||||
1_FilterTest_25.py 와 동일한 흐름이며 BallFilter 대신 final_BallFilter.BallFilter 를 사용합니다.
|
||||
|
||||
실행: miniconda 환경 ncue 에서 `python final_filterTest.py` (README 참고).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import time
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import itertools
|
||||
from final_BallFilter import BallFilter
|
||||
|
||||
# PROMPT.txt 기준 구간
|
||||
TRAIN_NO = (1, 800)
|
||||
VALID_NO = (801, 1000)
|
||||
TEST_NO = (1001, 10**9)
|
||||
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
|
||||
def __init__(self, resources_path: str):
|
||||
lotto_json = os.path.join(resources_path, "lotto_history.json")
|
||||
self.ballFilter = BallFilter(lotto_json)
|
||||
|
||||
def find_filter_method(self, df_ball, filter_ball=None, no_min=None, no_max=None):
|
||||
"""no_min~no_max 회차만 역순으로 검사 (None 이면 전체)."""
|
||||
def __init__(self, resources_path):
|
||||
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
|
||||
self.ballFilter = BallFilter(lottoHistoryFileName)
|
||||
|
||||
return
|
||||
|
||||
def find_filter_method(self, df_ball, filter_ball=None):
|
||||
win_count = 0
|
||||
|
||||
no_filter_ball = {}
|
||||
|
||||
printLog = True
|
||||
filter_dic = {}
|
||||
filter_dic_len = {}
|
||||
filter_dic_1 = {}
|
||||
filter_dic_2 = {}
|
||||
for i in range(len(df_ball)-1, 19, -1):
|
||||
|
||||
idx_list = list(range(len(df_ball) - 1, 19, -1))
|
||||
for i in idx_list:
|
||||
no = int(df_ball["no"].iloc[i])
|
||||
if no_min is not None and no < no_min:
|
||||
continue
|
||||
if no_max is not None and no > no_max:
|
||||
continue
|
||||
|
||||
answer = df_ball[df_ball["no"] == no].values.tolist()[0]
|
||||
answer = answer[1:7]
|
||||
no = df_ball['no'].iloc[i]
|
||||
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
|
||||
answer = sorted(answer[1:7])
|
||||
|
||||
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
|
||||
filter_type = list(filter_type)
|
||||
@@ -53,60 +36,180 @@ class FilterTest:
|
||||
if size == 0:
|
||||
win_count += 1
|
||||
no_filter_ball[no] = answer
|
||||
print("\t", no)
|
||||
elif size == 1:
|
||||
key = filter_type[0]
|
||||
filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
|
||||
if key not in filter_dic_1:
|
||||
filter_dic_1[key] = 1
|
||||
else:
|
||||
filter_dic_1[key] += 1
|
||||
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
elif size == 2:
|
||||
key = ",".join(filter_type)
|
||||
filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
|
||||
key = ','.join(filter_type)
|
||||
if key not in filter_dic_2:
|
||||
filter_dic_2[key] = 1
|
||||
else:
|
||||
filter_dic_2[key] += 1
|
||||
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
else:
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
|
||||
# 회차별 필터개수가 적은 것을 정렬하기 위함
|
||||
if size not in filter_dic_len:
|
||||
filter_dic_len[size] = []
|
||||
filter_dic_len[size].append(filter_type)
|
||||
|
||||
for f_t in filter_type:
|
||||
filter_dic[f_t] = filter_dic.get(f_t, 0) + 1
|
||||
if f_t not in filter_dic:
|
||||
filter_dic[f_t] = 1
|
||||
else:
|
||||
filter_dic[f_t] += 1
|
||||
|
||||
print("\n\t[구간 {}~{}] 필터에 걸리지 않은 회차 (당첨 조합 통과)]".format(no_min, no_max))
|
||||
print("\tcount: {:,} (통과)".format(len(no_filter_ball)))
|
||||
for no in sorted(no_filter_ball.keys()):
|
||||
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
|
||||
sorted_filter_dic_len = sorted(filter_dic_len.keys())
|
||||
for filter_count in sorted_filter_dic_len:
|
||||
for filter_type in filter_dic_len[filter_count]:
|
||||
print("\t\t>{} > {}".format(filter_count, filter_type))
|
||||
|
||||
print("\n\t[걸러진 유일 필터]")
|
||||
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic_1)):
|
||||
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
|
||||
|
||||
print("\n\t[2개 필터에 걸린 경우]")
|
||||
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic_2)):
|
||||
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
|
||||
|
||||
print("\n\t[Filter 유형 별 걸린 개수]")
|
||||
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic)):
|
||||
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
|
||||
|
||||
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
|
||||
print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
|
||||
for no in no_filter_ball:
|
||||
print("\t\t>", no, no_filter_ball[no])
|
||||
print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
|
||||
|
||||
return win_count, no_filter_ball
|
||||
return win_count
|
||||
|
||||
def report_split(self, df_ball, name: str, lo: int, hi: int):
    """Run ``find_filter_method`` on draws ``lo``..``hi`` and print a summary.

    Prints a banner, the elapsed time and the pass count/rate for the
    range, plus a reference hint when the range lies inside ``TRAIN_NO``
    or ``VALID_NO``.

    Returns:
        The pass count reported by ``find_filter_method``.
    """
    banner = "=" * 60
    print("\n" + banner)
    print(" {} | 회차 {} ~ {}".format(name, lo, hi))
    print(banner)

    started_at = time.time()
    wc, _ = self.find_filter_method(df_ball, no_min=lo, no_max=hi)
    elapsed = datetime.timedelta(seconds=time.time() - started_at)

    span = hi - lo + 1
    if span:
        rate = wc / span * 100
    else:
        rate = 0

    print("\t처리 시간: {}".format(elapsed))
    print("\t통과 회차 수: {} / {} ({:.2f}%)".format(wc, span, rate))

    inside_train = TRAIN_NO[0] <= lo and hi <= TRAIN_NO[1]
    if inside_train:
        need = max(1, span // 100)
        print("\t(참고) 100회당 최소 1회 기준 대략 {}회 이상이면 충족".format(need))

    inside_valid = VALID_NO[0] <= lo and hi <= VALID_NO[1]
    if inside_valid:
        print("\t(참고) 검증 200회 구간에서 최소 3회 이상이면 요구사항 예시 충족")

    return wc
|
||||
def find_final_candidates(self, no, df_ball, filter_ball=None):
    """Return every 6-number combination of 1..45 that the ball filter accepts.

    Args:
        no: draw number forwarded to ``self.ballFilter.filter``.
        df_ball: draw-history frame forwarded to the filter as ``df``.
        filter_ball: optional iterable of numbers to exclude; combinations
            containing any of them are never returned.

    Returns:
        A list of ascending 6-element lists, in lexicographic order, for
        which the filter returned no reasons.
    """
    # Excluded numbers are removed from the pool once, instead of generating
    # all combinations and discarding the ones that contain them. The
    # surviving combinations and their order are the same.
    excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
    pool = [number for number in range(1, 46) if number not in excluded]

    final_candidates = []
    # Iterate lazily: materialising the full list costs ~8.1M tuples up front.
    for idx, combo in enumerate(itertools.combinations(pool, 6)):
        # Progress now counts examined combinations only.
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))

        # combinations() over an ascending pool already yields ascending tuples.
        ball = list(combo)
        filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if len(filter_type):
            continue

        final_candidates.append(ball)

    return final_candidates
|
||||
|
||||
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
    """Count and print the historical draws whose winning numbers pass the filter.

    Walks ``df_ball`` from the last row back to row index 1 (row 0 is not
    examined). Draws whose winning numbers share any number with
    ``filter_ball`` are skipped. Prints each passing draw and a final
    summary line; returns ``None``.
    """
    passed_draws = 0
    row_total = len(df_ball)

    for row_pos in reversed(range(1, row_total)):
        no = df_ball['no'].iloc[row_pos]
        matching_rows = df_ball[df_ball['no'] == no].values.tolist()
        answer = sorted(matching_rows[0][1:7])

        if filter_ball is not None and not set(answer).isdisjoint(set(filter_ball)):
            continue

        filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
        if len(filter_type) != 0:
            continue

        passed_draws += 1
        print("\t\t>{}. {}".format(no, answer))

    print("\n\t> {} / {} p_win_count, {} total".format(passed_draws, p_win_count, row_total - 1))

    return
|
||||
|
||||
def validate(self, df_ball, nos=None):
    """Report which of the given draws have winning numbers that pass the filter.

    Args:
        df_ball: draw-history frame whose first column is ``no`` and whose
            next six columns are the winning numbers.
        nos: iterable of draw numbers to check. ``None`` (the default) is
            treated as "nothing to check" instead of raising ``TypeError``.

    Returns:
        Dict mapping each passing draw number to its sorted six winning numbers.
    """
    win_history = {}
    if nos is None:
        return win_history

    for no in nos:
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0]
        answer = sorted(answer[1:7])

        # The previous version enumerated all 8,145,060 combinations and ran
        # the filter on each, but only the combination equal to the winning
        # numbers could ever be recorded. Checking that one combination
        # directly gives the same result; the per-million progress lines
        # are no longer printed.
        is_enumerable = len(set(answer)) == 6 and all(1 <= b <= 45 for b in answer)
        if not is_enumerable:
            # Such a row never matched any generated combination before either.
            continue

        filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
        if len(filter_type) == 0:
            win_history[no] = answer
            print("win.. no: {}, answer: {}".format(no, str(answer)))

    return win_history
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
resources_path = os.path.join(os.path.dirname(__file__), "resources")
|
||||
csv_path = os.path.join(resources_path, "lotto_history.txt")
|
||||
df_ball = pd.read_csv(csv_path, header=None)
|
||||
df_ball.columns = ["no", "b1", "b2", "b3", "b4", "b5", "b6", "bn"]
|
||||
if __name__ == '__main__':
|
||||
|
||||
ft = FilterTest(resources_path)
|
||||
resources_path = 'resources'
|
||||
|
||||
ft.report_split(df_ball, "학습 TRAIN", TRAIN_NO[0], TRAIN_NO[1])
|
||||
ft.report_split(df_ball, "검증 VALID", VALID_NO[0], min(VALID_NO[1], int(df_ball["no"].max())))
|
||||
if int(df_ball["no"].max()) >= TEST_NO[0]:
|
||||
ft.report_split(
|
||||
df_ball,
|
||||
"테스트 TEST",
|
||||
TEST_NO[0],
|
||||
int(df_ball["no"].max()),
|
||||
)
|
||||
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
|
||||
df_ball = pd.read_csv(lottoHistoryFileName, header=None)
|
||||
df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
|
||||
|
||||
filter_ball=[]
|
||||
filterTest = FilterTest(resources_path)
|
||||
|
||||
print("STEP #1. 필터 방법 추출")
|
||||
start = time.time()
|
||||
win_count = filterTest.find_filter_method(df_ball, filter_ball)
|
||||
process_time = datetime.timedelta(seconds=time.time() - start)
|
||||
print("process_time: ", process_time)
|
||||
|
||||
"""
|
||||
print("\n\n")
|
||||
no = df_ball['no'].values[-1]
|
||||
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
|
||||
answer = ball[1:7]
|
||||
|
||||
print("STEP #0. 최종 후보 선정")
|
||||
start = time.time()
|
||||
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
|
||||
process_time = datetime.timedelta(seconds=time.time() - start)
|
||||
print("process_time: ", process_time)
|
||||
|
||||
print(" > size: {}".format(len(final_candidates)))
|
||||
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
|
||||
with open(file_name, 'w+') as outFp:
|
||||
for ball in final_candidates:
|
||||
ball_str = [str(b) for b in answer]
|
||||
outFp.write("{}\n".format(','.join(ball_str)))
|
||||
|
||||
print('{}회, 정답: {}\n'.format(no, str(answer)))
|
||||
"""
|
||||
|
||||
#print("\n\n")
|
||||
#print("STEP #2. 당첨 회수 확인")
|
||||
#filterTest.check_filter_method(df_ball, win_count)
|
||||
|
||||
# 오리지널 버전 (자질 파일에 고정): 당첨 22개
|
||||
File diff suppressed because one or more lines are too long
409
final_practice.py
Normal file
409
final_practice.py
Normal file
@@ -0,0 +1,409 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
from DataCrawler import DataCrawler
|
||||
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from collections import Counter
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from final_BallFilter import BallFilter
|
||||
|
||||
COST_PER_GAME = 1000
MAX_BUDGET_KRW = 70000
# Number of games that fit in the budget for one draw.
MAX_GAMES_PER_DRAW = MAX_BUDGET_KRW // COST_PER_GAME


class Practice:
    """Weekly recommendation workflow.

    Crawls draw history, supplies a fixed list of combinations, checks them
    against ``BallFilter``, enumerates filter-passing combinations and
    selects a portfolio of them within the per-draw game budget.
    """

    bot = None
    # The attributes below are declared but not assigned anywhere in this class.
    preprocessor = None
    predictor = None

    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram bot used for notifications (``resources_path`` is unused here)."""
        self.bot = TelegramBot()

        return

    @staticmethod
    def _fetch_draw(draw_no):
        """POST the draw-lookup URL and return the decoded JSON, or None when it is not a success."""
        url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw_no)
        result = requests.post(url).json()
        if result['returnValue'] != 'success':
            return None
        return result

    @staticmethod
    def _record_draw(json_fp, text_fp, draw_no, result):
        """Append one draw to the JSON-lines and CSV files, echo it, and return its seven numbers."""
        numbers = [
            result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
            result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
            result['bnusNo'],
        ]
        csv_line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + numbers)
        json_fp.write(json.dumps(result, ensure_ascii=False) + "\n")
        text_fp.write(csv_line + "\n")
        print(csv_line)
        return numbers

    def craw(self, lottoHistoryFile, drwNo=None):
        """Collect winning numbers and store them in ``<lottoHistoryFile>.json`` / ``.txt``.

        Args:
            lottoHistoryFile: path prefix (without extension) of the history files.
            drwNo: when given, only that draw is fetched and appended to the
                files; when ``None`` the files are rewritten from draw 1
                until the service stops reporting success.

        Returns:
            ``[n1..n6, bonus]`` of the last draw stored, or ``None`` when
            nothing was stored.
        """
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None

        # ``with`` closes both files on every exit path. The earlier version
        # leaked them on the early ``return None`` and on any request error.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if single_draw:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                ball = self._record_draw(jsonFp, textFp, drwNo, result)
            else:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result is None:
                        break
                    ball = self._record_draw(jsonFp, textFp, idx, result)
                    idx += 1
                    # Pause between requests.
                    time.sleep(0.5)

        return ball

    def predict1(self, result_json):
        """Append the weekly fixed combinations to ``result_json`` (mutated in place)."""
        fixed_combinations = (
            [6, 7, 10, 11, 20, 45],
            [5, 12, 16, 27, 39, 45],
            [5, 15, 18, 29, 36, 41],
            [1, 17, 20, 25, 36, 45],
            [6, 15, 20, 23, 37, 43],
            [8, 15, 19, 23, 38, 41],
            [3, 14, 20, 27, 35, 45],
            [5, 11, 19, 24, 40, 45],
            [5, 9, 20, 25, 32, 37],
            [2, 13, 19, 27, 40, 43],
            [4, 13, 17, 28, 39, 43],
        )
        for combination in fixed_combinations:
            result_json.append(list(combination))

        return

    @staticmethod
    def _load_history_frame(resources_path):
        """Read ``lotto_history.txt`` (header-less CSV) into a frame with named columns."""
        lotto_history_txt = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lotto_history_txt, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        return df_ball

    @staticmethod
    def _previous_draw_row(df_ball, draw_no):
        """Return the history row (as a list) of the draw just before ``draw_no``.

        Raises:
            IndexError: when the history has no row for ``draw_no - 1``.
        """
        rows = df_ball[df_ball['no'] == draw_no - 1].values.tolist()
        if not rows:
            raise IndexError(
                "lotto history has no row for draw {}".format(draw_no - 1)
            )
        return rows[0]

    def validate_fixed_balls(self, resources_path, ymd, fixed_balls):
        """
        Check whether each fixed combination passes ``BallFilter``.

        Returns:
            dict: total, passed_count, failed_count, draw_no, details
        """
        lotto_history_json = os.path.join(resources_path, 'lotto_history.json')
        ball_filter = BallFilter(lotto_history_json)
        draw_no = ball_filter.getNextNo(ymd)

        df_ball = self._load_history_frame(resources_path)
        prev_row = self._previous_draw_row(df_ball, draw_no)
        p_ball = prev_row[1:7]

        details = []
        passed_count = 0
        for index, ball in enumerate(fixed_balls, start=1):
            filter_type = ball_filter.filter(
                ball=ball, no=draw_no, until_end=False, df=df_ball, p_ball=p_ball
            )
            passed = len(filter_type) == 0
            if passed:
                passed_count += 1
            details.append({
                'index': index,
                'ball': ball,
                'passed': passed,
                'filter_reasons': sorted(filter_type),
            })

        return {
            'draw_no': draw_no,
            'total': len(fixed_balls),
            'passed_count': passed_count,
            'failed_count': len(fixed_balls) - passed_count,
            'details': details,
        }

    @staticmethod
    def format_fixed_validation_summary(validation):
        """Render a fixed-combination validation result as text for Telegram/logs."""
        lines = [
            " - 고정수 필터 검증: {}/{} 통과".format(
                validation['passed_count'], validation['total']
            )
        ]
        if validation['failed_count'] > 0:
            lines.append(
                " - 필터 예외 포함: {}개 (고정수 유지)".format(
                    validation['failed_count']
                )
            )
            for item in validation['details']:
                if item['passed']:
                    continue
                # Only the first (alphabetically smallest) reason is shown.
                reason = item['filter_reasons'][0] if item['filter_reasons'] else 'unknown'
                lines.append(
                    " * #{} {} -> {}".format(item['index'], item['ball'], reason)
                )
        return "\n".join(lines)

    def _can_add_ball(self, ball, fixed_balls, selected_balls, max_overlap):
        """Return True when ``ball`` shares at most ``max_overlap`` numbers with every fixed and selected combination."""
        ball_set = set(ball)
        return not any(
            len(ball_set & set(other)) > max_overlap
            for other in itertools.chain(fixed_balls, selected_balls)
        )

    @staticmethod
    def _portfolio_number_counts(fixed_balls, selected_balls):
        """Count how often each number appears across the fixed and selected combinations."""
        counts = Counter()
        for ball in itertools.chain(fixed_balls, selected_balls):
            counts.update(ball)
        return counts

    @staticmethod
    def _coverage_priority(ball, number_counts):
        """Sum of portfolio appearances of the ball's numbers; lower means less-used numbers."""
        return sum(number_counts.get(number, 0) for number in ball)

    def _pick_best_candidate(self, unique_candidates, selected_keys, fixed_balls, selected, max_overlap):
        """Pick the not-yet-selected candidate with the lowest coverage score among those meeting the overlap limit.

        Ties on score are broken by the smallest number tuple, so the result
        does not depend on the order of ``unique_candidates``.

        Returns:
            ``(candidate, key)``, or ``(None, None)`` when no candidate qualifies.
        """
        number_counts = self._portfolio_number_counts(fixed_balls, selected)

        eligible = (
            candidate
            for candidate in unique_candidates
            if tuple(candidate) not in selected_keys
            and self._can_add_ball(candidate, fixed_balls, selected, max_overlap)
        )
        best_candidate = min(
            eligible,
            key=lambda c: (self._coverage_priority(c, number_counts), tuple(c)),
            default=None,
        )
        if best_candidate is None:
            return None, None
        return best_candidate, tuple(best_candidate)

    def select_portfolio(self, fixed_balls, candidates, target_count, shuffle_seed=None):
        """
        Second-stage portfolio selection.

        - Candidates are de-duplicated (order-insensitively) and combinations
          equal to a fixed one are dropped.
        - With ``shuffle_seed`` the unique candidates are shuffled by
          ``random.Random(int(shuffle_seed))``. This affects only the order
          of the list returned when there are no more unique candidates
          than ``target_count``; the greedy path below is order-independent.
        - Otherwise candidates are picked greedily while the allowed overlap
          with fixed/selected combinations is relaxed step by step
          (2, 3, 4, 5, then 6 shared numbers).
        - Within a step the candidate whose numbers are least used in the
          portfolio wins; ties go to the smallest number tuple.
        """
        unique_candidates = []
        seen = set()
        fixed_keys = {tuple(sorted(fixed_ball)) for fixed_ball in fixed_balls}

        for candidate in candidates:
            key = tuple(sorted(candidate))
            if key in seen or key in fixed_keys:
                continue
            seen.add(key)
            unique_candidates.append(list(key))

        if shuffle_seed is not None:
            rng = random.Random(int(shuffle_seed))
            rng.shuffle(unique_candidates)

        if target_count <= 0:
            return []

        if len(unique_candidates) <= target_count:
            return unique_candidates

        selected = []
        selected_keys = set()

        for max_overlap in (2, 3, 4, 5, 6):
            while len(selected) < target_count:
                best_candidate, best_key = self._pick_best_candidate(
                    unique_candidates, selected_keys, fixed_balls, selected, max_overlap
                )
                if best_candidate is None:
                    # Nothing fits at this overlap limit: relax it.
                    break

                selected.append(best_candidate)
                selected_keys.add(best_key)

            if len(selected) >= target_count:
                break

        return selected

    def predict2(self, resources_path, ymd, fixed_balls, max_games_per_draw=MAX_GAMES_PER_DRAW):
        """Enumerate filter-passing combinations for the next draw and select a portfolio.

        Returns:
            ``(p_no, p_ball, selected_candidates, passed_count,
            variable_target_count)``: the previous draw's number and six
            winning numbers, the selected combinations, how many combinations
            passed the filter, and how many selections were aimed for
            (``max_games_per_draw`` minus the number of fixed combinations,
            floored at 0).

        Raises:
            IndexError: when the history has no row for the previous draw.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = self._load_history_frame(resources_path)
        prev_row = self._previous_draw_row(df_ball, no)
        p_ball = prev_row[1:7]

        passed_candidates = []
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):

            if idx % 1000000 == 0:
                print(" - {} processed, pass: {}".format(idx, len(passed_candidates)))
            ball = list(combo)

            filter_type = ballFilter.filter(
                ball=ball, no=no, until_end=False, df=df_ball, p_ball=p_ball
            )
            if len(filter_type) > 0:
                continue

            passed_candidates.append(ball)

        variable_target_count = max(0, max_games_per_draw - len(fixed_balls))
        selected_candidates = self.select_portfolio(
            fixed_balls=fixed_balls,
            candidates=passed_candidates,
            target_count=variable_target_count,
            shuffle_seed=ymd,
        )

        p_no = prev_row[0]

        return p_no, p_ball, selected_candidates, len(passed_candidates), variable_target_count
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Data collection (currently disabled)
    #dataCrawler = DataCrawler()
    #dataCrawler.excute(resources_path)

    # Target the upcoming Saturday. On Saturday after the 20:00 hour, and on
    # Sunday, the target moves to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    rolls_to_next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    offset_days = (12 if rolls_to_next_week else 5) - weekday
    this_weekend = today + timedelta(days=offset_days)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Load earlier recommendations if present; this week's entry always starts empty.
    recommend_result_file = os.path.join(resources_path, "recommend_ball.biz_25.json")
    result_json = {}
    if os.path.isfile(recommend_result_file):
        with open(recommend_result_file, "r", encoding="utf-8") as result_fp:
            result_json = json.load(result_fp)
    result_json[ymd] = []

    # Fixed every week
    fixed_balls = []
    practice.predict1(fixed_balls)
    fixed_validation = practice.validate_fixed_balls(
        resources_path=resources_path,
        ymd=ymd,
        fixed_balls=fixed_balls,
    )
    print(Practice.format_fixed_validation_summary(fixed_validation))
    result_json[ymd].extend(fixed_balls)

    # Filter-based prediction
    prediction = practice.predict2(
        resources_path=resources_path,
        ymd=ymd,
        fixed_balls=fixed_balls,
        max_games_per_draw=MAX_GAMES_PER_DRAW
    )
    p_no, p_ball, selected_candidates, passed_count, variable_target_count = prediction
    result_json[ymd].extend(selected_candidates)

    # Per-week metadata is stored next to the recommendations.
    result_json.setdefault('_meta', {})[ymd] = {
        'fixed_validation': fixed_validation,
        'passed_count': passed_count,
        'selected_count': len(selected_candidates),
        'portfolio_shuffle_seed': ymd,
    }

    with open(recommend_result_file, 'w', encoding='utf-8') as outFp:
        json.dump(result_json, outFp, ensure_ascii=False)

    total_games = len(result_json[ymd])
    total_cost = total_games * COST_PER_GAME
    p_str = "".join([
        "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1)),
        " - 고정수: {}개\n".format(len(fixed_balls)),
        Practice.format_fixed_validation_summary(fixed_validation) + "\n",
        " - 필터 통과 후보: {}개\n".format(passed_count),
        " - 추가 선정: {}개 (목표 {}개)\n".format(len(selected_candidates), variable_target_count),
        " - 총 추천: {}개, 총 금액: {:,}원 (한도 {:,}원)\n".format(total_games, total_cost, MAX_BUDGET_KRW),
    ])

    # Send the list in messages of at most 100 combinations each.
    for position, ball in enumerate(result_json[ymd], start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Flush the remainder when the last message was not sent inside the loop.
    if len(result_json[ymd]) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    print("size: {}".format(total_games))
    print("cost: {:,} KRW / limit: {:,} KRW".format(total_cost, MAX_BUDGET_KRW))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
1081
practice_0.py
1081
practice_0.py
File diff suppressed because it is too large
Load Diff
179
practice_1.py
179
practice_1.py
@@ -1,179 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
from DataCrawler import DataCrawler
|
||||
|
||||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_1 import BallFilter
|
||||
|
||||
class Practice:
    """Weekly recommendation workflow built on ``filter_model_1.BallFilter``.

    Crawls draw history, supplies one fixed combination and appends every
    filter-passing combination for the next draw.
    """

    bot = None
    # The attributes below are declared but not assigned anywhere in this class.
    preprocessor = None
    predictor = None

    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram bot used for notifications (``resources_path`` is unused here)."""
        self.bot = TelegramBot()

        return

    @staticmethod
    def _fetch_draw(draw_no):
        """POST the draw-lookup URL and return the decoded JSON, or None when it is not a success."""
        url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw_no)
        result = requests.post(url).json()
        if result['returnValue'] != 'success':
            return None
        return result

    @staticmethod
    def _record_draw(json_fp, text_fp, draw_no, result):
        """Append one draw to the JSON-lines and CSV files, echo it, and return its seven numbers."""
        numbers = [
            result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
            result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
            result['bnusNo'],
        ]
        csv_line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + numbers)
        json_fp.write(json.dumps(result, ensure_ascii=False) + "\n")
        text_fp.write(csv_line + "\n")
        print(csv_line)
        return numbers

    def craw(self, lottoHistoryFile, drwNo=None):
        """Collect winning numbers and store them in ``<lottoHistoryFile>.json`` / ``.txt``.

        Args:
            lottoHistoryFile: path prefix (without extension) of the history files.
            drwNo: when given, only that draw is fetched and appended to the
                files; when ``None`` the files are rewritten from draw 1
                until the service stops reporting success.

        Returns:
            ``[n1..n6, bonus]`` of the last draw stored, or ``None`` when
            nothing was stored.
        """
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None

        # ``with`` closes both files on every exit path. The earlier version
        # leaked them on the early ``return None`` and on any request error.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if single_draw:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                ball = self._record_draw(jsonFp, textFp, drwNo, result)
            else:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result is None:
                        break
                    ball = self._record_draw(jsonFp, textFp, idx, result)
                    idx += 1
                    # Pause between requests.
                    time.sleep(0.5)

        return ball

    def predict1(self, result_json):
        """Append the weekly fixed combination to ``result_json`` (mutated in place)."""
        result_json.append([6, 7, 10, 11, 20, 45])
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every filter-passing 6-number combination for the next draw to ``result_json``.

        Returns:
            ``(p_no, p_ball)``: the previous draw's number and its six
            winning numbers, read from ``lotto_history.txt``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Iterate lazily instead of materialising all 8,145,060 combinations.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):

            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)

            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type) > 0:
                continue

            result_json.append(ball)

        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = prev_row[0]
        p_ball = prev_row[1:7]

        return p_no, p_ball
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the upcoming Saturday. On Saturday after the 20:00 hour, and on
    # Sunday, the target moves to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    rolls_to_next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    offset_days = (12 if rolls_to_next_week else 5) - weekday
    this_weekend = today + timedelta(days=offset_days)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: parse every non-blank line of the JSON-lines history,
    # keeping the last record (only the disabled crawl call below uses it).
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)

    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    weekly_picks = result_json[ymd]

    # Fixed every week
    practice.predict1(weekly_picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, weekly_picks)

    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))

    # Send the list in messages of at most 100 combinations each.
    for position, ball in enumerate(weekly_picks, start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Flush the remainder when the last message was not sent inside the loop.
    if len(weekly_picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(weekly_picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
179
practice_2.py
179
practice_2.py
@@ -1,179 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
from DataCrawler import DataCrawler
|
||||
|
||||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_2 import BallFilter
|
||||
|
||||
class Practice:
    """Lotto number recommender that reports its picks through Telegram.

    ``craw`` maintains the local draw-history files, ``predict1`` adds the
    fixed weekly pick and ``predict2`` adds every 6-number combination that
    passes ``BallFilter``.
    """

    # Telegram client; replaced by a ``TelegramBot`` instance in ``__init__``.
    bot = None
    # Never assigned anywhere in this class.
    preprocessor = None
    predictor = None

    extract_count = None

    # Draw-result endpoint of the lottery site; the draw number is appended.
    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    # Upper bound (seconds) for one HTTP request so that a stalled server
    # cannot block the crawl forever.
    _REQUEST_TIMEOUT = 15

    def __init__(self, resources_path):
        """Create the Telegram client.

        Args:
            resources_path: Accepted for interface compatibility; not used by
                this version of the class.
        """
        self.bot = TelegramBot()

    def _fetch_draw(self, draw_no):
        """Request one draw from the lottery site and return the decoded JSON."""
        res = requests.post(self._DRAW_URL + str(draw_no), timeout=self._REQUEST_TIMEOUT)
        return res.json()

    @staticmethod
    def _record_draw(result, draw_no, json_fp, text_fp):
        """Write one successful draw to both history files and echo it.

        Returns:
            ``[n1, n2, n3, n4, n5, n6, bonus]`` taken from ``result``.
        """
        ball = [result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
                result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
                result['bnusNo']]
        csv_line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + ball)
        json_fp.write(json.dumps(result, ensure_ascii=False) + "\n")
        text_fp.write(csv_line + "\n")
        print(csv_line)
        return ball

    def craw(self, lottoHistoryFile, drwNo=None):
        """Download winning numbers and store them in the history files.

        Args:
            lottoHistoryFile: Path prefix; ``.json`` and ``.txt`` are appended
                to obtain the two history files.
            drwNo: Draw number to fetch and append. When ``None`` both files
                are truncated and every draw from 1 onwards is downloaded
                until the site stops answering ``success``.

        Returns:
            ``[n1..n6, bonus]`` of the last draw that was stored, or ``None``
            when nothing was stored.
        """
        mode = 'w' if drwNo is None else 'a'
        ball = None
        # ``with`` closes both files on every exit path, including the early
        # ``return None`` and any network/JSON error.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if drwNo is not None:
                result = self._fetch_draw(drwNo)
                if result['returnValue'] != 'success':
                    return None
                return self._record_draw(result, drwNo, jsonFp, textFp)

            idx = 1
            while True:
                result = self._fetch_draw(idx)
                if result['returnValue'] != 'success':
                    break
                ball = self._record_draw(result, idx, jsonFp, textFp)
                idx += 1
                # Pause between requests.
                time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed weekly pick to ``result_json`` (mutated in place)."""
        result_json.append([6, 7, 10, 11, 20, 45])

    def predict2(self, resources_path, ymd, result_json):
        """Append every combination that no filter rejects to ``result_json``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` and
                ``lotto_history.txt``.
            ymd: Target draw date string handed to ``BallFilter.getNextNo``.
            result_json: List that receives the surviving combinations.

        Returns:
            ``(p_no, p_ball)``: the draw number preceding the target draw and
            its six main numbers as stored in ``lotto_history.txt``.

        Raises:
            IndexError: If the previous draw is missing from the text history.
        """
        candidates = list(range(1, 46))

        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Stream the 8,145,060 combinations instead of materialising them all
        # in one list first.
        for idx, combo in enumerate(itertools.combinations(candidates, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)
            # A non-empty filter result means at least one rule rejected it.
            if len(ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                continue
            result_json.append(ball)

        previous = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return previous[0], previous[1:7]
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the Saturday of the current week (weekday 5). On Saturday from
    # 21:00 onwards, and on Sunday, move on to the following Saturday.
    today = datetime.today()
    weekday = today.weekday()
    next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=(12 if next_week else 5) - weekday)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: walk the JSON history so that the last non-blank line
    # ends up in ``last_json``.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)

    # Crawling of the next draw is currently switched off:
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    picks = []
    result_json = {ymd: picks}

    # Fixed weekly pick
    practice.predict1(picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    # Send the report in messages of at most 100 picks; the header travels
    # with the first message.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for rank, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Flush the last, partially filled message.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
546
practice_3.py
546
practice_3.py
@@ -1,546 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
from DataCrawler import DataCrawler
|
||||
|
||||
import json
|
||||
import os
|
||||
import copy
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_3 import BallFilter
|
||||
|
||||
class Practice:
    """Lotto number recommender (filter model #3) with Telegram reporting.

    ``predict1`` adds fixed picks, ``predict2`` keeps every combination that
    passes the default ``BallFilter`` and ``predict3`` tightens or relaxes the
    filter ruleset until the number of survivors falls into a target range,
    within a time budget.
    """

    # Telegram client; replaced by a ``TelegramBot`` instance in ``__init__``.
    bot = None
    # Never assigned anywhere in this class.
    preprocessor = None
    predictor = None

    extract_count = None
    # ``predict3`` aims for a survivor count inside [min, max].
    TARGET_MIN_SURVIVORS = 30
    TARGET_MAX_SURVIVORS = 150
    # Wall-clock budget (seconds) shared by all ``predict3`` scan stages.
    PREDICT_TIMEOUT_SECONDS = 180

    # Draw-result endpoint of the lottery site; the draw number is appended.
    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    # Upper bound (seconds) for one HTTP request so that a stalled server
    # cannot block the crawl forever.
    _REQUEST_TIMEOUT = 15

    def __init__(self, resources_path):
        """Create the Telegram client and remember the resources directory."""
        self.bot = TelegramBot()
        self.resources_path = resources_path

    def _fetch_draw(self, draw_no):
        """Request one draw from the lottery site and return the decoded JSON."""
        res = requests.post(self._DRAW_URL + str(draw_no), timeout=self._REQUEST_TIMEOUT)
        return res.json()

    @staticmethod
    def _record_draw(result, draw_no, json_fp, text_fp):
        """Write one successful draw to both history files and echo it.

        Returns:
            ``[n1, n2, n3, n4, n5, n6, bonus]`` taken from ``result``.
        """
        ball = [result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
                result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
                result['bnusNo']]
        csv_line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + ball)
        json_fp.write(json.dumps(result, ensure_ascii=False) + "\n")
        text_fp.write(csv_line + "\n")
        print(csv_line)
        return ball

    def craw(self, lottoHistoryFile, drwNo=None):
        """Download winning numbers and store them in the history files.

        Args:
            lottoHistoryFile: Path prefix; ``.json`` and ``.txt`` are appended
                to obtain the two history files.
            drwNo: Draw number to fetch and append. When ``None`` both files
                are truncated and every draw from 1 onwards is downloaded
                until the site stops answering ``success``.

        Returns:
            ``[n1..n6, bonus]`` of the last draw that was stored, or ``None``
            when nothing was stored.
        """
        mode = 'w' if drwNo is None else 'a'
        ball = None
        # ``with`` closes both files on every exit path, including the early
        # ``return None`` and any network/JSON error.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if drwNo is not None:
                result = self._fetch_draw(drwNo)
                if result['returnValue'] != 'success':
                    return None
                return self._record_draw(result, drwNo, jsonFp, textFp)

            idx = 1
            while True:
                result = self._fetch_draw(idx)
                if result['returnValue'] != 'success':
                    break
                ball = self._record_draw(result, idx, jsonFp, textFp)
                idx += 1
                # Pause between requests.
                time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed picks to ``result_json`` (mutated in place)."""
        result_json.extend([
            [6, 7, 10, 11, 20, 45],
            [2, 7, 17, 28, 35, 39],
            [6, 10, 19, 25, 33, 35],
            [3, 17, 20, 24, 35, 45],
            [5, 15, 18, 29, 36, 41],
            [6, 15, 20, 23, 37, 43],
            [8, 15, 19, 23, 38, 41],
            [5, 11, 19, 24, 40, 45],
            [9, 16, 18, 23, 35, 43],
            [7, 13, 19, 28, 33, 44],
            [7, 11, 18, 29, 37, 42],
        ])
        print("회차(predict1)")

    def predict2(self, resources_path, ymd, result_json):
        """Append every combination that no default filter rejects.

        Args:
            resources_path: Directory holding ``lotto_history.json`` and
                ``lotto_history.txt``.
            ymd: Target draw date string handed to ``BallFilter.getNextNo``.
            result_json: List that receives the surviving combinations.

        Returns:
            ``(p_no, p_ball)``: the draw number preceding the target draw and
            its six main numbers as stored in ``lotto_history.txt``.

        Raises:
            IndexError: If the previous draw is missing from the text history.
        """
        candidates = list(range(1, 46))

        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차(predict2): {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Stream the 8,145,060 combinations instead of materialising them all
        # in one list first.
        for idx, combo in enumerate(itertools.combinations(candidates, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)
            # A non-empty filter result means at least one rule rejected it.
            if len(ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                continue
            result_json.append(ball)

        previous = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return previous[0], previous[1:7]

    def predict3(self, resources_path, ymd, result_json):
        """Adaptive filter prediction that targets a bounded survivor count.

        The base ruleset is scanned first. Too many survivors triggers the
        tighter rulesets (and, as a last resort, a full scan that is ranked
        and trimmed); too few triggers progressively relaxed rulesets and,
        if still short, synthetic candidates derived from the previous draw.
        Every scan shares one deadline; on timeout the partial result is
        trimmed or filled by ``_finalize_on_timeout``.

        Args:
            resources_path: Directory holding the history files.
            ymd: Target draw date string handed to ``BallFilter.getNextNo``.
            result_json: List that receives the chosen combinations.

        Returns:
            ``(p_no, p_ball)``: previous draw number and its six main numbers,
            sorted ascending.

        Raises:
            IndexError: If the previous draw is missing from the text history.
        """
        # NOTE(review): the scraped source lost its indentation. The checks
        # that follow the tighten loop and the relax loop are assumed to
        # belong to their own branch - confirm against the real file.
        candidates = list(range(1, 46))
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        no = BallFilter(lottoHistoryFileName).getNextNo(ymd)
        print("회차(predict3): {}".format(no))
        predict_start_ts = time.time()
        deadline_ts = predict_start_ts + self.PREDICT_TIMEOUT_SECONDS

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        previous = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = previous[0]
        p_ball = sorted(previous[1:7])

        base_ruleset = self._get_base_ruleset()

        tighten_enabled = {
            "paper_patterns": True,
            "ban_triples_legacy": True,
            "all_in_previous7": True,
            "previous_neighbors": True,
        }
        tighten_allowed = {
            "ac_value": [8, 9],
            "uniq_last_digit_count": [4, 5],
            "even_count": [2, 3, 4],
        }
        tighten_rulesets = [
            self._build_ruleset(
                base_ruleset=base_ruleset,
                enabled_overrides=tighten_enabled,
                allowed_overrides=tighten_allowed,
            ),
            self._build_ruleset(
                base_ruleset=base_ruleset,
                enabled_overrides=tighten_enabled,
                allowed_overrides={
                    **tighten_allowed,
                    "sum": [112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148],
                    "sum_prev_diff": [13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40],
                },
            ),
        ]

        # Each relax level switches off everything the previous level did
        # plus a few more filters.
        relax_1 = {"paper_patterns": False, "ban_triples_legacy": False}
        relax_2 = {**relax_1, "previous_neighbors": False, "all_in_previous7": False}
        relax_3 = {
            **relax_2,
            "weeks_8_count": False,
            "weeks_12_count": False,
            "weeks_16_count": False,
            "weeks_20_count": False,
        }
        relax_rulesets = [
            self._build_ruleset(base_ruleset=base_ruleset, enabled_overrides=overrides)
            for overrides in (relax_1, relax_2, relax_3)
        ]

        min_survivors = self.TARGET_MIN_SURVIVORS
        max_survivors = self.TARGET_MAX_SURVIVORS
        chosen = []
        stage_name = "base"

        # Arguments shared by every scan.
        scan = dict(
            candidates=candidates,
            no=no,
            df_ball=df_ball,
            predict_start_ts=predict_start_ts,
            deadline_ts=deadline_ts,
        )

        current_info = self._collect_candidates(
            ruleset=base_ruleset,
            stop_when_gt=max_survivors,
            stage_name="base",
            **scan
        )
        current = current_info["candidates"]
        if current_info["timed_out"]:
            chosen = self._finalize_on_timeout(current, p_ball, min_survivors, max_survivors)
            stage_name = "base_timeout_fallback"
            print("predict3 stage: {}, survivors: {}".format(stage_name, len(chosen)))
            result_json.extend(chosen)
            return p_no, p_ball

        if min_survivors <= len(current) <= max_survivors:
            chosen = current
        elif len(current) > max_survivors:
            chosen = current
            stage_name = "base_overflow"
            for idx, rs in enumerate(tighten_rulesets, start=1):
                t_info = self._collect_candidates(
                    ruleset=rs,
                    stop_when_gt=max_survivors,
                    stage_name="tighten_{}".format(idx),
                    **scan
                )
                t = t_info["candidates"]
                if t_info["timed_out"]:
                    chosen = self._finalize_on_timeout(t, p_ball, min_survivors, max_survivors)
                    stage_name = "tighten_{}_timeout_fallback".format(idx)
                    break
                if min_survivors <= len(t) <= max_survivors:
                    chosen = t
                    stage_name = "tighten_{}".format(idx)
                    break
                if len(t) <= max_survivors:
                    # Below the minimum: remember it but keep trying.
                    chosen = t
                    stage_name = "tighten_{}".format(idx)

            if len(chosen) > max_survivors:
                # Still overflowing: scan everything with the tightest
                # ruleset (no early stop) and keep the best-scored picks.
                full_info = self._collect_candidates(
                    ruleset=tighten_rulesets[-1],
                    stop_when_gt=None,
                    stage_name="tighten_full_rank",
                    **scan
                )
                full_for_ranking = full_info["candidates"]
                if full_info["timed_out"]:
                    chosen = self._finalize_on_timeout(full_for_ranking, p_ball, min_survivors, max_survivors)
                    stage_name = "tighten_rank_timeout_fallback"
                else:
                    chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
                    stage_name = "tighten_rank_trim"
        else:
            chosen = current
            stage_name = "base_underflow"
            for idx, rs in enumerate(relax_rulesets, start=1):
                r_info = self._collect_candidates(
                    ruleset=rs,
                    stop_when_gt=None,
                    stop_when_gte=min_survivors,
                    stage_name="relax_{}".format(idx),
                    **scan
                )
                r = r_info["candidates"]
                chosen = r
                stage_name = "relax_{}".format(idx)
                if r_info["timed_out"]:
                    chosen = self._finalize_on_timeout(r, p_ball, min_survivors, max_survivors)
                    stage_name = "relax_{}_timeout_fallback".format(idx)
                    break
                if len(r) >= min_survivors:
                    break

            if len(chosen) == 0:
                stage_name = "relax_zero_fallback"
                chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
            elif len(chosen) < min_survivors:
                stage_name = "{}_fill".format(stage_name)
                fill = self._fallback_candidates_from_prev(
                    p_ball,
                    min_survivors - len(chosen),
                    exclude=set(tuple(x) for x in chosen),
                )
                chosen.extend(fill)

        print("predict3 stage: {}, survivors: {}".format(stage_name, len(chosen)))
        result_json.extend(chosen)
        return p_no, p_ball

    def _get_base_ruleset(self):
        """Return a private deep copy of the default filter's ``m1.ruleset``."""
        history_json = os.path.join(self.resources_path, "lotto_history.json")
        base_filter = BallFilter(history_json)
        return copy.deepcopy(base_filter.m1.ruleset)

    def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
        """Derive a ruleset from ``base_ruleset`` without modifying it.

        Args:
            base_ruleset: Mapping that may contain a ``"filters"`` mapping.
            enabled_overrides: ``{filter_name: flag}``; sets ``enabled`` to
                ``bool(flag)``.
            allowed_overrides: ``{filter_name: values}``; enables the filter
                and sets ``allowed`` to a list copy of ``values``.

        Returns:
            The new ruleset (deep copy with the overrides applied).
        """
        ruleset = copy.deepcopy(base_ruleset)
        filters = ruleset.setdefault("filters", {})
        for key, value in (enabled_overrides or {}).items():
            filters.setdefault(key, {})["enabled"] = bool(value)
        for key, values in (allowed_overrides or {}).items():
            entry = filters.setdefault(key, {})
            entry["enabled"] = True
            entry["allowed"] = list(values)
        return ruleset

    def _collect_candidates(
        self,
        candidates,
        no,
        df_ball,
        ruleset,
        stop_when_gt=None,
        stop_when_gte=None,
        stage_name="base",
        predict_start_ts=None,
        deadline_ts=None,
    ):
        """Scan all 6-combinations of ``candidates`` with the given ruleset.

        Args:
            candidates: Numbers to combine.
            no: Draw number passed to ``BallFilter.filter``.
            df_ball: History data frame passed to ``BallFilter.filter``.
            ruleset: Ruleset used to build the ``BallFilter``.
            stop_when_gt: Stop as soon as more than this many survive.
            stop_when_gte: Stop as soon as at least this many survive.
            stage_name: Label used in progress output.
            predict_start_ts: Start time used for the elapsed-time output.
            deadline_ts: Absolute ``time.time()`` value after which the scan
                gives up.

        Returns:
            ``{"candidates": survivors, "timed_out": bool, "processed": int}``
            where ``processed`` is the 1-based index of the last combination
            reached.
        """
        lottoHistoryFileName = os.path.join(self.resources_path, "lotto_history.json")
        ballFilter = BallFilter(lottoHistoryFileName, ruleset=ruleset)

        def elapsed():
            return (time.time() - predict_start_ts) if predict_start_ts is not None else 0.0

        result = []
        last_idx = 0
        for idx, combo in enumerate(itertools.combinations(candidates, 6), start=1):
            last_idx = idx
            # The deadline is checked before the combination is evaluated.
            if deadline_ts is not None and deadline_ts <= time.time():
                print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed(), len(result)))
                return {"candidates": result, "timed_out": True, "processed": idx}
            if idx % 1000000 == 0:
                print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed(), len(result)))

            b = list(combo)
            if len(ballFilter.filter(ball=b, no=no, until_end=False, df=df_ball)) != 0:
                continue
            result.append(b)
            if stop_when_gt is not None and len(result) > stop_when_gt:
                return {"candidates": result, "timed_out": False, "processed": idx}
            if stop_when_gte is not None and len(result) >= stop_when_gte:
                return {"candidates": result, "timed_out": False, "processed": idx}
        return {"candidates": result, "timed_out": False, "processed": last_idx}

    def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
        """Turn a partial scan result into a list sized for the target range.

        More than ``max_survivors`` entries are ranked and trimmed; fewer than
        ``min_survivors`` are topped up with candidates derived from
        ``prev_ball``. The input list is not modified.
        """
        chosen = list(partial_candidates)
        if len(chosen) > max_survivors:
            return self._rank_and_trim(chosen, prev_ball, max_survivors)
        if len(chosen) < min_survivors:
            chosen.extend(self._fallback_candidates_from_prev(
                prev_ball,
                min_survivors - len(chosen),
                exclude=set(tuple(x) for x in chosen),
            ))
        return chosen

    def _rank_and_trim(self, candidates, prev_ball, limit):
        """Return the ``limit`` lowest-scored candidates (stable for ties)."""
        scored = [(self._score_candidate(ball, prev_ball), ball) for ball in candidates]
        scored.sort(key=lambda pair: pair[0])
        return [ball for _, ball in scored[:limit]]

    def _score_candidate(self, ball, prev_ball):
        """Score a combination; lower is better.

        The score adds the absolute sum difference to ``prev_ball``, twice
        the distance of the even count from 3, twice the distance of the
        number of distinct last digits from 5, and one point per adjacent
        pair of consecutive numbers.
        """
        even_cnt = sum(1 for x in ball if x % 2 == 0)
        uniq_last = len({x % 10 for x in ball})
        ordered = sorted(ball)
        contiguous_penalty = sum(1 for lo, hi in zip(ordered, ordered[1:]) if hi - lo == 1)
        return (abs(sum(ball) - sum(prev_ball))
                + abs(even_cnt - 3) * 2
                + abs(uniq_last - 5) * 2
                + contiguous_penalty)

    def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
        """Derive up to ``need_count`` combinations by nudging ``prev_ball``.

        Each delta pattern is added to the sorted previous draw, shifted by
        0..8 and clamped to 1..45. Results with repeated numbers, or that are
        in ``exclude`` or were already produced, are skipped.

        Args:
            prev_ball: Six numbers of the previous draw.
            need_count: Maximum number of combinations to return.
            exclude: Optional set of sorted tuples that must not be returned.
                It is copied, so the caller's set is left untouched.

        Returns:
            List of sorted 6-number lists; may be shorter than ``need_count``.
        """
        seen = set(exclude) if exclude else set()
        seed = sorted(prev_ball)
        out = []
        delta_patterns = [
            (0, 0, 0, 0, 0, 0),
            (-1, 0, 0, 0, 0, 1),
            (0, -1, 0, 0, 1, 0),
            (0, 0, -1, 1, 0, 0),
            (-2, 0, 0, 0, 0, 2),
            (0, -2, 0, 0, 2, 0),
            (0, 0, -2, 2, 0, 0),
            (-1, -1, 0, 0, 1, 1),
            (1, 0, -1, 0, 0, 0),
            (0, 1, 0, -1, 0, 0),
            (1, -1, 1, -1, 1, -1),
            (-1, 1, -1, 1, -1, 1),
        ]
        shift = 0
        while len(out) < need_count and shift <= 8:
            for delta in delta_patterns:
                cand = sorted(min(45, max(1, seed[i] + delta[i] + shift)) for i in range(6))
                # Clamping or the deltas may have produced repeated numbers.
                if len(set(cand)) != 6:
                    continue
                key = tuple(cand)
                if key in seen:
                    continue
                seen.add(key)
                out.append(cand)
                if len(out) >= need_count:
                    break
            shift += 1
        return out

    def _merge_unique_balls(self, base_balls, extra_balls):
        """Append to ``base_balls`` each extra ball not yet present.

        Balls are compared regardless of number order. ``base_balls`` is
        extended in place and also returned.
        """
        seen = set(tuple(sorted(x)) for x in base_balls)
        for ball in extra_balls:
            key = tuple(sorted(ball))
            if key not in seen:
                base_balls.append(list(ball))
                seen.add(key)
        return base_balls

    def _sorted_unique_balls(self, balls):
        """
        Normalize (sort within ball), de-duplicate, then sort lexicographically.
        Returns List[List[int]].
        """
        return [list(key) for key in sorted({tuple(sorted(b)) for b in balls})]
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the Saturday of the current week (weekday 5). On Saturday from
    # 21:00 onwards, and on Sunday, move on to the following Saturday.
    today = datetime.today()
    weekday = today.weekday()
    next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=(12 if next_week else 5) - weekday)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: walk the JSON history so that the last non-blank line
    # ends up in ``last_json``.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)

    # Crawling of the next draw is currently switched off:
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    picks = []
    result_json = {ymd: picks}

    # Fixed weekly picks
    practice.predict1(picks)

    # Filter-based predictions (existing and new): their results are merged
    # and sorted, then appended after the predict1 picks.
    predict2_json = []
    p_no, p_ball = practice.predict2(resources_path, ymd, predict2_json)

    predict3_json = []
    p_no3, p_ball3 = practice.predict3(resources_path, ymd, predict3_json)

    merged_predict = []
    for batch in (predict2_json, predict3_json):
        practice._merge_unique_balls(merged_predict, batch)
    merged_predict = practice._sorted_unique_balls(merged_predict)

    # Append merged_predict to the predict1 picks in sorted order, skipping
    # duplicates.
    practice._merge_unique_balls(picks, merged_predict)

    # Both predictors agree on the previous draw: report predict3's numbers.
    if p_no3 == p_no:
        p_ball = p_ball3

    # Send the report in messages of at most 100 picks; the header travels
    # with the first message.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for rank, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Flush the last, partially filled message.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
@@ -1,216 +0,0 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
|
||||
|
||||
ballFilter = None
|
||||
|
||||
def __init__(self, resources_path):
|
||||
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
|
||||
self.ballFilter = BallFilter(lottoHistoryFileName)
|
||||
|
||||
return
|
||||
|
||||
def find_filter_method(self, df_ball, filter_ball=None):
|
||||
win_count = 0
|
||||
|
||||
no_filter_ball = {}
|
||||
|
||||
printLog = True
|
||||
filter_dic = {}
|
||||
filter_dic_len = {}
|
||||
filter_dic_1 = {}
|
||||
filter_dic_2 = {}
|
||||
for i in range(len(df_ball)-1, 19, -1):
|
||||
|
||||
no = df_ball['no'].iloc[i]
|
||||
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
|
||||
answer = answer[1:7]
|
||||
|
||||
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
|
||||
filter_type = list(filter_type)
|
||||
size = len(filter_type)
|
||||
|
||||
if size == 0:
|
||||
win_count += 1
|
||||
no_filter_ball[no] = answer
|
||||
print("\t", no)
|
||||
elif size == 1:
|
||||
key = filter_type[0]
|
||||
if key not in filter_dic_1:
|
||||
filter_dic_1[key] = 1
|
||||
else:
|
||||
filter_dic_1[key] += 1
|
||||
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
elif size == 2:
|
||||
key = ','.join(filter_type)
|
||||
if key not in filter_dic_2:
|
||||
filter_dic_2[key] = 1
|
||||
else:
|
||||
filter_dic_2[key] += 1
|
||||
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
else:
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
|
||||
# 회차별 필터개수가 적은 것을 정렬하기 위함
|
||||
if size not in filter_dic_len:
|
||||
filter_dic_len[size] = []
|
||||
filter_dic_len[size].append(filter_type)
|
||||
|
||||
for f_t in filter_type:
|
||||
if f_t not in filter_dic:
|
||||
filter_dic[f_t] = 1
|
||||
else:
|
||||
filter_dic[f_t] += 1
|
||||
|
||||
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
|
||||
sorted_filter_dic_len = sorted(filter_dic_len.keys())
|
||||
for filter_count in sorted_filter_dic_len:
|
||||
for filter_type in filter_dic_len[filter_count]:
|
||||
print("\t\t>{} > {}".format(filter_count, filter_type))
|
||||
|
||||
print("\n\t[걸러진 유일 필터]")
|
||||
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic_1)):
|
||||
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
|
||||
|
||||
print("\n\t[2개 필터에 걸린 경우]")
|
||||
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic_2)):
|
||||
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
|
||||
|
||||
print("\n\t[Filter 유형 별 걸린 개수]")
|
||||
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic)):
|
||||
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
|
||||
|
||||
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
|
||||
print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
|
||||
for no in no_filter_ball:
|
||||
print("\t\t>", no, no_filter_ball[no])
|
||||
print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
|
||||
|
||||
return win_count
|
||||
|
||||
def find_final_candidates(self, no, df_ball, filter_ball=None):
|
||||
final_candidates = []
|
||||
|
||||
generation_balls = list(range(1, 46))
|
||||
|
||||
nCr = list(itertools.combinations(generation_balls, 6))
|
||||
for idx, ball in enumerate(nCr):
|
||||
|
||||
if idx % 1000000 == 0:
|
||||
print(" - {} processed...".format(idx))
|
||||
|
||||
if filter_ball is not None and 0 < len(set(ball) & set(filter_ball)):
|
||||
continue
|
||||
|
||||
filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
|
||||
filter_size = len(filter_type)
|
||||
|
||||
if filter_size:
|
||||
continue
|
||||
|
||||
final_candidates.append(ball)
|
||||
|
||||
return final_candidates
|
||||
|
||||
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
|
||||
|
||||
win_count = 0
|
||||
for i in range(len(df_ball)-1, 0, -1):
|
||||
|
||||
no = df_ball['no'].iloc[i]
|
||||
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
|
||||
answer = answer[1:7]
|
||||
|
||||
if filter_ball is not None and len(set(answer) & set(filter_ball)):
|
||||
continue
|
||||
|
||||
filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
|
||||
|
||||
if len(filter_type) == 0:
|
||||
win_count += 1
|
||||
print("\t\t>{}. {}".format(no, answer))
|
||||
|
||||
print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
|
||||
|
||||
return
|
||||
|
||||
def validate(self, df_ball, nos=None):
    """Check, per draw, whether the winning combination survives the filters.

    For each draw number the 6-of-45 combinations are enumerated in order
    and passed to ``self.ballFilter.extract_final_candidates``; enumeration
    for that draw stops at the first unrejected combination that equals
    the winning numbers.

    Args:
        df_ball: Draw-history DataFrame with a 'no' column followed by the
            six winning numbers.
        nos: Iterable of draw numbers to check. ``None`` means "nothing to
            check" (previously this raised ``TypeError``).

    Returns:
        dict mapping each draw number whose answer survived to that answer.
    """
    win_history = {}
    if nos is None:
        return win_history

    for no in nos:
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0]
        answer = answer[1:7]
        answer_set = set(answer)  # hoisted: invariant for the inner loop

        # Lazy enumeration avoids rebuilding an 8M-element list per draw.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
            if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
                break

    return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: load the draw history and time the filter-method
    # extraction step.
    resources_path = 'resources'

    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # No numbers are excluded up front.
    filter_ball = []
    filterTest = FilterTest(resources_path)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, filter_ball)
    elapsed_seconds = time.time() - start
    process_time = datetime.timedelta(seconds=elapsed_seconds)
    print("process_time: ", process_time)
|
||||
|
||||
"""
|
||||
print("\n\n")
|
||||
no = df_ball['no'].values[-1]
|
||||
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
|
||||
answer = ball[1:7]
|
||||
|
||||
print("STEP #0. 최종 후보 선정")
|
||||
start = time.time()
|
||||
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
|
||||
process_time = datetime.timedelta(seconds=time.time() - start)
|
||||
print("process_time: ", process_time)
|
||||
|
||||
print(" > size: {}".format(len(final_candidates)))
|
||||
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
|
||||
with open(file_name, 'w+') as outFp:
|
||||
for ball in final_candidates:
|
||||
ball_str = [str(b) for b in answer]
|
||||
outFp.write("{}\n".format(','.join(ball_str)))
|
||||
|
||||
print('{}회, 정답: {}\n'.format(no, str(answer)))
|
||||
"""
|
||||
|
||||
#print("\n\n")
|
||||
#print("STEP #2. 당첨 회수 확인")
|
||||
#filterTest.check_filter_method(df_ball, win_count)
|
||||
|
||||
# 오리지널 버전 (자질 파일에 고정): 당첨 22개
|
||||
@@ -1,490 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
|
||||
import json
|
||||
import os
|
||||
import copy
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_3 import BallFilter
|
||||
|
||||
class Practice:
    """Build a weekly lotto pick list: fixed picks plus filter survivors.

    ``predict2`` enumerates 6-of-45 combinations through ``BallFilter`` and
    tightens or relaxes the ruleset until the survivor count lands inside
    ``[TARGET_MIN_SURVIVORS, TARGET_MAX_SURVIVORS]`` or the time budget
    runs out. ``craw`` downloads draw results into the history files.
    """

    bot = None
    preprocessor = None
    predictor = None

    extract_count = None
    # Desired survivor-count window for predict2.
    TARGET_MIN_SURVIVORS = 30
    TARGET_MAX_SURVIVORS = 150
    # Wall-clock budget (seconds) shared by all enumeration passes of one predict2 call.
    PREDICT_TIMEOUT_SECONDS = 180

    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    _BALL_KEYS = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')

    # Picks that predict1 emits on every run.
    _FIXED_PICKS = (
        (6, 7, 10, 11, 20, 45),
        (2, 7, 17, 28, 35, 39),
        (6, 10, 19, 25, 33, 35),
        (3, 17, 20, 24, 35, 45),
        (5, 15, 18, 29, 36, 41),
        (6, 15, 20, 23, 37, 43),
        (8, 15, 19, 23, 38, 41),
        (5, 11, 19, 24, 40, 45),
        (9, 16, 18, 23, 35, 43),
        (7, 13, 19, 28, 33, 44),
        (7, 11, 18, 29, 37, 42),
    )

    # Per-position offsets applied to the previous draw by the fallback generator.
    _FALLBACK_DELTAS = (
        (0, 0, 0, 0, 0, 0),
        (-1, 0, 0, 0, 0, 1),
        (0, -1, 0, 0, 1, 0),
        (0, 0, -1, 1, 0, 0),
        (-2, 0, 0, 0, 0, 2),
        (0, -2, 0, 0, 2, 0),
        (0, 0, -2, 2, 0, 0),
        (-1, -1, 0, 0, 1, 1),
        (1, 0, -1, 0, 0, 0),
        (0, 1, 0, -1, 0, 0),
        (1, -1, 1, -1, 1, -1),
        (-1, 1, -1, 1, -1, 1),
    )

    def __init__(self, resources_path):
        """Create the Telegram bot and remember the resources directory."""
        self.bot = TelegramBot()
        self.resources_path = resources_path

    def craw(self, lottoHistoryFile, drwNo=None):
        """Download draw results and write them to the history files.

        Args:
            lottoHistoryFile: Path prefix; ``.json`` and ``.txt`` are appended.
            drwNo: When given, fetch only that draw and append it to the
                files. When ``None``, truncate the files and fetch draws
                1, 2, 3, ... until the service stops reporting success.

        Returns:
            ``[b1, ..., b6, bonus]`` of the last draw written, or ``None``
            when nothing was written.
        """
        single = drwNo is not None
        mode = 'a' if single else 'w'
        ball = None

        # Context managers close both files on every exit path; the previous
        # code leaked them on the early "not success" return and on errors.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            idx = drwNo if single else 1
            while True:
                res = requests.post(self._DRAW_URL + str(idx))
                result = res.json()
                if result['returnValue'] != 'success':
                    break

                numbers = [result[key] for key in self._BALL_KEYS]
                row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([idx] + numbers)
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                textFp.write(row + "\n")
                print(row)
                ball = numbers

                if single:
                    break
                idx += 1
                time.sleep(0.5)  # pause between consecutive requests

        return ball

    def predict1(self, result_json):
        """Append the fixed weekly picks to ``result_json`` (a list)."""
        # Fresh lists each call so callers can mutate them safely.
        result_json.extend(list(pick) for pick in self._FIXED_PICKS)
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append filter-based picks for the draw on ``ymd`` to ``result_json``.

        Starts with the base ruleset; on too many survivors tries the
        tightened rulesets (then rank-and-trim), on too few tries the relaxed
        rulesets (then pads with variations of the previous draw). Any pass
        that hits the time budget ends the search with whatever it found.

        Args:
            resources_path: Directory holding ``lotto_history.json``/``.txt``.
            ymd: Target draw date string passed to ``BallFilter.getNextNo``.
            result_json: List that receives the chosen combinations.

        Returns:
            ``(p_no, p_ball)``: the previous draw number and its six sorted
            winning numbers.
        """
        candidates = [i for i in range(1, 46)]
        history_json = os.path.join(resources_path, 'lotto_history.json')
        no = BallFilter(history_json).getNextNo(ymd)
        print("회차: {}".format(no))
        predict_start_ts = time.time()
        deadline_ts = predict_start_ts + self.PREDICT_TIMEOUT_SECONDS

        history_txt = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(history_txt, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = prev_row[0]
        p_ball = sorted(prev_row[1:7])

        # Base / tightened / relaxed rulesets.
        base_ruleset = self._get_base_ruleset()
        tighten_enabled = {
            "paper_patterns": True,
            "ban_triples_legacy": True,
            "all_in_previous7": True,
            "previous_neighbors": True,
        }
        tighten_allowed = {
            "ac_value": [8, 9],
            "uniq_last_digit_count": [4, 5],
            "even_count": [2, 3, 4],
        }
        tighten_rulesets = [
            self._build_ruleset(
                base_ruleset=base_ruleset,
                enabled_overrides=tighten_enabled,
                allowed_overrides=tighten_allowed,
            ),
            self._build_ruleset(
                base_ruleset=base_ruleset,
                enabled_overrides=tighten_enabled,
                allowed_overrides=dict(
                    tighten_allowed,
                    sum=[112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148],
                    sum_prev_diff=[13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40],
                ),
            ),
        ]
        # Each relax level disables everything the previous one did, plus more.
        relax_1 = {"paper_patterns": False, "ban_triples_legacy": False}
        relax_2 = dict(relax_1, previous_neighbors=False, all_in_previous7=False)
        relax_3 = dict(
            relax_2,
            weeks_8_count=False,
            weeks_12_count=False,
            weeks_16_count=False,
            weeks_20_count=False,
        )
        relax_rulesets = [
            self._build_ruleset(base_ruleset=base_ruleset, enabled_overrides=overrides)
            for overrides in (relax_1, relax_2, relax_3)
        ]

        min_survivors = self.TARGET_MIN_SURVIVORS
        max_survivors = self.TARGET_MAX_SURVIVORS

        def collect(ruleset, stage, stop_when_gt=None, stop_when_gte=None):
            # All passes share the inputs and the single deadline.
            return self._collect_candidates(
                candidates=candidates,
                no=no,
                df_ball=df_ball,
                ruleset=ruleset,
                stop_when_gt=stop_when_gt,
                stop_when_gte=stop_when_gte,
                stage_name=stage,
                predict_start_ts=predict_start_ts,
                deadline_ts=deadline_ts,
            )

        def publish(balls, stage):
            print("candidate_stage: {}, survivors: {}".format(stage, len(balls)))
            result_json.extend(balls)
            return p_no, p_ball

        chosen = []
        stage_name = "base"

        current_info = collect(base_ruleset, "base", stop_when_gt=max_survivors)
        current = current_info["candidates"]
        if current_info["timed_out"]:
            chosen = self._finalize_on_timeout(current, p_ball, min_survivors, max_survivors)
            return publish(chosen, "base_timeout_fallback")

        if min_survivors <= len(current) <= max_survivors:
            chosen = current
        elif len(current) > max_survivors:
            chosen = current
            stage_name = "base_overflow"
            for idx, rs in enumerate(tighten_rulesets, start=1):
                t_info = collect(rs, "tighten_{}".format(idx), stop_when_gt=max_survivors)
                t = t_info["candidates"]
                if t_info["timed_out"]:
                    chosen = self._finalize_on_timeout(t, p_ball, min_survivors, max_survivors)
                    stage_name = "tighten_{}_timeout_fallback".format(idx)
                    break
                if min_survivors <= len(t) <= max_survivors:
                    chosen = t
                    stage_name = "tighten_{}".format(idx)
                    break
                if len(t) <= max_survivors:
                    chosen = t
                    stage_name = "tighten_{}".format(idx)
            if len(chosen) > max_survivors:
                # Enforce the upper bound: keep only the best-scoring candidates
                # of the tightest ruleset.
                full_info = collect(tighten_rulesets[-1], "tighten_full_rank")
                full_for_ranking = full_info["candidates"]
                if full_info["timed_out"]:
                    chosen = self._finalize_on_timeout(full_for_ranking, p_ball, min_survivors, max_survivors)
                    stage_name = "tighten_rank_timeout_fallback"
                else:
                    chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
                    stage_name = "tighten_rank_trim"
        else:
            chosen = current
            stage_name = "base_underflow"
            for idx, rs in enumerate(relax_rulesets, start=1):
                # Relaxing only needs to reach the lower bound, so stop early.
                r_info = collect(rs, "relax_{}".format(idx), stop_when_gte=min_survivors)
                r = r_info["candidates"]
                chosen = r
                stage_name = "relax_{}".format(idx)
                if r_info["timed_out"]:
                    chosen = self._finalize_on_timeout(r, p_ball, min_survivors, max_survivors)
                    stage_name = "relax_{}_timeout_fallback".format(idx)
                    break
                if len(r) >= min_survivors:
                    break

        if len(chosen) == 0:
            # Never return nothing: derive the minimum count from the previous draw.
            stage_name = "relax_zero_fallback"
            chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
        elif len(chosen) < min_survivors:
            # Lower-bound guard: pad the shortfall with previous-draw variations.
            stage_name = "{}_fill".format(stage_name)
            fill = self._fallback_candidates_from_prev(
                p_ball,
                min_survivors - len(chosen),
                exclude=set(tuple(x) for x in chosen),
            )
            chosen.extend(fill)

        return publish(chosen, stage_name)

    def _get_base_ruleset(self):
        """Return a deep copy of the ruleset of a freshly built ``BallFilter``."""
        history_json = os.path.join(self.resources_path, "lotto_history.json")
        base_filter = BallFilter(history_json)
        return copy.deepcopy(base_filter.m1.ruleset)

    def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
        """Return a copy of ``base_ruleset`` with per-filter overrides applied.

        Args:
            base_ruleset: Dict with an optional ``"filters"`` mapping; not modified.
            enabled_overrides: ``{filter_name: truthy/falsy}`` written to ``"enabled"``.
            allowed_overrides: ``{filter_name: values}``; enables the filter
                and sets ``"allowed"`` to a list copy of ``values``.
        """
        ruleset = copy.deepcopy(base_ruleset)
        filters = ruleset.setdefault("filters", {})
        for key, value in (enabled_overrides or {}).items():
            filters.setdefault(key, {})["enabled"] = bool(value)
        for key, values in (allowed_overrides or {}).items():
            entry = filters.setdefault(key, {})
            entry["enabled"] = True
            entry["allowed"] = list(values)
        return ruleset

    def _collect_candidates(
        self,
        candidates,
        no,
        df_ball,
        ruleset,
        stop_when_gt=None,
        stop_when_gte=None,
        stage_name="base",
        predict_start_ts=None,
        deadline_ts=None,
    ):
        """Enumerate 6-combinations of ``candidates`` and keep unrejected ones.

        Args:
            candidates: Numbers to combine.
            no: Draw number passed to the filter.
            df_ball: Draw-history DataFrame passed to the filter.
            ruleset: Ruleset used to construct the ``BallFilter``.
            stop_when_gt: Stop as soon as more than this many survivors exist.
            stop_when_gte: Stop as soon as at least this many survivors exist.
            stage_name: Label used in progress output.
            predict_start_ts: Start timestamp for elapsed-time output.
            deadline_ts: ``time.time()`` value after which enumeration aborts.

        Returns:
            dict with ``"candidates"`` (list of 6-lists), ``"timed_out"``
            (bool) and ``"processed"`` (1-based index of the last
            combination looked at, 0 if none).
        """
        history_json = os.path.join(self.resources_path, "lotto_history.json")
        ballFilter = BallFilter(history_json, ruleset=ruleset)
        result = []

        def outcome(timed_out, processed):
            return {"candidates": result, "timed_out": timed_out, "processed": processed}

        def elapsed():
            return (time.time() - predict_start_ts) if predict_start_ts is not None else 0.0

        last_idx = 0
        for idx, ball in enumerate(itertools.combinations(candidates, 6), start=1):
            last_idx = idx
            if deadline_ts is not None and deadline_ts <= time.time():
                print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed(), len(result)))
                return outcome(True, idx)
            if idx % 1000000 == 0:
                print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed(), len(result)))

            b = list(ball)
            if len(ballFilter.filter(ball=b, no=no, until_end=False, df=df_ball)) != 0:
                continue
            result.append(b)
            if stop_when_gt is not None and len(result) > stop_when_gt:
                return outcome(False, idx)
            if stop_when_gte is not None and len(result) >= stop_when_gte:
                return outcome(False, idx)

        return outcome(False, last_idx)

    def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
        """Clamp a partial survivor list into the target window.

        Too many survivors are ranked and trimmed; too few are padded with
        variations of ``prev_ball``. The input list is not modified.
        """
        chosen = list(partial_candidates)
        if len(chosen) > max_survivors:
            return self._rank_and_trim(chosen, prev_ball, max_survivors)
        if len(chosen) < min_survivors:
            chosen.extend(self._fallback_candidates_from_prev(
                prev_ball,
                min_survivors - len(chosen),
                exclude=set(tuple(x) for x in chosen),
            ))
        return chosen

    def _rank_and_trim(self, candidates, prev_ball, limit):
        """Return the ``limit`` lowest-scoring candidates (stable order on ties)."""
        ranked = sorted(candidates, key=lambda ball: self._score_candidate(ball, prev_ball))
        return ranked[:limit]

    def _score_candidate(self, ball, prev_ball):
        """Score a combination; lower is better.

        Sum of: distance of the number sum from the previous draw's sum,
        twice the distance of the even count from 3, twice the distance of
        the distinct-last-digit count from 5, and the number of adjacent
        consecutive pairs.
        """
        sum_diff = abs(sum(ball) - sum(prev_ball))
        even_cnt = sum(1 for x in ball if x % 2 == 0)
        uniq_last = len({x % 10 for x in ball})
        ordered = sorted(ball)
        contiguous_penalty = sum(1 for lo, hi in zip(ordered, ordered[1:]) if hi - lo == 1)
        return sum_diff + abs(even_cnt - 3) * 2 + abs(uniq_last - 5) * 2 + contiguous_penalty

    def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
        """Generate up to ``need_count`` combinations near the previous draw.

        Each delta pattern is added to the sorted previous numbers, then the
        whole combination is shifted by 0..8 and clamped to 1..45. Results
        with duplicate numbers or already in ``exclude`` are dropped.

        Args:
            prev_ball: The six numbers of the previous draw.
            need_count: Maximum number of combinations to return.
            exclude: Optional set of sorted 6-tuples to avoid. It is copied,
                so the caller's set is left untouched.

        Returns:
            list of sorted 6-lists; may be shorter than ``need_count``.
        """
        seen = set(exclude) if exclude else set()
        seed = sorted(prev_ball)
        out = []
        shift = 0
        while len(out) < need_count and shift <= 8:
            for delta in self._FALLBACK_DELTAS:
                cand = sorted(min(45, max(1, value + step + shift)) for value, step in zip(seed, delta))
                if len(set(cand)) != 6:
                    continue
                key = tuple(cand)
                if key in seen:
                    continue
                seen.add(key)
                out.append(cand)
                if len(out) >= need_count:
                    break
            shift += 1
        return out
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: work out the target Saturday, build the pick list
    # and send it through the Telegram bot in chunks of 100 lines.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    today = datetime.today()
    weekday = today.weekday()
    # Days until the coming Saturday (weekday 5); 0 on Saturday itself.
    days_ahead = (5 - weekday) % 7
    if weekday == 5 and today.hour > 20:
        # Late Saturday: target the following Saturday instead.
        days_ahead = 7
    this_weekend = today + timedelta(days=days_ahead)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: read the history file; the last non-blank record wins.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)

    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed picks, identical every week
    practice.predict1(picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for position, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            # Flush a full chunk and start a new message.
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Send the remainder that did not fill a whole chunk.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
@@ -1,189 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
from DataCrawler import DataCrawler
|
||||
|
||||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_3 import BallFilter
|
||||
|
||||
class Practice:
    """Build a weekly lotto pick list: fixed picks plus filter survivors.

    ``predict2`` runs every 6-of-45 combination through ``BallFilter`` and
    keeps the unrejected ones. ``craw`` downloads draw results into the
    history files.
    """

    bot = None
    preprocessor = None
    predictor = None

    extract_count = None

    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    _BALL_KEYS = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')

    # Picks that predict1 emits on every run.
    _FIXED_PICKS = (
        (6, 7, 10, 11, 20, 45),
        (2, 7, 17, 28, 35, 39),
        (6, 10, 19, 25, 33, 35),
        (3, 17, 20, 24, 35, 45),
        (5, 15, 18, 29, 36, 41),
        (6, 15, 20, 23, 37, 43),
        (8, 15, 19, 23, 38, 41),
        (5, 11, 19, 24, 40, 45),
        (9, 16, 18, 23, 35, 43),
        (7, 13, 19, 28, 33, 44),
        (7, 11, 18, 29, 37, 42),
    )

    def __init__(self, resources_path):
        """Create the Telegram bot and remember the resources directory."""
        self.bot = TelegramBot()
        self.resources_path = resources_path

    def craw(self, lottoHistoryFile, drwNo=None):
        """Download draw results and write them to the history files.

        Args:
            lottoHistoryFile: Path prefix; ``.json`` and ``.txt`` are appended.
            drwNo: When given, fetch only that draw and append it to the
                files. When ``None``, truncate the files and fetch draws
                1, 2, 3, ... until the service stops reporting success.

        Returns:
            ``[b1, ..., b6, bonus]`` of the last draw written, or ``None``
            when nothing was written.
        """
        single = drwNo is not None
        mode = 'a' if single else 'w'
        ball = None

        # Context managers close both files on every exit path; the previous
        # code leaked them on the early "not success" return and on errors.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            idx = drwNo if single else 1
            while True:
                res = requests.post(self._DRAW_URL + str(idx))
                result = res.json()
                if result['returnValue'] != 'success':
                    break

                numbers = [result[key] for key in self._BALL_KEYS]
                row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([idx] + numbers)
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                textFp.write(row + "\n")
                print(row)
                ball = numbers

                if single:
                    break
                idx += 1
                time.sleep(0.5)  # pause between consecutive requests

        return ball

    def predict1(self, result_json):
        """Append the fixed weekly picks to ``result_json`` (a list)."""
        # Fresh lists each call so callers can mutate them safely.
        result_json.extend(list(pick) for pick in self._FIXED_PICKS)
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every combination the filter does not reject to ``result_json``.

        Args:
            resources_path: Directory holding ``lotto_history.json``/``.txt``.
            ymd: Target draw date string passed to ``BallFilter.getNextNo``.
            result_json: List that receives the surviving 6-number lists.

        Returns:
            ``(p_no, p_ball)``: the previous draw number and its six
            winning numbers in file order.
        """
        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Look up the previous draw first so a missing row fails before the
        # multi-million-combination enumeration instead of after it.
        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]

        # Lazy enumeration: no need to hold all 8,145,060 tuples in memory.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)
            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if 0 < len(filter_type):
                continue

            result_json.append(ball)

        return prev_row[0], prev_row[1:7]
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: work out the target Saturday, build the pick list
    # and send it through the Telegram bot in chunks of 100 lines.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    today = datetime.today()
    weekday = today.weekday()
    # Days until the coming Saturday (weekday 5); 0 on Saturday itself.
    days_ahead = (5 - weekday) % 7
    if weekday == 5 and today.hour > 20:
        # Late Saturday: target the following Saturday instead.
        days_ahead = 7
    this_weekend = today + timedelta(days=days_ahead)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: read the history file; the last non-blank record wins.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)

    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed picks, identical every week
    practice.predict1(picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for position, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            # Flush a full chunk and start a new message.
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Send the remainder that did not fill a whole chunk.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
@@ -1216,3 +1216,14 @@
|
||||
{"returnValue": "success", "drwNoDate": "2026-03-21", "drwNo": 1216, "drwtNo1": 3, "drwtNo2": 10, "drwtNo3": 14, "drwtNo4": 15, "drwtNo5": 23, "drwtNo6": 24, "bnusNo": 25}
|
||||
{"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41}
|
||||
{"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25}
|
||||
{"returnValue": "success", "drwNoDate": "2026-04-11", "drwNo": 1219, "drwtNo1": 1, "drwtNo2": 2, "drwtNo3": 15, "drwtNo4": 28, "drwtNo5": 39, "drwtNo6": 45, "bnusNo": 31}
|
||||
{"returnValue": "success", "drwNoDate": "2026-04-18", "drwNo": 1220, "drwtNo1": 2, "drwtNo2": 22, "drwtNo3": 25, "drwtNo4": 28, "drwtNo5": 34, "drwtNo6": 43, "bnusNo": 16}
|
||||
{"returnValue": "success", "drwNoDate": "2026-04-25", "drwNo": 1221, "drwtNo1": 6, "drwtNo2": 13, "drwtNo3": 18, "drwtNo4": 28, "drwtNo5": 30, "drwtNo6": 36, "bnusNo": 9}
|
||||
{"returnValue": "success", "drwNoDate": "2026-05-02", "drwNo": 1222, "drwtNo1": 4, "drwtNo2": 11, "drwtNo3": 17, "drwtNo4": 22, "drwtNo5": 32, "drwtNo6": 41, "bnusNo": 34}
|
||||
{"returnValue": "success", "drwNoDate": "2026-05-09", "drwNo": 1223, "drwtNo1": 16, "drwtNo2": 18, "drwtNo3": 20, "drwtNo4": 32, "drwtNo5": 33, "drwtNo6": 39, "bnusNo": 26}
|
||||
{"returnValue": "success", "drwNoDate": "2026-05-16", "drwNo": 1224, "drwtNo1": 9, "drwtNo2": 18, "drwtNo3": 21, "drwtNo4": 27, "drwtNo5": 44, "drwtNo6": 45, "bnusNo": 28}
|
||||
{"returnValue": "success", "drwNoDate": "2026-05-23", "drwNo": 1225, "drwtNo1": 8, "drwtNo2": 9, "drwtNo3": 19, "drwtNo4": 25, "drwtNo5": 41, "drwtNo6": 42, "bnusNo": 33}
|
||||
{"returnValue": "success", "drwNoDate": "2026-05-30", "drwNo": 1226, "drwtNo1": 4, "drwtNo2": 6, "drwtNo3": 13, "drwtNo4": 17, "drwtNo5": 26, "drwtNo6": 28, "bnusNo": 41}
|
||||
{"returnValue": "success", "drwNoDate": "2026-06-06", "drwNo": 1227, "drwtNo1": 1, "drwtNo2": 14, "drwtNo3": 16, "drwtNo4": 34, "drwtNo5": 41, "drwtNo6": 44, "bnusNo": 13}
|
||||
{"returnValue": "success", "drwNoDate": "2026-06-13", "drwNo": 1228, "drwtNo1": 24, "drwtNo2": 29, "drwtNo3": 30, "drwtNo4": 31, "drwtNo5": 35, "drwtNo6": 44, "bnusNo": 1}
|
||||
{"returnValue": "success", "drwNoDate": "2026-06-20", "drwNo": 1229, "drwtNo1": 12, "drwtNo2": 13, "drwtNo3": 29, "drwtNo4": 34, "drwtNo5": 37, "drwtNo6": 42, "bnusNo": 16}
|
||||
|
||||
@@ -1204,7 +1204,7 @@
|
||||
1204,8,16,28,30,31,44,27
|
||||
1205,1,4,16,23,31,41,2
|
||||
1206,1,3,17,26,27,42,23
|
||||
1207,10,22,24,27,38,45,11
|
||||
1207,10,22,24,27,38,45,21
|
||||
1208,6,27,30,36,38,42,25
|
||||
1209,2,17,20,35,37,39,24
|
||||
1210,1,7,9,17,27,38,31
|
||||
@@ -1216,3 +1216,14 @@
|
||||
1216,3,10,14,15,23,24,25
|
||||
1217,8,10,15,20,29,31,41
|
||||
1218,3,28,31,32,42,45,25
|
||||
1219,1,2,15,28,39,45,31
|
||||
1220,2,22,25,28,34,43,16
|
||||
1221,6,13,18,28,30,36,9
|
||||
1222,4,11,17,22,32,41,34
|
||||
1223,16,18,20,32,33,39,26
|
||||
1224,9,18,21,27,44,45,28
|
||||
1225,8,9,19,25,41,42,33
|
||||
1226,4,6,13,17,26,28,41
|
||||
1227,1,14,16,34,41,44,13
|
||||
1228,24,29,30,31,35,44,1
|
||||
1229,12,13,29,34,37,42,16
|
||||
|
||||
99
review_1.py
99
review_1.py
@@ -1,99 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_1 import BallFilter
|
||||
|
||||
class FilterTestReview:
    """Replay past draws through ``BallFilter`` and tally prize tiers among survivors."""

    ballFilter = None

    def __init__(self, resources_path):
        """Build the ``BallFilter`` from ``lotto_history.json`` under ``resources_path``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """For each draw, filter all 6-of-45 combinations and grade the survivors.

        Survivors are compared with the draw's winning numbers: 6 matches is
        tier 1, 5 matches plus the bonus number tier 2, 5 matches tier 3,
        4 matches tier 4 and 3 matches tier 5. Per-draw tier counts are printed.

        Args:
            df_ball: Draw-history DataFrame with a 'no' column, six winning
                numbers and the bonus number.
            nos: Iterable of draw numbers to replay. ``None`` means "nothing
                to replay" (previously this raised ``TypeError``).

        Returns:
            ``(win_history, win_history_size)``: draws whose exact answer
            survived (mapped to that answer), and the survivor count per draw.
        """
        win_history = {}
        win_history_size = {}
        if nos is None:
            return win_history, win_history_size

        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7].copy()
            bonus = balls[7]
            answer_set = set(answer)  # hoisted: invariant for the inner loop

            # Only the number of survivors is reported, so count instead of
            # storing every surviving combination.
            survivor_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}

            # Lazy enumeration avoids rebuilding an 8M-element list per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue

                survivor_count += 1

                match = len(answer_set & set(ball))
                if match == 6:
                    if no not in win_history:  # record each draw once
                        win_history[no] = answer.copy()
                    if ball not in win_dic[1]:  # guard against duplicate entries
                        win_dic[1].append(ball.copy())
                elif match == 5:
                    # Tier 2 requires the bonus number on top of five matches.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = survivor_count

            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))

        return win_history, win_history_size
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: back-test the filter on a fixed list of draws and
    # report which of them would still have produced a 1st-prize ticket.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Headerless CSV: draw number, six winning balls, bonus ball.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTestReview = FilterTestReview(resources_path)

    # Draws to evaluate; a full sweep would be range(1126, 21, -1).
    target_draws = [1057, 1046, 1022, 900, 841, 816, 696, 593, 574,
                    426, 356, 324, 303, 245, 147, 139, 71]

    start = time.time()
    win_history, win_history_size = filterTestReview.validate(df_ball, nos=target_draws)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print("{} 회 당첨".format(len(win_history)))
    for draw_no in sorted(win_history):
        print("\t>{} > {} ({})".format(draw_no, win_history[draw_no], win_history_size[draw_no]))
|
||||
99
review_2.py
99
review_2.py
@@ -1,99 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_2 import BallFilter
|
||||
|
||||
class FilterTestReview:
    """Brute-force back-test of a ``BallFilter`` against past lotto draws.

    For every requested draw, each 6-number combination of 1..45 is run
    through the filter; the combinations that survive are scored against the
    winning numbers of that draw.
    """

    # Filter instance; set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Score the unfiltered combinations of each draw in ``nos``.

        Args:
            df_ball: DataFrame whose first column ``no`` is the draw number,
                followed by the six winning balls and the bonus ball.
            nos: Iterable of draw numbers to evaluate. ``None`` (the default)
                is treated as "no draws" instead of raising ``TypeError``.

        Returns:
            ``(win_history, win_history_size)`` where ``win_history`` maps a
            draw number to its winning six numbers when that exact
            combination survived the filter, and ``win_history_size`` maps
            every evaluated draw number to the count of surviving
            combinations.

        Raises:
            IndexError: if a requested draw number is not present in
                ``df_ball``.
        """
        win_history = {}
        win_history_size = {}

        for no in (nos if nos is not None else ()):
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]  # slicing already yields a fresh list
            bonus = balls[7]
            answer_set = set(answer)

            # Only the totals are reported, so keep counters rather than
            # lists holding up to millions of combinations.
            candidate_count = 0
            rank_counts = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}

            # Iterate lazily: materialising all 8,145,060 tuples per draw
            # costs a large amount of memory for no benefit.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue

                candidate_count += 1
                match = len(answer_set.intersection(ball))
                if match == 6:
                    # 1st prize: the winning combination survived the filter.
                    win_history.setdefault(no, list(answer))
                    rank_counts[1] += 1
                elif match == 5:
                    # 2nd prize needs five matches plus the bonus ball.
                    rank_counts[2 if bonus in ball else 3] += 1
                elif match == 4:
                    rank_counts[4] += 1
                elif match == 3:
                    rank_counts[5] += 1

            win_history_size[no] = candidate_count

            print("no: {}, answer: {}, size: {}".format(no, answer, candidate_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(
                rank_counts[1], rank_counts[2], rank_counts[3], rank_counts[4], rank_counts[5]))

        return win_history, win_history_size
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: back-test the filter on a fixed list of draws and
    # report which of them would still have produced a 1st-prize ticket.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Headerless CSV: draw number, six winning balls, bonus ball.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTestReview = FilterTestReview(resources_path)

    # Draws to evaluate; a full sweep would be range(1126, 21, -1).
    target_draws = [1057, 1046, 1022, 900, 841, 816, 696, 593, 574,
                    426, 356, 324, 303, 245, 147, 139, 71]

    start = time.time()
    win_history, win_history_size = filterTestReview.validate(df_ball, nos=target_draws)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print("{} 회 당첨".format(len(win_history)))
    for draw_no in sorted(win_history):
        print("\t>{} > {} ({})".format(draw_no, win_history[draw_no], win_history_size[draw_no]))
|
||||
99
review_3.py
99
review_3.py
@@ -1,99 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
|
||||
class FilterTestReview:
    """Brute-force back-test of a ``BallFilter`` against past lotto draws.

    For every requested draw, each 6-number combination of 1..45 is run
    through the filter; the combinations that survive are scored against the
    winning numbers of that draw.
    """

    # Filter instance; set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Score the unfiltered combinations of each draw in ``nos``.

        Args:
            df_ball: DataFrame whose first column ``no`` is the draw number,
                followed by the six winning balls and the bonus ball.
            nos: Iterable of draw numbers to evaluate. ``None`` (the default)
                is treated as "no draws" instead of raising ``TypeError``.

        Returns:
            ``(win_history, win_history_size)`` where ``win_history`` maps a
            draw number to its winning six numbers when that exact
            combination survived the filter, and ``win_history_size`` maps
            every evaluated draw number to the count of surviving
            combinations.

        Raises:
            IndexError: if a requested draw number is not present in
                ``df_ball``.
        """
        win_history = {}
        win_history_size = {}

        for no in (nos if nos is not None else ()):
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]  # slicing already yields a fresh list
            bonus = balls[7]
            answer_set = set(answer)

            # Only the totals are reported, so keep counters rather than
            # lists holding up to millions of combinations.
            candidate_count = 0
            rank_counts = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}

            # Iterate lazily: materialising all 8,145,060 tuples per draw
            # costs a large amount of memory for no benefit.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue

                candidate_count += 1
                match = len(answer_set.intersection(ball))
                if match == 6:
                    # 1st prize: the winning combination survived the filter.
                    win_history.setdefault(no, list(answer))
                    rank_counts[1] += 1
                elif match == 5:
                    # 2nd prize needs five matches plus the bonus ball.
                    rank_counts[2 if bonus in ball else 3] += 1
                elif match == 4:
                    rank_counts[4] += 1
                elif match == 3:
                    rank_counts[5] += 1

            win_history_size[no] = candidate_count

            print("no: {}, answer: {}, size: {}".format(no, answer, candidate_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(
                rank_counts[1], rank_counts[2], rank_counts[3], rank_counts[4], rank_counts[5]))

        return win_history, win_history_size
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: back-test the filter on a fixed list of draws and
    # report which of them would still have produced a 1st-prize ticket.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Headerless CSV: draw number, six winning balls, bonus ball.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTestReview = FilterTestReview(resources_path)

    # Draws to evaluate; a full sweep would be range(1126, 21, -1).
    target_draws = [1057, 1046, 1022, 900, 841, 816, 696, 593, 574,
                    426, 356, 324, 303, 245, 147, 139, 71]

    start = time.time()
    win_history, win_history_size = filterTestReview.validate(df_ball, nos=target_draws)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print("{} 회 당첨".format(len(win_history)))
    for draw_no in sorted(win_history):
        print("\t>{} > {} ({})".format(draw_no, win_history[draw_no], win_history_size[draw_no]))
|
||||
@@ -1,17 +1,9 @@
|
||||
#!/usr/bin/env bash
|
||||
# miniconda 환경 ncue에서 Python으로 인자 실행: ./scripts/run_with_ncue.sh final_filterTest.py
|
||||
set -euo pipefail
|
||||
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
||||
cd "$ROOT"
|
||||
for base in "${MINICONDA_HOME:-}" "$HOME/miniconda3" "$HOME/miniforge3" "$HOME/anaconda3" "$HOME/mambaforge"; do
|
||||
[ -n "$base" ] || continue
|
||||
c="$base/bin/conda"
|
||||
if [ -x "$c" ]; then
|
||||
exec "$c" run -n ncue -- python "$@"
|
||||
fi
|
||||
done
|
||||
if [ -n "${CONDA_EXE:-}" ] && [ -x "$CONDA_EXE" ]; then
|
||||
exec "$CONDA_EXE" run -n ncue -- python "$@"
|
||||
fi
|
||||
echo "conda ncue 환경을 찾지 못했습니다. 터미널에서: conda activate ncue && python \"\$@\"" >&2
|
||||
exit 1
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
export PATH="${HOME}/miniconda3/bin:${HOME}/anaconda3/bin:/opt/anaconda3/bin:${PATH}"
|
||||
source "$(conda info --base)/etc/profile.d/conda.sh"
|
||||
conda activate ncue
|
||||
cd "${REPO_ROOT}"
|
||||
exec python "$@"
|
||||
|
||||
236
test_1.py
236
test_1.py
@@ -1,236 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_1 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Evaluate a ``BallFilter`` against the historical winning numbers."""

    # Filter instance; set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from ``<resources_path>/<history_json>``.

        The comment in the original states that the full JSON history is
        required because the filter relies on previous-draw and
        recent-N-week window features.
        """
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Report which filters reject the real winning numbers.

        ``df_ball`` may hold the whole history; only draws whose number lies
        in ``[start_no, end_no]`` are scored. ``filter_ball`` is accepted for
        interface compatibility and is not used.

        Returns:
            The number of scored draws whose winning numbers triggered no
            filter at all.
        """
        win_count = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}      # filter name -> number of draws it rejected
        filter_dic_len = {}  # number of triggered filters -> list of filter lists
        filter_dic_1 = {}    # draws rejected by exactly one filter
        filter_dic_2 = {}    # draws rejected by exactly two filters

        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group by number of triggered filters so the least-filtered
            # draws can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # '{:.2f}' gives two decimals; the previous '{:.2}' meant two
        # significant digits and rendered e.g. 25.0 as '2.5e+01'.
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-number combination that passes the filter for ``no``.

        Combinations containing any number from ``filter_ball`` are excluded.
        They are never generated, which yields the same result, in the same
        order, as generating and then skipping them; only the progress
        counter now counts evaluated combinations.

        Returns:
            List of 6-tuples in lexicographic order.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        allowed = [n for n in range(1, 46) if n not in excluded]

        final_candidates = []
        for idx, ball in enumerate(itertools.combinations(allowed, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited from last to first; the row at position 0 is not
        visited. Draws sharing a number with ``filter_ball`` are skipped.
        ``p_win_count`` is only echoed in the summary line.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, per draw in ``nos``, whether the winning combination survives.

        Args:
            df_ball: DataFrame with the draw number in column ``no`` followed
                by the six winning balls.
            nos: Iterable of draw numbers; ``None`` (the default) is treated
                as "no draws" instead of raising ``TypeError``.

        Returns:
            Dict mapping each draw number whose winning combination was not
            filtered to that combination.
        """
        win_history = {}

        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            # Lazy iteration avoids building an 8,145,060-element list per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                # NOTE(review): the filter is still invoked for every
                # combination, as before, in case it keeps internal state.
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: report which filters reject the real winning
    # numbers in the draw range given on the command line.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources

    # Load the full history so previous-draw / window features are computed
    # correctly; scoring itself is limited to the requested range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball = []
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step (previously parked in a no-op string literal): dump the
    # final candidates of the latest draw. The parked code wrote `answer`
    # on every line instead of the candidate `ball`; corrected below.
    #
    # no = df_ball['no'].values[-1]
    # answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # print("process_time: ", datetime.timedelta(seconds=time.time() - start))
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         outFp.write("{}\n".format(','.join(str(b) for b in ball)))
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # Disabled step: count the winning draws.
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins.
|
||||
236
test_2.py
236
test_2.py
@@ -1,236 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_2 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Evaluate a ``BallFilter`` against the historical winning numbers."""

    # Filter instance; set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from ``<resources_path>/<history_json>``.

        The comment in the original states that the full JSON history is
        required because the filter relies on previous-draw and
        recent-N-week window features.
        """
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Report which filters reject the real winning numbers.

        ``df_ball`` may hold the whole history; only draws whose number lies
        in ``[start_no, end_no]`` are scored. ``filter_ball`` is accepted for
        interface compatibility and is not used.

        Returns:
            The number of scored draws whose winning numbers triggered no
            filter at all.
        """
        win_count = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}      # filter name -> number of draws it rejected
        filter_dic_len = {}  # number of triggered filters -> list of filter lists
        filter_dic_1 = {}    # draws rejected by exactly one filter
        filter_dic_2 = {}    # draws rejected by exactly two filters

        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group by number of triggered filters so the least-filtered
            # draws can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # '{:.2f}' gives two decimals; the previous '{:.2}' meant two
        # significant digits and rendered e.g. 25.0 as '2.5e+01'.
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-number combination that passes the filter for ``no``.

        Combinations containing any number from ``filter_ball`` are excluded.
        They are never generated, which yields the same result, in the same
        order, as generating and then skipping them; only the progress
        counter now counts evaluated combinations.

        Returns:
            List of 6-tuples in lexicographic order.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        allowed = [n for n in range(1, 46) if n not in excluded]

        final_candidates = []
        for idx, ball in enumerate(itertools.combinations(allowed, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited from last to first; the row at position 0 is not
        visited. Draws sharing a number with ``filter_ball`` are skipped.
        ``p_win_count`` is only echoed in the summary line.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, per draw in ``nos``, whether the winning combination survives.

        Args:
            df_ball: DataFrame with the draw number in column ``no`` followed
                by the six winning balls.
            nos: Iterable of draw numbers; ``None`` (the default) is treated
                as "no draws" instead of raising ``TypeError``.

        Returns:
            Dict mapping each draw number whose winning combination was not
            filtered to that combination.
        """
        win_history = {}

        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            # Lazy iteration avoids building an 8,145,060-element list per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                # NOTE(review): the filter is still invoked for every
                # combination, as before, in case it keeps internal state.
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: report which filters reject the real winning
    # numbers in the draw range given on the command line.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        # This script imports BallFilter from filter_model_2, so the help
        # text names that module (it previously said filter_model_1.py).
        help="Ruleset JSON path (optional). Default: filter_model_2.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources

    # Load the full history so previous-draw / window features are computed
    # correctly; scoring itself is limited to the requested range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball = []
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step (previously parked in a no-op string literal): dump the
    # final candidates of the latest draw. The parked code wrote `answer`
    # on every line instead of the candidate `ball`; corrected below.
    #
    # no = df_ball['no'].values[-1]
    # answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # print("process_time: ", datetime.timedelta(seconds=time.time() - start))
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         outFp.write("{}\n".format(','.join(str(b) for b in ball)))
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # Disabled step: count the winning draws.
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins.
|
||||
236
test_3.py
236
test_3.py
@@ -1,236 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
|
||||
|
||||
ballFilter = None
|
||||
|
||||
def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
    """Build the filter from ``<resources_path>/<history_json>``.

    The comment in the original states that the full JSON history is
    required because the filter relies on previous-draw and recent-N-week
    window features.
    """
    history_file = os.path.join(resources_path, history_json)
    self.ballFilter = BallFilter(history_file, ruleset_path=ruleset_path)
|
||||
|
||||
def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
    """Report which filters reject the real winning numbers.

    ``df_ball`` may hold the whole history; only draws whose number lies in
    ``[start_no, end_no]`` are scored. ``filter_ball`` is accepted for
    interface compatibility and is not used.

    Returns:
        The number of scored draws whose winning numbers triggered no filter
        at all.
    """
    win_count = 0
    no_filter_ball = {}
    printLog = True
    filter_dic = {}      # filter name -> number of draws it rejected
    filter_dic_len = {}  # number of triggered filters -> list of filter lists
    filter_dic_1 = {}    # draws rejected by exactly one filter
    filter_dic_2 = {}    # draws rejected by exactly two filters

    for i in range(len(df_ball) - 1, -1, -1):
        no = int(df_ball['no'].iloc[i])
        if no < start_no or end_no < no:
            continue
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

        filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
        size = len(filter_type)

        if size == 0:
            win_count += 1
            no_filter_ball[no] = answer
            print("\t", no)
        else:
            if size == 1:
                key = filter_type[0]
                filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
            elif size == 2:
                key = ','.join(filter_type)
                filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
            if printLog:
                print("\t", no, filter_type)

        # Group by number of triggered filters so the least-filtered draws
        # can be listed first.
        filter_dic_len.setdefault(size, []).append(filter_type)

        for f_t in filter_type:
            filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

    print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
    for filter_count in sorted(filter_dic_len):
        for filter_type in filter_dic_len[filter_count]:
            print("\t\t>{} > {}".format(filter_count, filter_type))

    print("\n\t[걸러진 유일 필터]")
    for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
        print("\t\t>", name, "->", count)

    print("\n\t[2개 필터에 걸린 경우]")
    for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
        print("\t\t>", name, "->", count)

    print("\n\t[Filter 유형 별 걸린 개수]")
    for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
        print("\t\t>", name, "->", count)

    print("\n\t# 필터에 걸리지 않고 당첨된 회차")
    total = max(0, end_no - start_no + 1)
    rate = (100 * len(no_filter_ball) / total) if total else 0.0
    # '{:.2f}' gives two decimals; the previous '{:.2}' meant two significant
    # digits and rendered e.g. 25.0 as '2.5e+01'.
    summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
    print(summary)
    for no in no_filter_ball:
        print("\t\t>", no, no_filter_ball[no])
    print(summary)

    return win_count
|
||||
|
||||
def find_final_candidates(self, no, df_ball, filter_ball=None):
    """Return every 6-of-45 combination that no filter rejects for draw ``no``.

    Args:
        no: Draw number, forwarded unchanged to ``self.ballFilter.filter``.
        df_ball: Draw-history DataFrame, forwarded unchanged to the filter.
        filter_ball: Optional iterable of ball numbers to exclude. A
            combination containing any of them is never a candidate.

    Returns:
        list: Surviving combinations as ascending 6-tuples, in lexicographic
        order.

    Note:
        The progress counter printed every 1,000,000 iterations counts the
        combinations actually generated, i.e. those free of excluded balls.
    """
    excluded = set(filter_ball) if filter_ball is not None else set()
    # Removing excluded balls from the pool up front yields exactly the
    # combinations the per-combination intersection test used to let through
    # (in the same order) without generating and discarding all the others.
    pool = [b for b in range(1, 46) if b not in excluded]

    final_candidates = []
    # Iterate lazily: the full 45C6 space is 8,145,060 tuples and does not
    # need to be held in memory as a list.
    for idx, ball in enumerate(itertools.combinations(pool, 6)):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))

        filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if len(filter_type):
            continue

        final_candidates.append(ball)

    return final_candidates
|
||||
|
||||
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
    """Re-score historical draws with ``extract_final_candidates`` and print the tally.

    Rows are visited from the last one down to index 1 (index 0 is never
    scored). A draw counts as a win when the filter returns an empty result
    for its six main numbers.

    Args:
        df_ball: Draw-history DataFrame with a ``no`` column followed by the
            six main balls.
        p_win_count: Previously obtained win count, printed for comparison.
        filter_ball: Optional collection of balls; draws containing any of
            them are skipped.

    Returns:
        None. Results are only printed.
    """
    passed = 0

    for row_index in reversed(range(1, len(df_ball))):
        no = df_ball['no'].iloc[row_index]
        record = df_ball[df_ball['no'] == no].values.tolist()[0]
        answer = record[1:7]

        # Skip draws that contain an excluded ball.
        if filter_ball is not None and not set(answer).isdisjoint(set(filter_ball)):
            continue

        hits = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
        if len(hits) != 0:
            continue

        passed += 1
        print("\t\t>{}. {}".format(no, answer))

    total_rows = len(df_ball) - 1
    print("\n\t> {} / {} p_win_count, {} total".format(passed, p_win_count, total_rows))

    return
|
||||
|
||||
def validate(self, df_ball, nos=None):
    """Check, per draw, whether the winning combination survives the filter.

    For every draw number in ``nos`` the full 6-of-45 space is walked in
    lexicographic order; each combination is passed to
    ``self.ballFilter.extract_final_candidates`` and the walk stops as soon
    as the winning combination is reached with an empty filter result.

    Args:
        df_ball: Draw-history DataFrame with a ``no`` column followed by the
            six main balls.
        nos: Iterable of draw numbers to validate. ``None`` (the default)
            means nothing to validate and yields an empty result instead of
            raising ``TypeError``.

    Returns:
        dict: ``{draw_no: winning six numbers}`` for every draw whose winning
        combination passed the filter.
    """
    win_history = {}
    if nos is None:
        return win_history

    for no in nos:
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
        answer_set = set(answer)

        # Generate combinations lazily instead of building an 8,145,060-item
        # list for every draw.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            # NOTE(review): the filter is still evaluated for every
            # combination, not only the winning one, in case it keeps state;
            # if it is pure, testing the match first would be far cheaper.
            filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0 and len(answer_set & set(ball)) == 6:
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
                break

    return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file and the
    # inclusive draw-number range to score.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources

    # Load the full history so previous-draw / window features are computed
    # properly, while only the requested test range is scored.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no'] + ['b{}'.format(pos) for pos in range(1, 7)] + ['bn']

    filter_ball = []
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(
        df_ball,
        start_no=args.start_no,
        end_no=args.end_no,
        filter_ball=filter_ball,
    )
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # The string below is a disabled "STEP #0" (final candidate selection);
    # it is an unused expression and never executes.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in answer]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled STEP #2 (win-count check):
    # print("\n\n")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
||||
@@ -1,405 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
학습 구간(1~800회) 당첨번호로 final_BallFilter.extract_final_candidates 에 쓸 허용 집합을 계산합니다.
|
||||
표준 라이브러리 + pandas(df 호환)만 사용합니다.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root: the directory one level above the one holding this script.
ROOT = Path(__file__).resolve().parents[1]
HISTORY = ROOT.joinpath("resources", "lotto_history.txt")
BALLFILTER_SRC = ROOT.joinpath("BallFilter_25.py")
OUT = ROOT.joinpath("final_filter_params.py")

# Inclusive range of draw numbers used as the training window.
TRAIN_LO, TRAIN_HI = 1, 800

# Percentile band (over sorted unique values) kept for each feature, to avoid
# an overly wide union over the training distribution. The narrower the band,
# the stronger the filter; tune it against the final_filterTest.py results.
# NOTE(review): the original note also says values of training draws that fall
# outside the band are added back (100% training coverage); no such step is
# visible in main() - confirm.
PCT_LO, PCT_HI = 8, 92

# Primes up to 45, and every other number from 2 to 45 (1 is in neither set).
PRIME = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43}
COMPOSITE = set(range(2, 46)) - PRIME
|
||||
|
||||
|
||||
def load_draws():
    """Read the history CSV at ``HISTORY`` into ``{draw_no: sorted balls}``.

    Column 0 of each row is the draw number and columns 1-6 are the six main
    balls; any further columns are ignored and empty rows are skipped.

    Returns:
        dict: Draw number -> ascending list of the six main balls, inserted
        in ascending draw-number order.
    """
    parsed = []
    with open(HISTORY, newline="", encoding="utf-8") as handle:
        for record in csv.reader(handle):
            if record:
                parsed.append((int(record[0]), sorted(map(int, record[1:7]))))
    return dict(sorted(parsed, key=lambda item: item[0]))
|
||||
|
||||
|
||||
def get_ac(ball):
    """Return the number of distinct pairwise differences among six balls, minus 5.

    Every difference ``ball[hi] - ball[lo]`` with ``lo < hi`` is collected
    over the first six positions.
    """
    gaps = {ball[hi] - ball[lo] for hi in range(1, 6) for lo in range(hi)}
    return len(gaps) - 5
|
||||
|
||||
|
||||
def interval_sum(ball):
    """Return the sum of the five gaps between neighbouring balls (first six positions)."""
    total = 0
    for pos in range(5):
        total += ball[pos + 1] - ball[pos]
    return total
|
||||
|
||||
|
||||
def first_letter_sum(ball):
    """Return the sum of the leading digits of the balls whose text form has two characters."""
    total = 0
    for text in map(str, ball):
        if len(text) == 2:
            total += int(text[0])
    return total
|
||||
|
||||
|
||||
def last_letter_sum(ball):
    """Return the sum of the final digits of the balls.

    Balls whose text form has two characters contribute their second digit;
    one-character balls contribute themselves.
    """
    total = 0
    for text in map(str, ball):
        if len(text) == 2:
            total += int(text[1])
        elif len(text) == 1:
            total += int(text)
    return total
|
||||
|
||||
|
||||
def uniq_end_digits(ball):
    """Return how many different values ``b % 10`` takes over the balls."""
    endings = set()
    for b in ball:
        endings.add(b % 10)
    return len(endings)
|
||||
|
||||
|
||||
def high_low(ball):
    """Return ``(low, high)``: the counts of balls below 23 and above 23.

    NOTE(review): a ball equal to 23 is counted in neither bucket - confirm
    this matches the filter these parameters are generated for.
    """
    low = high = 0
    for b in ball:
        if b < 23:
            low += 1
        elif b > 23:
            high += 1
    return low, high
|
||||
|
||||
|
||||
def section10_count(ball):
    """Return how many different values ``int(b / 10)`` takes over the balls."""
    return len({int(b / 10) for b in ball})
|
||||
|
||||
|
||||
def count_mult(ball, m):
    """Return how many balls are exact multiples of ``m``."""
    return len([b for b in ball if not b % m])
|
||||
|
||||
|
||||
def continus_max(ball):
    """Return the length of the longest run of consecutive numbers in the first six balls.

    A run grows while each ball equals its left neighbour plus one; the
    result is at least 1.
    """
    longest = streak = 1
    for pos in range(5):
        if ball[pos + 1] == ball[pos] + 1:
            streak += 1
            if streak > longest:
                longest = streak
        else:
            streak = 1
    return longest
|
||||
|
||||
|
||||
def weeks_freq(draws_map, answer, no, week):
    """Count the balls of ``answer`` that appeared in the ``week`` draws before ``no``.

    Args:
        draws_map: Mapping of draw number -> list of balls.
        answer: Balls to look up.
        no: Reference draw number; draws ``no - 1`` .. ``no - week`` are
            consulted, and draws missing from ``draws_map`` are ignored.
        week: Number of preceding draws to look back over.
    """
    recent = set()
    for back in range(1, week + 1):
        recent.update(draws_map.get(no - back, ()))
    return len([b for b in answer if b in recent])
|
||||
|
||||
|
||||
def pct_band_unique(values, lo=PCT_LO, hi=PCT_HI):
    """Keep only the unique values that fall inside a percentile band.

    The unique values are sorted and the band limits are taken at positions
    ``int(lo / 100 * (n - 1))`` and ``int(hi / 100 * (n - 1))`` of that list.
    With six or fewer unique values everything is kept.

    Args:
        values: Collection of comparable values.
        lo: Lower percentile (0-100).
        hi: Upper percentile (0-100).

    Returns:
        set: The retained values; empty when ``values`` is empty.
    """
    if not values:
        return set()

    ordered = sorted(set(values))
    count = len(ordered)
    if count <= 6:
        return set(ordered)

    floor = ordered[int((lo / 100.0) * (count - 1))]
    ceiling = ordered[int((hi / 100.0) * (count - 1))]
    return {v for v in ordered if floor <= v <= ceiling}
|
||||
|
||||
|
||||
def parse_pair_triple_rules():
    """Extract the pair and triple rules written in ``BALLFILTER_SRC``.

    The source text is scanned for ``len(set_ball & {...}) == 2`` and
    ``len(set_ball & {...}) == 3`` expressions; a match is kept only when its
    set literal lists exactly 2 (respectively 3) integers.

    Returns:
        tuple: ``(pairs, triples)``, two lists of frozensets in source order.
    """
    source = BALLFILTER_SRC.read_text(encoding="utf-8")

    def collect(pattern, size):
        # Gather every matched set literal that has exactly `size` members.
        found = []
        for hit in re.finditer(pattern, source):
            members = [int(token.strip()) for token in hit.group(1).split(",")]
            if len(members) == size:
                found.append(frozenset(members))
        return found

    pairs = collect(r"len\(set_ball & \{([^}]+)\}\) == 2", 2)
    triples = collect(r"len\(set_ball & \{([^}]+)\}\) == 3", 3)
    return pairs, triples
|
||||
|
||||
|
||||
def main():
    """Compute the allow-sets and block-lists from the training draws and write ``OUT``.

    Steps, in order:
      1. Load all draws and the pair/triple rules parsed from ``BALLFILTER_SRC``.
      2. Drop every pair/triple rule whose numbers appeared together in a
         training draw, so the block-lists never reject a training winner.
      3. For each draw 2..TRAIN_HI that has a predecessor, record every
         feature value and its absolute difference from the previous draw.
      4. Narrow each numeric value set with ``pct_band_unique``.
      5. Render the result as Python source, write it to ``OUT`` and print a
         short summary.
    """
    draws = load_draws()
    pair_rules, triple_rules = parse_pair_triple_rules()

    multiples = (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23)
    windows = (8, 12, 16, 20)

    # Pairs / triples seen together in a training draw are removed from the
    # block-lists below.
    seen_pairs = set()
    seen_triples = set()
    for draw_no in range(TRAIN_LO, TRAIN_HI + 1):
        if draw_no not in draws:
            continue
        b = draws[draw_no]
        seen_pairs.update(
            frozenset((b[i], b[j])) for i in range(6) for j in range(i + 1, 6)
        )
        seen_triples.update(
            frozenset((b[i], b[j], b[k]))
            for i in range(6)
            for j in range(i + 1, 6)
            for k in range(j + 1, 6)
        )

    pair_block = [rule for rule in pair_rules if rule not in seen_pairs]
    triple_block = [rule for rule in triple_rules if rule not in seen_triples]

    sets = defaultdict(set)
    flags_prev = {"need_relax_previous": False, "need_relax_prev7": False}

    def track(key, current, previous, diff_key=None):
        # Record a feature value and its absolute change versus the previous draw.
        sets[key].add(current)
        sets[diff_key if diff_key else key + "_diff"].add(abs(current - previous))

    def evens(numbers):
        return sum(1 for x in numbers if x % 2 == 0)

    for no in range(2, TRAIN_HI + 1):
        if no not in draws or (no - 1) not in draws:
            continue
        ball, p_ball = draws[no], draws[no - 1]

        track("sum6", sum(ball), sum(p_ball))
        track("avg6", sum(ball) // 6, sum(p_ball) // 6)
        track("sum3f", ball[0] + ball[1] + ball[2], p_ball[0] + p_ball[1] + p_ball[2])
        track("sum3b", ball[3] + ball[4] + ball[5], p_ball[3] + p_ball[4] + p_ball[5])

        sets["hl_allowed"].add(high_low(ball))

        track("go_sum", ball[0] + ball[5], p_ball[0] + p_ball[5])
        track("interval", interval_sum(ball), interval_sum(p_ball))
        track("first_letter", first_letter_sum(ball), first_letter_sum(p_ball))
        track("last_letter", last_letter_sum(ball), last_letter_sum(p_ball))
        track("b0", ball[0], p_ball[0])
        track("b5", ball[5], p_ball[5])
        track("uniq_end", uniq_end_digits(ball), uniq_end_digits(p_ball))
        track("ac", get_ac(ball), get_ac(p_ball))

        for m in multiples:
            track(f"mul{m}", count_mult(ball, m), count_mult(p_ball, m))

        sets["prime_n"].add(len(set(ball) & PRIME))
        track(
            "composite_n",
            len(set(ball) & COMPOSITE),
            len(set(p_ball) & COMPOSITE),
            diff_key="composite_diff",
        )
        track("even_n", evens(ball), evens(p_ball), diff_key="even_diff")
        track("sec10", section10_count(ball), section10_count(p_ball))

        for wk in windows:
            # NOTE(review): the previous draw is scored against the window
            # ending at `no - 1`, which contains that draw itself - confirm
            # this mirrors the filter being parameterised.
            track(
                f"w{wk}",
                weeks_freq(draws, ball, no, wk),
                weeks_freq(draws, p_ball, no, wk),
            )

        sets["continus_max"].add(continus_max(ball))

        # filterPreviousNumber: relax when no ball equals, or is adjacent to,
        # a ball of the previous draw.
        pb_set = set(p_ball)
        if not any(b + shift in pb_set for b in ball for shift in (0, -1, 1)):
            flags_prev["need_relax_previous"] = True

        # filterAllPreivous7: relax when all six balls appeared somewhere in
        # the seven preceding draws.
        pb7 = set()
        for earlier in range(no - 1, no - 8, -1):
            pb7.update(draws.get(earlier, ()))
        if len(set(ball) & pb7) == 6:
            flags_prev["need_relax_prev7"] = True

    # Feature keys, grouped so the narrowing order and the emit order below
    # can both be expressed from the same pieces.
    head_keys = [
        "sum6", "sum6_diff",
        "avg6", "avg6_diff",
        "sum3f", "sum3f_diff",
        "sum3b", "sum3b_diff",
    ]
    mid_keys = [
        "go_sum", "go_sum_diff",
        "interval", "interval_diff",
        "first_letter", "first_letter_diff",
        "last_letter", "last_letter_diff",
        "b0", "b0_diff",
        "b5", "b5_diff",
        "uniq_end", "uniq_end_diff",
        "ac", "ac_diff",
    ]
    count_keys = [
        "prime_n",
        "composite_n", "composite_diff",
        "even_n", "even_diff",
        "sec10", "sec10_diff",
    ]
    mul_keys = []
    for m in multiples:
        mul_keys.extend([f"mul{m}", f"mul{m}_diff"])
    week_keys = []
    for wk in windows:
        week_keys.extend([f"w{wk}", f"w{wk}_diff"])
    tail_keys = ["continus_max"]

    # Tighten every numeric set to its percentile band.
    keys_numeric = head_keys + mid_keys + count_keys + mul_keys + week_keys + tail_keys
    for k in keys_numeric:
        sets[k] = pct_band_unique(sets[k])

    # NOTE(review): the original comment says only (0,1)/(1,0) are skipped,
    # but this builds all four (low, high) combinations of 0 and 1 - confirm.
    hl_skip = {(low, high) for low in (0, 1) for high in (0, 1)}

    def emit():
        # Render the generated module as source text.
        out = [
            "# -*- coding: utf-8 -*-",
            '"""학습 구간 {}~{}회 기준 자동 생성 — tools/compute_final_filter_params.py"""'.format(
                TRAIN_LO, TRAIN_HI
            ),
            "",
            "TRAIN_RANGE = ({}, {})".format(TRAIN_LO, TRAIN_HI),
            "DISABLE_FILTER_PREVIOUS_NUMBER = {}".format(
                str(flags_prev["need_relax_previous"])
            ),
            "DISABLE_FILTER_ALL_PREVIOUS_7 = {}".format(str(flags_prev["need_relax_prev7"])),
            "",
        ]
        allow_names = []

        def sset(key):
            # Every constant is named ALLOW_<KEY IN UPPER CASE>.
            name = "ALLOW_" + key.upper()
            allow_names.append(name)
            out.append("{} = {}".format(name, repr(sorted(sets[key]))))

        for key in head_keys:
            sset(key)
        out.append("HL_SKIP = {}".format(repr(sorted(hl_skip))))
        out.append("HL_SEEN = {}".format(repr(sorted(sets['hl_allowed']))))
        for key in mid_keys + mul_keys + count_keys + week_keys + tail_keys:
            sset(key)

        out.append("PAIR_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in pair_block])))
        out.append("TRIPLE_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in triple_block])))
        out.extend(["", "# frozenset 캐시", ""])
        for name in allow_names:
            out.append("_F_{} = frozenset({})".format(name.replace("ALLOW_", "", 1), name))
        out.append("_F_HL_SEEN = frozenset(HL_SEEN)")
        out.append("")
        return "\n".join(out) + "\n"

    OUT.write_text(emit(), encoding="utf-8")
    print("Wrote", OUT)
    print("pair rules:", len(pair_rules), "-> block", len(pair_block))
    print("triple rules:", len(triple_rules), "-> block", len(triple_block))
    print("DISABLE_FILTER_PREVIOUS_NUMBER", flags_prev["need_relax_previous"])
    print("DISABLE_FILTER_ALL_PREVIOUS_7", flags_prev["need_relax_prev7"])


if __name__ == "__main__":
    main()
|
||||
231
train_1.py
231
train_1.py
@@ -1,231 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_1 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Back-testing harness that scores historical draws against a ``BallFilter``."""

    # Replaced per instance in __init__; the class-level None is a placeholder.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter from ``<resources_path>/lotto_history.json``.

        Args:
            resources_path: Directory holding ``lotto_history.json``.
            ruleset_path: Optional ruleset path handed to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print ``title`` followed by the entries of ``counts``, highest count first."""
        print(title)
        for key, hits in sorted(counts.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", key, "->", hits)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter over past winning numbers and report which rules fire.

        Rows are visited from the newest down to index 20 (the first 20 rows
        are never scored) and only draws with ``start_no <= no <= end_no``
        are evaluated.

        Args:
            df_ball: Draw-history DataFrame with a ``no`` column followed by
                the six main balls.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            int: Number of scored draws whose winning numbers hit no filter.
        """
        win_count = 0
        scored = 0
        no_filter_ball = {}

        filter_dic = {}      # rule name -> number of draws it rejected
        filter_dic_len = {}  # number of rules hit -> list of rule lists
        filter_dic_1 = {}    # rule name -> draws rejected by that rule alone
        filter_dic_2 = {}    # "ruleA,ruleB" -> draws rejected by exactly that pair

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue
            scored += 1

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                print("\t", no, filter_type)

            # Grouped by hit count so the draws closest to passing list first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The rate is taken over the draws actually scored (not every row of
        # df_ball) and guarded against an empty range. "{:.2f}" is required:
        # a bare "{:.2}" renders a float such as 50.0 as "5e+01".
        rate = (100 * len(no_filter_ball) / scored) if scored else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), scored, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that no filter rejects for draw ``no``.

        Args:
            no: Draw number, forwarded unchanged to ``self.ballFilter.filter``.
            df_ball: Draw-history DataFrame, forwarded unchanged to the filter.
            filter_ball: Optional iterable of ball numbers to exclude. A
                combination containing any of them is never a candidate.

        Returns:
            list: Surviving combinations as ascending 6-tuples, in
            lexicographic order.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        # Dropping excluded balls from the pool yields exactly the
        # combinations the per-combination intersection test let through, in
        # the same order, without generating the rest.
        pool = [b for b in range(1, 46) if b not in excluded]

        final_candidates = []
        # Lazy iteration: the full 45C6 space is 8,145,060 tuples.
        for idx, ball in enumerate(itertools.combinations(pool, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Re-score historical draws with ``extract_final_candidates`` and print the tally.

        Rows are visited from the last one down to index 1. A draw counts as
        a win when the filter returns an empty result for its six numbers.

        Args:
            df_ball: Draw-history DataFrame.
            p_win_count: Previously obtained win count, printed for comparison.
            filter_ball: Optional collection of balls; draws containing any
                of them are skipped.

        Returns:
            None. Results are only printed.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        Args:
            df_ball: Draw-history DataFrame.
            nos: Iterable of draw numbers to validate. ``None`` (the default)
                means nothing to validate and yields an empty result instead
                of raising ``TypeError``.

        Returns:
            dict: ``{draw_no: winning six numbers}`` for every draw whose
            winning combination passed the filter.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            # Generate combinations lazily instead of building an
            # 8,145,060-item list for every draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if len(filter_type) == 0 and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file and the
    # inclusive draw-number range to score.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use the full history txt to support previous-draw / window features,
    # but only score [start_no, end_no].
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no'] + ['b{}'.format(pos) for pos in range(1, 7)] + ['bn']

    filter_ball = []
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(
        df_ball,
        start_no=args.start_no,
        end_no=args.end_no,
    )
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # The string below is a disabled "STEP #0" (final candidate selection);
    # it is an unused expression and never executes.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in answer]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled STEP #2 (win-count check):
    # print("\n\n")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
||||
231
train_2.py
231
train_2.py
@@ -1,231 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_2 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
|
||||
|
||||
ballFilter = None
|
||||
|
||||
def __init__(self, resources_path, ruleset_path=None):
|
||||
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
|
||||
self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)
|
||||
|
||||
return
|
||||
|
||||
def find_filter_method(self, df_ball, start_no, end_no):
|
||||
win_count = 0
|
||||
|
||||
no_filter_ball = {}
|
||||
|
||||
printLog = True
|
||||
filter_dic = {}
|
||||
filter_dic_len = {}
|
||||
filter_dic_1 = {}
|
||||
filter_dic_2 = {}
|
||||
for i in range(len(df_ball)-1, 19, -1):
|
||||
no = df_ball['no'].iloc[i]
|
||||
if no < start_no or end_no < no:
|
||||
continue
|
||||
|
||||
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
|
||||
answer = answer[1:7]
|
||||
|
||||
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
|
||||
filter_type = list(filter_type)
|
||||
size = len(filter_type)
|
||||
|
||||
if size == 0:
|
||||
win_count += 1
|
||||
no_filter_ball[no] = answer
|
||||
print("\t", no)
|
||||
elif size == 1:
|
||||
key = filter_type[0]
|
||||
if key not in filter_dic_1:
|
||||
filter_dic_1[key] = 1
|
||||
else:
|
||||
filter_dic_1[key] += 1
|
||||
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
elif size == 2:
|
||||
key = ','.join(filter_type)
|
||||
if key not in filter_dic_2:
|
||||
filter_dic_2[key] = 1
|
||||
else:
|
||||
filter_dic_2[key] += 1
|
||||
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
else:
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
|
||||
# 회차별 필터개수가 적은 것을 정렬하기 위함
|
||||
if size not in filter_dic_len:
|
||||
filter_dic_len[size] = []
|
||||
filter_dic_len[size].append(filter_type)
|
||||
|
||||
for f_t in filter_type:
|
||||
if f_t not in filter_dic:
|
||||
filter_dic[f_t] = 1
|
||||
else:
|
||||
filter_dic[f_t] += 1
|
||||
|
||||
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
|
||||
sorted_filter_dic_len = sorted(filter_dic_len.keys())
|
||||
for filter_count in sorted_filter_dic_len:
|
||||
for filter_type in filter_dic_len[filter_count]:
|
||||
print("\t\t>{} > {}".format(filter_count, filter_type))
|
||||
|
||||
print("\n\t[걸러진 유일 필터]")
|
||||
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic_1)):
|
||||
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
|
||||
|
||||
print("\n\t[2개 필터에 걸린 경우]")
|
||||
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic_2)):
|
||||
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
|
||||
|
||||
print("\n\t[Filter 유형 별 걸린 개수]")
|
||||
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic)):
|
||||
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
|
||||
|
||||
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
|
||||
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), len(df_ball), 100 * len(no_filter_ball) / len(df_ball)))
|
||||
for no in no_filter_ball:
|
||||
print("\t\t>", no, no_filter_ball[no])
|
||||
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), len(df_ball), 100 * len(no_filter_ball) / len(df_ball)))
|
||||
|
||||
return win_count
|
||||
|
||||
def find_final_candidates(self, no, df_ball, filter_ball=None):
|
||||
final_candidates = []
|
||||
|
||||
generation_balls = list(range(1, 46))
|
||||
|
||||
nCr = list(itertools.combinations(generation_balls, 6))
|
||||
for idx, ball in enumerate(nCr):
|
||||
|
||||
if idx % 1000000 == 0:
|
||||
print(" - {} processed...".format(idx))
|
||||
|
||||
if filter_ball is not None and 0 < len(set(ball) & set(filter_ball)):
|
||||
continue
|
||||
|
||||
filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
|
||||
filter_size = len(filter_type)
|
||||
|
||||
if filter_size:
|
||||
continue
|
||||
|
||||
final_candidates.append(ball)
|
||||
|
||||
return final_candidates
|
||||
|
||||
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
|
||||
|
||||
win_count = 0
|
||||
for i in range(len(df_ball)-1, 0, -1):
|
||||
|
||||
no = df_ball['no'].iloc[i]
|
||||
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
|
||||
answer = answer[1:7]
|
||||
|
||||
if filter_ball is not None and len(set(answer) & set(filter_ball)):
|
||||
continue
|
||||
|
||||
filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
|
||||
|
||||
if len(filter_type) == 0:
|
||||
win_count += 1
|
||||
print("\t\t>{}. {}".format(no, answer))
|
||||
|
||||
print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
|
||||
|
||||
return
|
||||
|
||||
def validate(self, df_ball, nos=None):
    """Check, per draw, whether the winning combination survives the filter.

    For every draw number in ``nos`` the combinations of 1..45 taken six at
    a time are walked in order; the search for that draw stops at the first
    combination that both passes ``extract_final_candidates`` (empty
    result) and equals the winning numbers.

    Args:
        df_ball: Draw history with a ``no`` column followed by the six
            numbers in columns 1..6.
        nos: Iterable of draw numbers to validate. ``None`` (the default)
            validates nothing.

    Returns:
        dict: draw number -> winning numbers, for the draws that survived.

    Raises:
        IndexError: if a draw number in ``nos`` is not present in ``df_ball``.
    """
    win_history = {}

    # The default used to crash with "TypeError: 'NoneType' object is not iterable".
    if nos is None:
        return win_history

    for no in nos:
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
        answer_set = set(answer)

        # Lazy iteration avoids building a list of 8,145,060 tuples per draw.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(ball)
            # NOTE(review): the filter is still consulted for every combination,
            # as before. If extract_final_candidates has no side effects, testing
            # the match first would skip millions of calls; confirm before changing.
            filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
            if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
                break

    return win_history
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: score the filter ruleset against past draws.

    Reads ``<resources>/lotto_history.txt`` as a headerless CSV, builds a
    FilterTest and runs ``find_filter_method`` over the draw numbers in
    ``[--start-no, --end-no]``, printing the elapsed time afterwards.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # A disabled "STEP #0" used to sit here as a bare string literal. It ran
    # find_final_candidates for the latest draw and wrote the result to
    # <resources>/final_candidates.biz_a1.txt. It wrote `answer` on every line
    # instead of the candidate `ball`; fix that if the step is ever restored.

    # Disabled follow-up step (STEP #2, win-count check):
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins


if __name__ == '__main__':
    main()
|
||||
231
train_3.py
231
train_3.py
@@ -1,231 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Back-testing harness that scores a BallFilter ruleset against past draws.

    The DataFrames handed to the methods are expected to have a ``no`` column
    followed by the six main numbers in columns 1..6 (rows are sliced with
    ``[1:7]``).
    """

    # Filter engine used by every method; set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Create the BallFilter from ``<resources_path>/lotto_history.json``.

        Args:
            resources_path: Directory containing ``lotto_history.json``.
            ruleset_path: Optional ruleset path passed to BallFilter as-is.
        """
        history_path = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(history_path, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on past winning numbers and print a breakdown.

        Rows are visited from the last one down to positional index 20 (rows
        0..19 are never evaluated); draws whose number falls outside
        ``[start_no, end_no]`` are skipped.

        Args:
            df_ball: Draw history (see class docstring for the layout).
            start_no: Lowest draw number to evaluate, inclusive.
            end_no: Highest draw number to evaluate, inclusive.

        Returns:
            int: number of evaluated draws for which the filter returned an
            empty result.
        """
        win_count = 0
        no_filter_ball = {}   # draw no -> winning numbers that triggered no filter
        print_log = True
        filter_dic = {}       # filter id -> number of draws that triggered it
        filter_dic_len = {}   # number of triggered filters -> list of results
        filter_dic_1 = {}     # filter id -> draws where it was the only one triggered
        filter_dic_2 = {}     # "a,b" -> draws where exactly these two were triggered

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)

            # Group results by how many filters fired so the report can list
            # the draws with the fewest triggered filters first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        sections = (
            ("\n\t[걸러진 유일 필터]", filter_dic_1),
            ("\n\t[2개 필터에 걸린 경우]", filter_dic_2),
            ("\n\t[Filter 유형 별 걸린 개수]", filter_dic),
        )
        for title, counts in sections:
            print(title)
            for key, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
                print("\t\t>", key, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # NOTE: the percentage is relative to every row of df_ball, not only
        # to the draws that were actually evaluated.
        total = len(df_ball)
        rate = 100 * len(no_filter_ball) / total if total else 0.0
        # "{:.2f}" prints two decimals; the previous "{:.2}" meant two
        # significant digits and rendered e.g. 12.5 as "1.2e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
        """Collect every combination that the filter does not reject for draw ``no``.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw history forwarded to the filter unchanged.
            filter_ball: Optional iterable of numbers; combinations containing
                any of them are skipped without consulting the filter.
            max_ball: Highest ball number (combinations use ``1..max_ball``).
            pick: Number of balls per combination.

        Returns:
            list of tuples: combinations for which the filter returned an
            empty result, in ``itertools.combinations`` order.
        """
        final_candidates = []
        # Build the exclusion set once; it does not change between combinations.
        excluded = None if filter_ball is None else set(filter_ball)

        # Lazy iteration: the default C(45, 6) = 8,145,060 tuples are not
        # materialized in a list first.
        for idx, ball in enumerate(itertools.combinations(range(1, max_ball + 1), pick)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded is not None and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Re-check past draws with ``extract_final_candidates`` and print the tally.

        Rows are visited from the last one down to positional index 1; row 0
        is never evaluated.

        Args:
            df_ball: Draw history (see class docstring for the layout).
            p_win_count: Previously computed count, only echoed in the summary.
            filter_ball: Optional iterable of numbers; draws containing any of
                them are skipped.

        Returns:
            None. Results are printed.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if excluded is not None and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        Args:
            df_ball: Draw history (see class docstring for the layout).
            nos: Iterable of draw numbers to validate; ``None`` validates
                nothing.

        Returns:
            dict: draw number -> winning numbers, for the draws whose winning
            combination passed ``extract_final_candidates``.

        Raises:
            IndexError: if a draw number in ``nos`` is missing from ``df_ball``.
        """
        win_history = {}

        # The default used to crash with "TypeError: 'NoneType' object is not iterable".
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                # NOTE(review): the filter is still consulted for every
                # combination, as before; confirm it is side-effect free
                # before reordering this check for speed.
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: score the filter ruleset against past draws.

    Reads ``<resources>/lotto_history.txt`` as a headerless CSV, builds a
    FilterTest and runs ``find_filter_method`` over the draw numbers in
    ``[--start-no, --end-no]``, printing the elapsed time afterwards.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # A disabled "STEP #0" used to sit here as a bare string literal. It ran
    # find_final_candidates for the latest draw and wrote the result to
    # <resources>/final_candidates.biz_a1.txt. It wrote `answer` on every line
    # instead of the candidate `ball`; fix that if the step is ever restored.

    # Disabled follow-up step (STEP #2, win-count check):
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins


if __name__ == '__main__':
    main()
|
||||
234
valid_1.py
234
valid_1.py
@@ -1,234 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_1 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Validation harness that scores a BallFilter ruleset against past draws.

    The DataFrames handed to the methods are expected to have a ``no`` column
    followed by the six main numbers in columns 1..6 (rows are sliced with
    ``[1:7]``).
    """

    # Filter engine used by every method; set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the BallFilter from ``<resources_path>/<history_json>``.

        Args:
            resources_path: Directory containing the history JSON file.
            ruleset_path: Optional ruleset path passed to BallFilter as-is.
            history_json: File name of the history JSON inside
                ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        history_path = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(history_path, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on past winning numbers and print a breakdown.

        Every row is visited, last to first; draws whose number falls outside
        ``[start_no, end_no]`` are skipped, so ``df_ball`` may hold the full
        history.

        Args:
            df_ball: Draw history (see class docstring for the layout).
            start_no: Lowest draw number to evaluate, inclusive.
            end_no: Highest draw number to evaluate, inclusive.

        Returns:
            int: number of evaluated draws for which the filter returned an
            empty result.
        """
        win_count = 0
        no_filter_ball = {}   # draw no -> winning numbers that triggered no filter
        print_log = True
        filter_dic = {}       # filter id -> number of draws that triggered it
        filter_dic_len = {}   # number of triggered filters -> list of results
        filter_dic_1 = {}     # filter id -> draws where it was the only one triggered
        filter_dic_2 = {}     # "a,b" -> draws where exactly these two were triggered

        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)

            # Group results by how many filters fired so the report can list
            # the draws with the fewest triggered filters first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        sections = (
            ("\n\t[걸러진 유일 필터]", filter_dic_1),
            ("\n\t[2개 필터에 걸린 경우]", filter_dic_2),
            ("\n\t[Filter 유형 별 걸린 개수]", filter_dic),
        )
        for title, counts in sections:
            print(title)
            for key, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
                print("\t\t>", key, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The denominator is the size of the requested range, not the number
        # of rows actually present in df_ball.
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" prints two decimals; the previous "{:.2}" meant two
        # significant digits and rendered e.g. 50.0 as "5e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
        """Collect every combination that the filter does not reject for draw ``no``.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw history forwarded to the filter unchanged.
            filter_ball: Optional iterable of numbers; combinations containing
                any of them are skipped without consulting the filter.
            max_ball: Highest ball number (combinations use ``1..max_ball``).
            pick: Number of balls per combination.

        Returns:
            list of tuples: combinations for which the filter returned an
            empty result, in ``itertools.combinations`` order.
        """
        final_candidates = []
        # Build the exclusion set once; it does not change between combinations.
        excluded = None if filter_ball is None else set(filter_ball)

        # Lazy iteration: the default C(45, 6) = 8,145,060 tuples are not
        # materialized in a list first.
        for idx, ball in enumerate(itertools.combinations(range(1, max_ball + 1), pick)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded is not None and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Re-check past draws with ``extract_final_candidates`` and print the tally.

        Rows are visited from the last one down to positional index 1; row 0
        is never evaluated.

        Args:
            df_ball: Draw history (see class docstring for the layout).
            p_win_count: Previously computed count, only echoed in the summary.
            filter_ball: Optional iterable of numbers; draws containing any of
                them are skipped.

        Returns:
            None. Results are printed.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if excluded is not None and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        Args:
            df_ball: Draw history (see class docstring for the layout).
            nos: Iterable of draw numbers to validate; ``None`` validates
                nothing.

        Returns:
            dict: draw number -> winning numbers, for the draws whose winning
            combination passed ``extract_final_candidates``.

        Raises:
            IndexError: if a draw number in ``nos`` is missing from ``df_ball``.
        """
        win_history = {}

        # The default used to crash with "TypeError: 'NoneType' object is not iterable".
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                # NOTE(review): the filter is still consulted for every
                # combination, as before; confirm it is side-effect free
                # before reordering this check for speed.
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: validate the filter ruleset on held-out draws.

    Reads ``<resources>/lotto_history.txt`` as a headerless CSV, builds a
    FilterTest and runs ``find_filter_method`` over the draw numbers in
    ``[--start-no, --end-no]``, printing the elapsed time afterwards.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # A disabled "STEP #0" used to sit here as a bare string literal. It ran
    # find_final_candidates for the latest draw and wrote the result to
    # <resources>/final_candidates.biz_a1.txt. It wrote `answer` on every line
    # instead of the candidate `ball`; fix that if the step is ever restored.

    # Disabled follow-up step (STEP #2, win-count check):
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins


if __name__ == '__main__':
    main()
|
||||
234
valid_2.py
234
valid_2.py
@@ -1,234 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_2 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Validation harness that scores a BallFilter ruleset against past draws.

    The DataFrames handed to the methods are expected to have a ``no`` column
    followed by the six main numbers in columns 1..6 (rows are sliced with
    ``[1:7]``).
    """

    # Filter engine used by every method; set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the BallFilter from ``<resources_path>/<history_json>``.

        Args:
            resources_path: Directory containing the history JSON file.
            ruleset_path: Optional ruleset path passed to BallFilter as-is.
            history_json: File name of the history JSON inside
                ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        history_path = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(history_path, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on past winning numbers and print a breakdown.

        Every row is visited, last to first; draws whose number falls outside
        ``[start_no, end_no]`` are skipped, so ``df_ball`` may hold the full
        history.

        Args:
            df_ball: Draw history (see class docstring for the layout).
            start_no: Lowest draw number to evaluate, inclusive.
            end_no: Highest draw number to evaluate, inclusive.

        Returns:
            int: number of evaluated draws for which the filter returned an
            empty result.
        """
        win_count = 0
        no_filter_ball = {}   # draw no -> winning numbers that triggered no filter
        print_log = True
        filter_dic = {}       # filter id -> number of draws that triggered it
        filter_dic_len = {}   # number of triggered filters -> list of results
        filter_dic_1 = {}     # filter id -> draws where it was the only one triggered
        filter_dic_2 = {}     # "a,b" -> draws where exactly these two were triggered

        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)

            # Group results by how many filters fired so the report can list
            # the draws with the fewest triggered filters first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        sections = (
            ("\n\t[걸러진 유일 필터]", filter_dic_1),
            ("\n\t[2개 필터에 걸린 경우]", filter_dic_2),
            ("\n\t[Filter 유형 별 걸린 개수]", filter_dic),
        )
        for title, counts in sections:
            print(title)
            for key, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
                print("\t\t>", key, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The denominator is the size of the requested range, not the number
        # of rows actually present in df_ball.
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" prints two decimals; the previous "{:.2}" meant two
        # significant digits and rendered e.g. 50.0 as "5e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
        """Collect every combination that the filter does not reject for draw ``no``.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw history forwarded to the filter unchanged.
            filter_ball: Optional iterable of numbers; combinations containing
                any of them are skipped without consulting the filter.
            max_ball: Highest ball number (combinations use ``1..max_ball``).
            pick: Number of balls per combination.

        Returns:
            list of tuples: combinations for which the filter returned an
            empty result, in ``itertools.combinations`` order.
        """
        final_candidates = []
        # Build the exclusion set once; it does not change between combinations.
        excluded = None if filter_ball is None else set(filter_ball)

        # Lazy iteration: the default C(45, 6) = 8,145,060 tuples are not
        # materialized in a list first.
        for idx, ball in enumerate(itertools.combinations(range(1, max_ball + 1), pick)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded is not None and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Re-check past draws with ``extract_final_candidates`` and print the tally.

        Rows are visited from the last one down to positional index 1; row 0
        is never evaluated.

        Args:
            df_ball: Draw history (see class docstring for the layout).
            p_win_count: Previously computed count, only echoed in the summary.
            filter_ball: Optional iterable of numbers; draws containing any of
                them are skipped.

        Returns:
            None. Results are printed.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if excluded is not None and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        Args:
            df_ball: Draw history (see class docstring for the layout).
            nos: Iterable of draw numbers to validate; ``None`` validates
                nothing.

        Returns:
            dict: draw number -> winning numbers, for the draws whose winning
            combination passed ``extract_final_candidates``.

        Raises:
            IndexError: if a draw number in ``nos`` is missing from ``df_ball``.
        """
        win_history = {}

        # The default used to crash with "TypeError: 'NoneType' object is not iterable".
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                # NOTE(review): the filter is still consulted for every
                # combination, as before; confirm it is side-effect free
                # before reordering this check for speed.
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: validate the filter ruleset on held-out draws.

    Reads ``<resources>/lotto_history.txt`` as a headerless CSV, builds a
    FilterTest and runs ``find_filter_method`` over the draw numbers in
    ``[--start-no, --end-no]``, printing the elapsed time afterwards.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # A disabled "STEP #0" used to sit here as a bare string literal. It ran
    # find_final_candidates for the latest draw and wrote the result to
    # <resources>/final_candidates.biz_a1.txt. It wrote `answer` on every line
    # instead of the candidate `ball`; fix that if the step is ever restored.

    # Disabled follow-up step (STEP #2, win-count check):
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins


if __name__ == '__main__':
    main()
|
||||
234
valid_3.py
234
valid_3.py
@@ -1,234 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
|
||||
|
||||
ballFilter = None
|
||||
|
||||
def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
    """Create the BallFilter from ``<resources_path>/<history_json>``.

    Args:
        resources_path: Directory containing the history JSON file.
        ruleset_path: Optional ruleset path passed to BallFilter as-is.
        history_json: File name of the history JSON inside ``resources_path``.
    """
    # Validation relies on the full history so that previous-draw and
    # window-based features are available.
    self.ballFilter = BallFilter(
        os.path.join(resources_path, history_json),
        ruleset_path=ruleset_path,
    )
|
||||
|
||||
def find_filter_method(self, df_ball, start_no, end_no):
    """Run the filter on past winning numbers and print a breakdown.

    Every row is visited, last to first; draws whose number falls outside
    ``[start_no, end_no]`` are skipped, so ``df_ball`` may hold the full
    history.

    Args:
        df_ball: Draw history with a ``no`` column followed by the six
            numbers in columns 1..6.
        start_no: Lowest draw number to evaluate, inclusive.
        end_no: Highest draw number to evaluate, inclusive.

    Returns:
        int: number of evaluated draws for which the filter returned an
        empty result.
    """
    win_count = 0
    no_filter_ball = {}   # draw no -> winning numbers that triggered no filter
    print_log = True
    filter_dic = {}       # filter id -> number of draws that triggered it
    filter_dic_len = {}   # number of triggered filters -> list of results
    filter_dic_1 = {}     # filter id -> draws where it was the only one triggered
    filter_dic_2 = {}     # "a,b" -> draws where exactly these two were triggered

    # evaluate only requested range, but allow df_ball to contain full history
    for i in range(len(df_ball) - 1, -1, -1):
        no = int(df_ball['no'].iloc[i])
        if no < start_no or end_no < no:
            continue

        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
        filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
        size = len(filter_type)

        if size == 0:
            win_count += 1
            no_filter_ball[no] = answer
            print("\t", no)
        else:
            if size == 1:
                key = filter_type[0]
                filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
            elif size == 2:
                key = ','.join(filter_type)
                filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
            if print_log:
                print("\t", no, filter_type)

        # Group results by how many filters fired so the report can list
        # the draws with the fewest triggered filters first.
        filter_dic_len.setdefault(size, []).append(filter_type)

        for f_t in filter_type:
            filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

    print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
    for filter_count in sorted(filter_dic_len):
        for filter_type in filter_dic_len[filter_count]:
            print("\t\t>{} > {}".format(filter_count, filter_type))

    sections = (
        ("\n\t[걸러진 유일 필터]", filter_dic_1),
        ("\n\t[2개 필터에 걸린 경우]", filter_dic_2),
        ("\n\t[Filter 유형 별 걸린 개수]", filter_dic),
    )
    for title, counts in sections:
        print(title)
        for key, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", key, "->", count)

    print("\n\t# 필터에 걸리지 않고 당첨된 회차")
    # The denominator is the size of the requested range, not the number of
    # rows actually present in df_ball.
    total = max(0, end_no - start_no + 1)
    rate = (100 * len(no_filter_ball) / total) if total else 0.0
    # "{:.2f}" prints two decimals; the previous "{:.2}" meant two
    # significant digits and rendered e.g. 50.0 as "5e+01".
    summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
    print(summary)
    for no in no_filter_ball:
        print("\t\t>", no, no_filter_ball[no])
    print(summary)

    return win_count
|
||||
|
||||
def find_final_candidates(self, no, df_ball, filter_ball=None):
    """Enumerate the 6-ball combinations of 1..45 that no filter rejects.

    Args:
        no: Draw number, forwarded to ``self.ballFilter.filter``.
        df_ball: Draw-history DataFrame, forwarded to the filter as ``df``.
        filter_ball: Optional iterable of ball numbers to exclude. Any
            combination containing one of them is dropped without being
            passed to the filter. ``None`` or an empty iterable excludes
            nothing.

    Returns:
        list: The surviving combinations as ascending 6-tuples, in
        lexicographic order (the order ``itertools.combinations`` yields).
    """
    # Build the exclusion set once instead of once per combination.
    excluded = set(filter_ball) if filter_ball is not None else set()

    # Dropping excluded balls from the pool yields exactly the combinations
    # that are disjoint from ``filter_ball``, in the same relative order,
    # without generating (and then discarding) the others.
    pool = [b for b in range(1, 46) if b not in excluded]

    final_candidates = []
    # Stream the combinations: a materialised list of all C(45, 6) tuples
    # (8,145,060 entries) costs hundreds of MB for no benefit.
    for idx, ball in enumerate(itertools.combinations(pool, 6)):
        # Progress log; idx counts generated (non-excluded) combinations.
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))

        filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if len(filter_type):
            # At least one filter matched -> not a candidate.
            continue

        final_candidates.append(ball)

    return final_candidates
|
||||
|
||||
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
    """Print every historical draw whose winning numbers pass all filters.

    Rows are visited from the last one down to index 1; row 0 is never
    evaluated.

    Args:
        df_ball: Draw-history DataFrame with a ``'no'`` column first and the
            six winning balls in columns 1..6.
        p_win_count: Reference count printed next to the computed one.
        filter_ball: Optional iterable of ball numbers; draws whose winning
            numbers contain any of them are skipped.

    Returns:
        None. Results are reported on stdout only.
    """
    # Build the exclusion set once instead of once per row.
    excluded = set(filter_ball) if filter_ball is not None else set()
    total = len(df_ball) - 1

    win_count = 0
    for i in range(total, 0, -1):
        no = df_ball['no'].iloc[i]
        # Read the row positionally; re-locating it with a boolean mask on
        # 'no' scans the whole frame for every row (quadratic overall).
        answer = df_ball.iloc[i, 1:7].tolist()

        if not excluded.isdisjoint(answer):
            continue

        filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

        if len(filter_type) == 0:
            win_count += 1
            print("\t\t>{}. {}".format(no, answer))

    print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, total) )
|
||||
|
||||
def validate(self, df_ball, nos=None):
    """Report which of the given draws would have survived every filter.

    Args:
        df_ball: Draw-history DataFrame with a ``'no'`` column first and the
            six winning balls in columns 1..6.
        nos: Iterable of draw numbers to check. ``None`` (the default) is
            treated as "no draws" and yields an empty result.

    Returns:
        dict: Maps each draw number whose winning combination produced an
        empty filter result to that draw's six winning numbers.

    Raises:
        IndexError: If a draw number in ``nos`` is not present in ``df_ball``.
    """
    win_history = {}

    # Iterating over None would raise TypeError; treat it as empty.
    for no in (nos if nos is not None else ()):
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

        # Of all C(45, 6) combinations only the one equal to the winning
        # numbers can ever be recorded, so evaluate that single combination
        # instead of running the filter over all 8,145,060 of them.
        # NOTE(review): assumes extract_final_candidates has no side effects
        # that later calls depend on -- confirm against BallFilter.
        answer_set = set(answer)
        ball = [b for b in range(1, 46) if b in answer_set]
        if len(ball) != 6:
            # Duplicated or out-of-range numbers: no valid combination
            # can match this draw.
            continue

        filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
        if len(filter_type) == 0:
            win_history[no] = answer
            print("win.. no: {}, answer: {}".format(no, str(answer)))

    return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: load the full draw history, then score the
    # filter set over the draw range [--start-no, --end-no].
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during a long run.
    start = time.perf_counter()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    # NOTE(review): relies on ``import datetime`` (the module) at file top -- confirm.
    process_time = datetime.timedelta(seconds=time.perf_counter() - start)
    print("process_time: ", process_time)

    # --- Parked: STEP #0, final candidate selection (disabled) -------------
    # Previously kept inside a bare string literal; preserved here as real
    # comments. The write loop originally emitted `answer` for every
    # candidate; it is corrected to emit each candidate `ball`.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    #
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    #
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    #
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # --- Parked: STEP #2, win-count check (disabled) ------------------------
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
||||
Reference in New Issue
Block a user