init
This commit is contained in:
12
PROMPT.txt
12
PROMPT.txt
@@ -1,11 +1,12 @@
|
|||||||
데이터는 다음과 같습니다.
|
데이터는 다음과 같습니다.
|
||||||
(학습 데이터)
|
(학습 데이터)
|
||||||
- train.json, train.txt
|
- lotto_history.txt에서 1회차부터 800회차
|
||||||
- 1회차부터 800회차
|
|
||||||
|
|
||||||
(검증 데이터)
|
(검증 데이터)
|
||||||
- valid.json, valid.txt
|
- lotto_history.txt에서 801회차부터 1000회차
|
||||||
- 801회차부터 1000회차
|
|
||||||
|
(테스트 데이터)
|
||||||
|
- lotto_history.txt에서 1001회차부터 이후 모두
|
||||||
|
|
||||||
파일 구조를 먼저 이해하세요.
|
파일 구조를 먼저 이해하세요.
|
||||||
|
|
||||||
@@ -110,3 +111,6 @@
|
|||||||
그리고 요구사항에 대해서 시도 방법을 설계하세요.
|
그리고 요구사항에 대해서 시도 방법을 설계하세요.
|
||||||
그리고 반복적으로 실행해서 최적화된 방법을 찾아서 적용해주세요.
|
그리고 반복적으로 실행해서 최적화된 방법을 찾아서 적용해주세요.
|
||||||
(최적화는 항상 학습 데이터로만 수행해야 합니다. 검증 데이터는 테스트에만 사용하세요.)
|
(최적화는 항상 학습 데이터로만 수행해야 합니다. 검증 데이터는 테스트에만 사용하세요.)
|
||||||
|
|
||||||
|
당첨번호에 대한 추천 개수가 100개 미만이어야 합니다.
|
||||||
|
1_FilterTest_25.py, BallFilter_25.py를 참고해서 최적의 final_filterTest.py, final_BallFilter.py를 작성해 주세요.
|
||||||
4439
filter_model_1.py
4439
filter_model_1.py
File diff suppressed because it is too large
Load Diff
1256
filter_model_2.py
1256
filter_model_2.py
File diff suppressed because it is too large
Load Diff
@@ -1,94 +0,0 @@
|
|||||||
"""
|
|
||||||
filter_model_3.py
|
|
||||||
|
|
||||||
OR-composed BallFilter:
|
|
||||||
- A candidate ball is ACCEPTED if it passes EITHER filter_model_1 OR filter_model_2.
|
|
||||||
- A candidate ball is REJECTED only if it fails BOTH.
|
|
||||||
|
|
||||||
This keeps the same public interface used across the project:
|
|
||||||
BallFilter(lottoHistoryFileName, ruleset_path=..., ruleset=...)
|
|
||||||
.filter(ball, no, until_end=False, df=None, filter_ball=None) -> set[str]
|
|
||||||
.extract_final_candidates(ball, no=None, until_end=False, df=None) -> set[str]
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
- The underlying filters return a non-empty set of failure reasons when rejected.
|
|
||||||
- Callers treat "len(result) == 0" as PASS.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import Any, Dict, Optional
|
|
||||||
|
|
||||||
import filter_model_1 as fm1
|
|
||||||
import filter_model_2 as fm2
|
|
||||||
|
|
||||||
|
|
||||||
class BallFilter:
    """
    Combine filter_model_1.BallFilter and filter_model_2.BallFilter with OR semantics.

    A candidate passes (an empty set is returned) as soon as one of the two
    wrapped models reports no failure reasons. When both models report
    reasons, the result is the union of those reasons, each tagged with an
    "m1:" / "m2:" prefix so its origin stays visible while debugging.
    """

    def __init__(
        self,
        lottoHistoryFileName: Optional[str] = None,
        # Shared ruleset settings, used for a model when no per-model value is given.
        ruleset_path: Optional[str] = None,
        ruleset: Optional[Dict[str, Any]] = None,
        # Per-model overrides; each takes precedence over the shared value.
        ruleset_path_1: Optional[str] = None,
        ruleset_path_2: Optional[str] = None,
        ruleset_1: Optional[Dict[str, Any]] = None,
        ruleset_2: Optional[Dict[str, Any]] = None,
    ):
        """Build both wrapped filters from the same history file.

        Args:
            lottoHistoryFileName: history file handed to both models.
            ruleset_path: ruleset path applied to a model lacking its own override.
            ruleset: ruleset dict applied to a model lacking its own override.
            ruleset_path_1: ruleset path for model 1 only.
            ruleset_path_2: ruleset path for model 2 only.
            ruleset_1: ruleset dict for model 1 only.
            ruleset_2: ruleset dict for model 2 only.
        """
        path_for_m1 = ruleset_path if ruleset_path_1 is None else ruleset_path_1
        path_for_m2 = ruleset_path if ruleset_path_2 is None else ruleset_path_2
        rules_for_m1 = ruleset if ruleset_1 is None else ruleset_1
        rules_for_m2 = ruleset if ruleset_2 is None else ruleset_2

        self.m1 = fm1.BallFilter(
            lottoHistoryFileName, ruleset_path=path_for_m1, ruleset=rules_for_m1
        )
        self.m2 = fm2.BallFilter(
            lottoHistoryFileName, ruleset_path=path_for_m2, ruleset=rules_for_m2
        )

    # ------------------------------------------------------------------
    # History helpers: answered by model 1 only.
    # ------------------------------------------------------------------
    def getBall(self, no):
        """Forward to model 1's getBall(no)."""
        return self.m1.getBall(no)

    def getLastNo(self, YMD):
        """Forward to model 1's getLastNo(YMD)."""
        return self.m1.getLastNo(YMD)

    def getNextNo(self, YMD):
        """Forward to model 1's getNextNo(YMD)."""
        return self.m1.getNextNo(YMD)

    def getYMD(self, no):
        """Forward to model 1's getYMD(no)."""
        return self.m1.getYMD(no)

    def _prefixed(self, prefix: str, reasons: set) -> set:
        """Return a new set with `prefix` prepended to every reason."""
        tagged = set()
        for reason in reasons:
            tagged.add(f"{prefix}{reason}")
        return tagged

    def extract_final_candidates(self, ball, no=None, until_end: bool = False, df=None):
        """Evaluate `ball` against both models with OR-pass semantics.

        Model 2 is consulted only when model 1 returned at least one reason.

        Returns:
            An empty set when either model returns an empty result; otherwise
            the union of both models' reasons, prefixed "m1:" and "m2:".
        """
        rejected = []
        for tag, model in (("m1:", self.m1), ("m2:", self.m2)):
            reasons = model.extract_final_candidates(
                ball=ball, no=no, until_end=until_end, df=df
            )
            if len(reasons) == 0:
                # One passing model is enough; skip any remaining model.
                return set()
            rejected.append((tag, reasons))

        # Both models rejected the candidate: merge the tagged reasons.
        merged = set()
        for tag, reasons in rejected:
            merged |= self._prefixed(tag, set(reasons))
        return merged

    def filter(self, ball, no, until_end: bool = False, df=None, filter_ball=None):
        """Signature-compatible entry point for existing callers.

        `filter_ball` is accepted but not used; the call is forwarded to
        extract_final_candidates with the remaining arguments.
        """
        return self.extract_final_candidates(
            ball=ball, no=no, until_end=until_end, df=df
        )
|
|
||||||
|
|
||||||
1081
practice_0.py
1081
practice_0.py
File diff suppressed because it is too large
Load Diff
179
practice_1.py
179
practice_1.py
@@ -1,179 +0,0 @@
|
|||||||
# 웹 호출 라이브러리를 호출합니다.
|
|
||||||
import time
|
|
||||||
import requests
|
|
||||||
from DataCrawler import DataCrawler
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from TelegramBot import TelegramBot
|
|
||||||
|
|
||||||
from filter_model_1 import BallFilter
|
|
||||||
|
|
||||||
class Practice:
    """Crawl Lotto draw history and build the weekly list of recommended combinations."""

    # Messaging client; assigned in __init__.
    bot = None
    # Not assigned anywhere in the visible code.
    preprocessor = None
    predictor = None

    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram client. `resources_path` is accepted but not stored."""
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """Fetch winning numbers and write them to the history files.

        Args:
            lottoHistoryFile: path prefix; ".json" and ".txt" are appended.
            drwNo: when given, only this draw is fetched and appended to the
                existing files. When None, both files are rewritten from draw 1
                until the service stops answering 'success'.

        Returns:
            [n1, n2, n3, n4, n5, n6, bonus] of the last draw stored, or None
            when nothing was stored.
        """
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        draw_numbers = [drwNo] if single_draw else itertools.count(1)
        ball = None

        # `with` closes both files on every exit path. The original leaked
        # the handles when a single-draw lookup failed or an exception was raised.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            for draw in draw_numbers:
                url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw)
                # A timeout keeps an unresponsive server from hanging the job forever.
                res = requests.post(url, timeout=30)
                result = res.json()

                if result['returnValue'] != 'success':
                    # Unknown draw: end of history (bulk) or failed lookup (single).
                    break

                numbers = [
                    result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
                    result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
                    result['bnusNo'],
                ]
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                row = "%d,%d,%d,%d,%d,%d,%d,%d" % (draw, *numbers)
                textFp.write(row + "\n")
                print(row)
                ball = numbers

                if not single_draw:
                    # Pause between requests during a bulk crawl.
                    time.sleep(0.5)

        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combination to `result_json` (a list, mutated in place)."""
        result_json.append([6, 7, 10, 11, 20, 45])

    def predict2(self, resources_path, ymd, result_json):
        """Append every 6-of-45 combination that passes the ball filter.

        Args:
            resources_path: directory holding lotto_history.json / lotto_history.txt.
            ymd: draw date string passed to BallFilter.getNextNo.
            result_json: list that receives each surviving combination.

        Returns:
            (p_no, p_ball): the previous draw number and its six main numbers.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Stream the 8,145,060 combinations instead of materialising them in a list.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)
            reasons = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(reasons) > 0:
                # A non-empty result means the filter rejected this combination.
                continue

            result_json.append(ball)

        # Row of the draw right before the one being predicted.
        p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = p_ball[0]
        p_ball = p_ball[1:7]

        return p_no, p_ball
|
|
||||||
|
|
||||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Pick the Saturday of the draw to predict. weekday(): Monday=0 ... Saturday=5, Sunday=6.
    # From Saturday 21:00 onwards, and all of Sunday, the target is next week's Saturday.
    today = datetime.today()
    weekday = today.weekday()
    if weekday == 6 or (weekday == 5 and today.hour > 20):
        days_ahead = 12 - weekday
    else:
        days_ahead = 5 - weekday
    this_weekend = today + timedelta(days=days_ahead)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: parse each non-blank history line, keeping the last record.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)

    # Crawling of the next draw is currently disabled:
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed weekly combination
    practice.predict1(picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    # Send the header plus the numbered picks, one message per 100 entries.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for rank, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Send the final, partially filled message.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
|
||||||
179
practice_2.py
179
practice_2.py
@@ -1,179 +0,0 @@
|
|||||||
# 웹 호출 라이브러리를 호출합니다.
|
|
||||||
import time
|
|
||||||
import requests
|
|
||||||
from DataCrawler import DataCrawler
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from TelegramBot import TelegramBot
|
|
||||||
|
|
||||||
from filter_model_2 import BallFilter
|
|
||||||
|
|
||||||
class Practice:
    """Crawl Lotto draw history and build the weekly list of recommended combinations."""

    # Messaging client; assigned in __init__.
    bot = None
    # Not assigned anywhere in the visible code.
    preprocessor = None
    predictor = None

    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram client. `resources_path` is accepted but not stored."""
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """Fetch winning numbers and write them to the history files.

        Args:
            lottoHistoryFile: path prefix; ".json" and ".txt" are appended.
            drwNo: when given, only this draw is fetched and appended to the
                existing files. When None, both files are rewritten from draw 1
                until the service stops answering 'success'.

        Returns:
            [n1, n2, n3, n4, n5, n6, bonus] of the last draw stored, or None
            when nothing was stored.
        """
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        draw_numbers = [drwNo] if single_draw else itertools.count(1)
        ball = None

        # `with` closes both files on every exit path. The original leaked
        # the handles when a single-draw lookup failed or an exception was raised.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            for draw in draw_numbers:
                url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw)
                # A timeout keeps an unresponsive server from hanging the job forever.
                res = requests.post(url, timeout=30)
                result = res.json()

                if result['returnValue'] != 'success':
                    # Unknown draw: end of history (bulk) or failed lookup (single).
                    break

                numbers = [
                    result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
                    result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
                    result['bnusNo'],
                ]
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                row = "%d,%d,%d,%d,%d,%d,%d,%d" % (draw, *numbers)
                textFp.write(row + "\n")
                print(row)
                ball = numbers

                if not single_draw:
                    # Pause between requests during a bulk crawl.
                    time.sleep(0.5)

        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combination to `result_json` (a list, mutated in place)."""
        result_json.append([6, 7, 10, 11, 20, 45])

    def predict2(self, resources_path, ymd, result_json):
        """Append every 6-of-45 combination that passes the ball filter.

        Args:
            resources_path: directory holding lotto_history.json / lotto_history.txt.
            ymd: draw date string passed to BallFilter.getNextNo.
            result_json: list that receives each surviving combination.

        Returns:
            (p_no, p_ball): the previous draw number and its six main numbers.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Stream the 8,145,060 combinations instead of materialising them in a list.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)
            reasons = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(reasons) > 0:
                # A non-empty result means the filter rejected this combination.
                continue

            result_json.append(ball)

        # Row of the draw right before the one being predicted.
        p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = p_ball[0]
        p_ball = p_ball[1:7]

        return p_no, p_ball
|
|
||||||
|
|
||||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Pick the Saturday of the draw to predict. weekday(): Monday=0 ... Saturday=5, Sunday=6.
    # From Saturday 21:00 onwards, and all of Sunday, the target is next week's Saturday.
    today = datetime.today()
    weekday = today.weekday()
    if weekday == 6 or (weekday == 5 and today.hour > 20):
        days_ahead = 12 - weekday
    else:
        days_ahead = 5 - weekday
    this_weekend = today + timedelta(days=days_ahead)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: parse each non-blank history line, keeping the last record.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)

    # Crawling of the next draw is currently disabled:
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed weekly combination
    practice.predict1(picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    # Send the header plus the numbered picks, one message per 100 entries.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for rank, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Send the final, partially filled message.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
|
||||||
546
practice_3.py
546
practice_3.py
@@ -1,546 +0,0 @@
|
|||||||
# 웹 호출 라이브러리를 호출합니다.
|
|
||||||
import time
|
|
||||||
import requests
|
|
||||||
from DataCrawler import DataCrawler
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import copy
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from TelegramBot import TelegramBot
|
|
||||||
|
|
||||||
from filter_model_3 import BallFilter
|
|
||||||
|
|
||||||
class Practice:
|
|
||||||
|
|
||||||
bot = None
|
|
||||||
preprocessor = None
|
|
||||||
predictor = None
|
|
||||||
|
|
||||||
extract_count = None
|
|
||||||
TARGET_MIN_SURVIVORS = 30
|
|
||||||
TARGET_MAX_SURVIVORS = 150
|
|
||||||
PREDICT_TIMEOUT_SECONDS = 180
|
|
||||||
|
|
||||||
def __init__(self, resources_path):
    """Remember the resources directory and create the Telegram client."""
    self.bot = TelegramBot()
    self.resources_path = resources_path
|
|
||||||
|
|
||||||
# 로또 당첨 데이터를 수집해서 파일로 저장합니다.
|
|
||||||
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일
|
|
||||||
def craw(self, lottoHistoryFile, drwNo=None):
    """Fetch winning numbers and write them to the history files.

    Args:
        lottoHistoryFile: path prefix; ".json" and ".txt" are appended.
        drwNo: when given, only this draw is fetched and appended to the
            existing files. When None, both files are rewritten from draw 1
            until the service stops answering 'success'.

    Returns:
        [n1, n2, n3, n4, n5, n6, bonus] of the last draw stored, or None
        when nothing was stored.
    """
    single_draw = drwNo is not None
    mode = 'a' if single_draw else 'w'
    draw_numbers = [drwNo] if single_draw else itertools.count(1)
    ball = None

    # `with` closes both files on every exit path. The original leaked
    # the handles when a single-draw lookup failed or an exception was raised.
    with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
            open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
        for draw in draw_numbers:
            url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw)
            # A timeout keeps an unresponsive server from hanging the job forever.
            res = requests.post(url, timeout=30)
            result = res.json()

            if result['returnValue'] != 'success':
                # Unknown draw: end of history (bulk) or failed lookup (single).
                break

            numbers = [
                result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
                result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
                result['bnusNo'],
            ]
            jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
            row = "%d,%d,%d,%d,%d,%d,%d,%d" % (draw, *numbers)
            textFp.write(row + "\n")
            print(row)
            ball = numbers

            if not single_draw:
                # Pause between requests during a bulk crawl.
                time.sleep(0.5)

    return ball
|
|
||||||
|
|
||||||
def predict1(self, result_json):
    """Append the eleven fixed combinations to `result_json` (a list, mutated in place)."""
    fixed_picks = (
        [6, 7, 10, 11, 20, 45],
        [2, 7, 17, 28, 35, 39],
        [6, 10, 19, 25, 33, 35],
        [3, 17, 20, 24, 35, 45],
        [5, 15, 18, 29, 36, 41],
        [6, 15, 20, 23, 37, 43],
        [8, 15, 19, 23, 38, 41],
        [5, 11, 19, 24, 40, 45],
        [9, 16, 18, 23, 35, 43],
        [7, 13, 19, 28, 33, 44],
        [7, 11, 18, 29, 37, 42],
    )
    for pick in fixed_picks:
        result_json.append(pick)
    print("회차(predict1)")
|
|
||||||
|
|
||||||
def predict2(self, resources_path, ymd, result_json):
    """Append every 6-of-45 combination that passes the ball filter.

    Args:
        resources_path: directory holding lotto_history.json / lotto_history.txt.
        ymd: draw date string passed to BallFilter.getNextNo.
        result_json: list that receives each surviving combination.

    Returns:
        (p_no, p_ball): the previous draw number and its six main numbers.
    """
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
    ballFilter = BallFilter(lottoHistoryFileName)
    no = ballFilter.getNextNo(ymd)
    print("회차(predict2): {}".format(no))

    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # Stream the 8,145,060 combinations instead of materialising them in a list.
    for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))

        ball = list(combo)
        reasons = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if len(reasons) > 0:
            # A non-empty result means the filter rejected this combination.
            continue

        result_json.append(ball)

    # Row of the draw right before the one being predicted.
    p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
    p_no = p_ball[0]
    p_ball = p_ball[1:7]

    return p_no, p_ball
|
|
||||||
|
|
||||||
def predict3(self, resources_path, ymd, result_json):
    """Pick combinations with a staged filter that aims for a bounded survivor count.

    The base ruleset is tried first. If it yields more than
    TARGET_MAX_SURVIVORS candidates, the tighten rulesets are tried in order;
    if it yields fewer than TARGET_MIN_SURVIVORS, the relax rulesets are tried
    in order. Every stage shares one deadline of PREDICT_TIMEOUT_SECONDS
    measured from the start of this call.

    Args:
        resources_path: directory holding lotto_history.json / lotto_history.txt.
        ymd: draw date string passed to BallFilter.getNextNo.
        result_json: list that receives each chosen combination.

    Returns:
        (p_no, p_ball): the previous draw number and its six main numbers, sorted.
    """
    candidates = [i for i in range(1, 46)]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
    no = BallFilter(lottoHistoryFileName).getNextNo(ymd)
    print("회차(predict3): {}".format(no))
    # One deadline shared by every _collect_candidates call below.
    predict_start_ts = time.time()
    deadline_ts = predict_start_ts + self.PREDICT_TIMEOUT_SECONDS

    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # Row of the draw right before the one being predicted.
    p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
    p_no = p_ball[0]
    p_ball = sorted(p_ball[1:7])

    base_ruleset = self._get_base_ruleset()
    # Rulesets tried in order when the base ruleset leaves too many survivors.
    # The second repeats the first and additionally restricts "sum" and "sum_prev_diff".
    tighten_rulesets = [
        self._build_ruleset(
            base_ruleset=base_ruleset,
            enabled_overrides={
                "paper_patterns": True,
                "ban_triples_legacy": True,
                "all_in_previous7": True,
                "previous_neighbors": True,
            },
            allowed_overrides={
                "ac_value": [8, 9],
                "uniq_last_digit_count": [4, 5],
                "even_count": [2, 3, 4],
            },
        ),
        self._build_ruleset(
            base_ruleset=base_ruleset,
            enabled_overrides={
                "paper_patterns": True,
                "ban_triples_legacy": True,
                "all_in_previous7": True,
                "previous_neighbors": True,
            },
            allowed_overrides={
                "ac_value": [8, 9],
                "uniq_last_digit_count": [4, 5],
                "even_count": [2, 3, 4],
                "sum": [112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148],
                "sum_prev_diff": [13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40],
            },
        ),
    ]
    # Rulesets tried in order when the base ruleset leaves too few survivors.
    # Each one disables more filters than the one before it.
    relax_rulesets = [
        self._build_ruleset(
            base_ruleset=base_ruleset,
            enabled_overrides={
                "paper_patterns": False,
                "ban_triples_legacy": False,
            },
        ),
        self._build_ruleset(
            base_ruleset=base_ruleset,
            enabled_overrides={
                "paper_patterns": False,
                "ban_triples_legacy": False,
                "previous_neighbors": False,
                "all_in_previous7": False,
            },
        ),
        self._build_ruleset(
            base_ruleset=base_ruleset,
            enabled_overrides={
                "paper_patterns": False,
                "ban_triples_legacy": False,
                "previous_neighbors": False,
                "all_in_previous7": False,
                "weeks_8_count": False,
                "weeks_12_count": False,
                "weeks_16_count": False,
                "weeks_20_count": False,
            },
        ),
    ]

    min_survivors = self.TARGET_MIN_SURVIVORS
    max_survivors = self.TARGET_MAX_SURVIVORS
    chosen = []
    stage_name = "base"

    # Stage 1: base ruleset.
    # NOTE(review): stop_when_gt presumably ends the scan once the survivor count
    # exceeds the limit; that part of _collect_candidates is not visible here - confirm.
    current_info = self._collect_candidates(
        candidates=candidates,
        no=no,
        df_ball=df_ball,
        ruleset=base_ruleset,
        stop_when_gt=max_survivors,
        stage_name="base",
        predict_start_ts=predict_start_ts,
        deadline_ts=deadline_ts,
    )
    current = current_info["candidates"]
    if current_info["timed_out"]:
        # Deadline hit during the base scan: finalize the partial result and return early.
        chosen = self._finalize_on_timeout(current, p_ball, min_survivors, max_survivors)
        stage_name = "base_timeout_fallback"
        print("predict3 stage: {}, survivors: {}".format(stage_name, len(chosen)))
        for ball in chosen:
            result_json.append(ball)
        return p_no, p_ball

    if min_survivors <= len(current) <= max_survivors:
        # Base ruleset already landed inside the target window.
        chosen = current
    elif len(current) > max_survivors:
        # Too many survivors: try the tighter rulesets in order.
        chosen = current
        stage_name = "base_overflow"
        for idx, rs in enumerate(tighten_rulesets, start=1):
            t_info = self._collect_candidates(
                candidates=candidates,
                no=no,
                df_ball=df_ball,
                ruleset=rs,
                stop_when_gt=max_survivors,
                stage_name="tighten_{}".format(idx),
                predict_start_ts=predict_start_ts,
                deadline_ts=deadline_ts,
            )
            t = t_info["candidates"]
            if t_info["timed_out"]:
                chosen = self._finalize_on_timeout(t, p_ball, min_survivors, max_survivors)
                stage_name = "tighten_{}_timeout_fallback".format(idx)
                break
            if min_survivors <= len(t) <= max_survivors:
                chosen = t
                stage_name = "tighten_{}".format(idx)
                break
            if len(t) <= max_survivors:
                # Reached only when len(t) < min_survivors: keep this result,
                # but the loop still continues with the next ruleset.
                chosen = t
                stage_name = "tighten_{}".format(idx)

        if len(chosen) > max_survivors:
            # Still too many: rescan with the last tighten ruleset and no
            # stop_when_gt limit, then rank and trim down to max_survivors.
            full_info = self._collect_candidates(
                candidates=candidates,
                no=no,
                df_ball=df_ball,
                ruleset=tighten_rulesets[-1],
                stop_when_gt=None,
                stage_name="tighten_full_rank",
                predict_start_ts=predict_start_ts,
                deadline_ts=deadline_ts,
            )
            full_for_ranking = full_info["candidates"]
            if full_info["timed_out"]:
                chosen = self._finalize_on_timeout(full_for_ranking, p_ball, min_survivors, max_survivors)
                stage_name = "tighten_rank_timeout_fallback"
            else:
                chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
                stage_name = "tighten_rank_trim"
    else:
        # Too few survivors: try the looser rulesets in order.
        chosen = current
        stage_name = "base_underflow"
        for idx, rs in enumerate(relax_rulesets, start=1):
            r_info = self._collect_candidates(
                candidates=candidates,
                no=no,
                df_ball=df_ball,
                ruleset=rs,
                stop_when_gt=None,
                stop_when_gte=min_survivors,
                stage_name="relax_{}".format(idx),
                predict_start_ts=predict_start_ts,
                deadline_ts=deadline_ts,
            )
            r = r_info["candidates"]
            # The latest relax result always replaces the previous choice.
            chosen = r
            stage_name = "relax_{}".format(idx)
            if r_info["timed_out"]:
                chosen = self._finalize_on_timeout(r, p_ball, min_survivors, max_survivors)
                stage_name = "relax_{}_timeout_fallback".format(idx)
                break
            if len(r) >= min_survivors:
                break

    # Top up to min_survivors with fallback candidates built from the previous draw.
    # NOTE(review): the "relax_zero_fallback" label is also applied when an empty
    # result comes out of the tighten branch.
    if len(chosen) == 0:
        stage_name = "relax_zero_fallback"
        chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
    elif len(chosen) < min_survivors:
        stage_name = "{}_fill".format(stage_name)
        fill = self._fallback_candidates_from_prev(
            p_ball,
            min_survivors - len(chosen),
            exclude=set(tuple(x) for x in chosen),
        )
        # Extends in place the list object returned by the earlier stage.
        chosen.extend(fill)

    print("predict3 stage: {}, survivors: {}".format(stage_name, len(chosen)))
    for ball in chosen:
        result_json.append(ball)
    return p_no, p_ball
|
|
||||||
|
|
||||||
def _get_base_ruleset(self):
    """Return an independent copy of the default ruleset.

    A ``BallFilter`` is built on ``<resources_path>/lotto_history.json`` and
    the ruleset held by its ``m1`` member is deep-copied, so callers can
    modify the result without touching the filter's own configuration.
    """
    history_json = os.path.join(self.resources_path, "lotto_history.json")
    return copy.deepcopy(BallFilter(history_json).m1.ruleset)
|
|
||||||
|
|
||||||
def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
|
|
||||||
ruleset = copy.deepcopy(base_ruleset)
|
|
||||||
ruleset.setdefault("filters", {})
|
|
||||||
enabled_overrides = enabled_overrides or {}
|
|
||||||
allowed_overrides = allowed_overrides or {}
|
|
||||||
for key, value in enabled_overrides.items():
|
|
||||||
ruleset["filters"].setdefault(key, {})
|
|
||||||
ruleset["filters"][key]["enabled"] = bool(value)
|
|
||||||
for key, values in allowed_overrides.items():
|
|
||||||
ruleset["filters"].setdefault(key, {})
|
|
||||||
ruleset["filters"][key]["enabled"] = True
|
|
||||||
ruleset["filters"][key]["allowed"] = list(values)
|
|
||||||
return ruleset
|
|
||||||
|
|
||||||
def _collect_candidates(
    self,
    candidates,
    no,
    df_ball,
    ruleset,
    stop_when_gt=None,
    stop_when_gte=None,
    stage_name="base",
    predict_start_ts=None,
    deadline_ts=None,
):
    """Enumerate 6-number combinations and keep those the filter accepts.

    A ``BallFilter`` is built with ``ruleset``; a combination survives when
    ``filter(...)`` returns an empty collection.

    Args:
        candidates: Pool of numbers the combinations are drawn from.
        no: Draw number passed through to the filter.
        df_ball: Draw-history frame passed through to the filter.
        ruleset: Ruleset handed to ``BallFilter``.
        stop_when_gt: Stop as soon as the survivor count exceeds this value.
        stop_when_gte: Stop as soon as the survivor count reaches this value.
        stage_name: Label used in progress messages.
        predict_start_ts: ``time.time()`` value used to report elapsed time.
        deadline_ts: ``time.time()`` value after which enumeration aborts.

    Returns:
        ``{"candidates": list, "timed_out": bool, "processed": int}``.
    """
    history_json = os.path.join(self.resources_path, "lotto_history.json")
    ball_filter = BallFilter(history_json, ruleset=ruleset)
    survivors = []

    def _elapsed():
        return (time.time() - predict_start_ts) if predict_start_ts is not None else 0.0

    def _outcome(timed_out, processed):
        return {"candidates": survivors, "timed_out": timed_out, "processed": processed}

    processed = 0
    for processed, combo in enumerate(itertools.combinations(candidates, 6), start=1):
        if deadline_ts is not None and deadline_ts <= time.time():
            print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, _elapsed(), len(survivors)))
            return _outcome(True, processed)

        if processed % 1000000 == 0:
            print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, _elapsed(), len(survivors)))

        ball = list(combo)
        if len(ball_filter.filter(ball=ball, no=no, until_end=False, df=df_ball)) != 0:
            continue

        survivors.append(ball)
        # The survivor count only changes here, so the early-exit thresholds
        # need checking only after an append.
        if stop_when_gt is not None and len(survivors) > stop_when_gt:
            return _outcome(False, processed)
        if stop_when_gte is not None and len(survivors) >= stop_when_gte:
            return _outcome(False, processed)

    return _outcome(False, processed)
|
|
||||||
|
|
||||||
def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
|
|
||||||
chosen = list(partial_candidates)
|
|
||||||
if len(chosen) > max_survivors:
|
|
||||||
chosen = self._rank_and_trim(chosen, prev_ball, max_survivors)
|
|
||||||
elif len(chosen) < min_survivors:
|
|
||||||
fill = self._fallback_candidates_from_prev(
|
|
||||||
prev_ball,
|
|
||||||
min_survivors - len(chosen),
|
|
||||||
exclude=set(tuple(x) for x in chosen),
|
|
||||||
)
|
|
||||||
chosen.extend(fill)
|
|
||||||
return chosen
|
|
||||||
|
|
||||||
def _rank_and_trim(self, candidates, prev_ball, limit):
|
|
||||||
scored = [(self._score_candidate(ball, prev_ball), ball) for ball in candidates]
|
|
||||||
scored.sort(key=lambda x: x[0])
|
|
||||||
return [ball for _, ball in scored[:limit]]
|
|
||||||
|
|
||||||
def _score_candidate(self, ball, prev_ball):
|
|
||||||
sum_diff = abs(sum(ball) - sum(prev_ball))
|
|
||||||
even_cnt = len([x for x in ball if x % 2 == 0])
|
|
||||||
uniq_last = len(set([x % 10 for x in ball]))
|
|
||||||
contiguous_penalty = 0
|
|
||||||
s = sorted(ball)
|
|
||||||
for i in range(1, len(s)):
|
|
||||||
if s[i] - s[i - 1] == 1:
|
|
||||||
contiguous_penalty += 1
|
|
||||||
score = 0
|
|
||||||
score += sum_diff
|
|
||||||
score += abs(even_cnt - 3) * 2
|
|
||||||
score += abs(uniq_last - 5) * 2
|
|
||||||
score += contiguous_penalty
|
|
||||||
return score
|
|
||||||
|
|
||||||
def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
|
|
||||||
exclude = exclude or set()
|
|
||||||
seed = sorted(prev_ball)
|
|
||||||
out = []
|
|
||||||
delta_patterns = [
|
|
||||||
(0, 0, 0, 0, 0, 0),
|
|
||||||
(-1, 0, 0, 0, 0, 1),
|
|
||||||
(0, -1, 0, 0, 1, 0),
|
|
||||||
(0, 0, -1, 1, 0, 0),
|
|
||||||
(-2, 0, 0, 0, 0, 2),
|
|
||||||
(0, -2, 0, 0, 2, 0),
|
|
||||||
(0, 0, -2, 2, 0, 0),
|
|
||||||
(-1, -1, 0, 0, 1, 1),
|
|
||||||
(1, 0, -1, 0, 0, 0),
|
|
||||||
(0, 1, 0, -1, 0, 0),
|
|
||||||
(1, -1, 1, -1, 1, -1),
|
|
||||||
(-1, 1, -1, 1, -1, 1),
|
|
||||||
]
|
|
||||||
shift = 0
|
|
||||||
while len(out) < need_count and shift <= 8:
|
|
||||||
for delta in delta_patterns:
|
|
||||||
cand = [seed[i] + delta[i] for i in range(6)]
|
|
||||||
cand = [min(45, max(1, v + shift)) for v in cand]
|
|
||||||
cand = sorted(cand)
|
|
||||||
if len(set(cand)) != 6:
|
|
||||||
continue
|
|
||||||
t = tuple(cand)
|
|
||||||
if t in exclude:
|
|
||||||
continue
|
|
||||||
exclude.add(t)
|
|
||||||
out.append(cand)
|
|
||||||
if len(out) >= need_count:
|
|
||||||
break
|
|
||||||
shift += 1
|
|
||||||
return out
|
|
||||||
|
|
||||||
def _merge_unique_balls(self, base_balls, extra_balls):
|
|
||||||
seen = set(tuple(sorted(x)) for x in base_balls)
|
|
||||||
for ball in extra_balls:
|
|
||||||
key = tuple(sorted(ball))
|
|
||||||
if key not in seen:
|
|
||||||
base_balls.append(list(ball))
|
|
||||||
seen.add(key)
|
|
||||||
return base_balls
|
|
||||||
|
|
||||||
def _sorted_unique_balls(self, balls):
|
|
||||||
"""
|
|
||||||
Normalize (sort within ball), de-duplicate, then sort lexicographically.
|
|
||||||
Returns List[List[int]].
|
|
||||||
"""
|
|
||||||
uniq = {}
|
|
||||||
for b in balls:
|
|
||||||
key = tuple(sorted(b))
|
|
||||||
uniq[key] = list(key)
|
|
||||||
return [list(t) for t in sorted(uniq.keys())]
|
|
||||||
|
|
||||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Pick the target Saturday: the coming one, or the one after it when this
    # week's draw is already over (Saturday after 20:xx, or Sunday).
    today = datetime.today()
    weekday = today.weekday()
    draw_is_over = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=(12 if draw_is_over else 5) - weekday)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: remember the last record of the history file so the
    # (currently disabled) crawl below can fetch the draw that follows it.
    lottoHistoryFile = os.path.join(PROJECT_HOME, 'resources', 'lotto_history')
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)

    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}

    # Fixed picks, identical every week.
    practice.predict1(result_json[ymd])

    # Filter-based predictions (existing/new): merge both results, sort them,
    # and append them after the predict1 picks.
    predict2_json = []
    p_no, p_ball = practice.predict2(resources_path, ymd, predict2_json)

    predict3_json = []
    p_no3, p_ball3 = practice.predict3(resources_path, ymd, predict3_json)

    merged_predict = []
    practice._merge_unique_balls(merged_predict, predict2_json)
    practice._merge_unique_balls(merged_predict, predict3_json)
    merged_predict = practice._sorted_unique_balls(merged_predict)

    # Append merged_predict to the predict1 picks in sorted order, skipping duplicates.
    practice._merge_unique_balls(result_json[ymd], merged_predict)
    if p_no3 == p_no:
        p_ball = p_ball3

    # Send the report in chunks of 100 picks per message.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for position, ball in enumerate(result_json[ymd], start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    if len(result_json[ymd]) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(result_json[ymd])
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
|
||||||
@@ -1,216 +0,0 @@
|
|||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_3 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
    """Evaluation harness that replays historical draws through a BallFilter."""

    # Filter under test; assigned in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    @staticmethod
    def _answer_of(df_ball, no):
        """Return the six winning numbers (columns 1..6) of draw ``no``."""
        return df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

    @staticmethod
    def _print_counts(title, counts):
        """Print ``counts`` under ``title``, most frequent entry first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, filter_ball=None):
        """Report which filter rules reject the historical winning numbers.

        Rows are replayed from the last one down to index 20. For each draw
        the winning numbers go through ``ballFilter.filter(until_end=True)``
        and the returned rejection reasons are tallied and printed.

        Args:
            df_ball: Draw history with a ``'no'`` column followed by the six
                winning numbers.
            filter_ball: Unused; kept for interface compatibility.

        Returns:
            The number of replayed draws that no filter rejected.
        """
        win_count = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}
        filter_dic_len = {}
        filter_dic_1 = {}
        filter_dic_2 = {}

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            answer = self._answer_of(df_ball, no)

            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by rejection count so the least-filtered ones can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_counts("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_counts("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_counts("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        for no, answer in no_filter_ball.items():
            print("\t\t>", no, answer)
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter accepts for draw ``no``.

        Args:
            no: Draw number passed to the filter.
            df_ball: Draw history passed to the filter.
            filter_ball: Optional numbers to avoid; any combination sharing a
                number with it is skipped before filtering.

        Returns:
            A list of 6-tuples in lexicographic order.
        """
        final_candidates = []

        # Iterate lazily: materialising all 8,145,060 tuples first is wasteful.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if filter_ball is not None and 0 < len(set(ball) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are replayed from the last one down to index 1. Draws whose
        winning numbers share a number with ``filter_ball`` are skipped.
        ``p_win_count`` is only echoed in the summary line.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = self._answer_of(df_ball, no)

            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )

    def validate(self, df_ball, nos=None):
        """Check, per draw in ``nos``, whether the winning combination survives.

        All combinations are enumerated in lexicographic order and passed to
        ``extract_final_candidates``; enumeration stops at the first accepted
        combination equal to the winning numbers.

        Args:
            df_ball: Draw history.
            nos: Iterable of draw numbers to validate; ``None`` means none.

        Returns:
            ``{draw_no: winning_numbers}`` for every draw that survived.
        """
        win_history = {}

        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = self._answer_of(df_ball, no)
            answer_set = set(answer)

            # NOTE(review): only the winning combination can match, so the
            # filter calls for all earlier combinations look redundant. They
            # are kept in case extract_final_candidates is stateful; confirm.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':

    resources_path = 'resources'

    # Draw history: one CSV row per draw (draw number, six numbers, bonus number).
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step, kept as a bare string so it can be re-enabled quickly:
    # it selects the final candidates for the latest draw and writes them out.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned in the feature file): 22 wins
|
|
||||||
@@ -1,490 +0,0 @@
|
|||||||
# 웹 호출 라이브러리를 호출합니다.
|
|
||||||
import time
|
|
||||||
import requests
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import copy
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from TelegramBot import TelegramBot
|
|
||||||
|
|
||||||
from filter_model_3 import BallFilter
|
|
||||||
|
|
||||||
class Practice:
|
|
||||||
|
|
||||||
bot = None
|
|
||||||
preprocessor = None
|
|
||||||
predictor = None
|
|
||||||
|
|
||||||
extract_count = None
|
|
||||||
TARGET_MIN_SURVIVORS = 30
|
|
||||||
TARGET_MAX_SURVIVORS = 150
|
|
||||||
PREDICT_TIMEOUT_SECONDS = 180
|
|
||||||
|
|
||||||
def __init__(self, resources_path):
    """Create the Telegram bot client and remember the resources directory.

    Args:
        resources_path: Directory that holds the ``lotto_history`` files.
    """
    self.bot = TelegramBot()
    self.resources_path = resources_path
|
|
||||||
|
|
||||||
# 로또 당첨 데이터를 수집해서 파일로 저장합니다.
|
|
||||||
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일
|
|
||||||
def craw(self, lottoHistoryFile, drwNo=None):
    """Download winning numbers and store them as JSON lines and CSV rows.

    Args:
        lottoHistoryFile: Output path without extension; ``.json`` and
            ``.txt`` are appended.
        drwNo: When given, only that draw is fetched and appended to the
            existing files. When ``None``, both files are rewritten and every
            draw from 1 upward is fetched until the service stops reporting
            ``'success'``, pausing 0.5 s between requests.

    Returns:
        ``[n1, n2, n3, n4, n5, n6, bonus]`` of the last draw stored, or
        ``None`` when nothing was stored.
    """
    base_url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    number_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')

    single_draw = drwNo is not None
    mode = 'a' if single_draw else 'w'
    idx = drwNo if single_draw else 1
    ball = None

    # ``with`` guarantees both files are closed on every exit path,
    # including a failed lookup and request/parsing errors.
    with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
            open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
        while True:
            result = requests.post(base_url + str(idx)).json()
            if result['returnValue'] != 'success':
                break

            numbers = [result[key] for key in number_keys]
            row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([idx] + numbers)

            jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
            textFp.write(row + "\n")
            print(row)
            ball = numbers

            if single_draw:
                break
            idx += 1
            time.sleep(0.5)

    return ball
|
|
||||||
|
|
||||||
def predict1(self, result_json):
    """Append the fixed weekly combinations to ``result_json`` in place.

    Args:
        result_json: List that receives eleven fresh 6-number lists.
    """
    fixed_picks = (
        (6, 7, 10, 11, 20, 45),
        (2, 7, 17, 28, 35, 39),
        (6, 10, 19, 25, 33, 35),
        (3, 17, 20, 24, 35, 45),
        (5, 15, 18, 29, 36, 41),
        (6, 15, 20, 23, 37, 43),
        (8, 15, 19, 23, 38, 41),
        (5, 11, 19, 24, 40, 45),
        (9, 16, 18, 23, 35, 43),
        (7, 13, 19, 28, 33, 44),
        (7, 11, 18, 29, 37, 42),
    )
    # Fresh lists on every call so callers may modify them freely.
    result_json.extend(list(pick) for pick in fixed_picks)
|
|
||||||
|
|
||||||
def predict2(self, resources_path, ymd, result_json):
    """Select filter-based candidates for the next draw.

    The base ruleset is tried first. If it yields too many survivors the
    rules are tightened step by step (and finally ranked and trimmed); if it
    yields too few they are relaxed step by step. The whole search shares one
    deadline of ``PREDICT_TIMEOUT_SECONDS``.

    Args:
        resources_path: Directory with ``lotto_history.json`` / ``.txt``.
        ymd: Target draw date (``YYYYMMDD``) used to look up the draw number.
        result_json: List that receives the chosen combinations in place.

    Returns:
        ``(previous_draw_no, sorted_previous_winning_numbers)``.
    """
    candidates = list(range(1, 46))
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
    no = BallFilter(lottoHistoryFileName).getNextNo(ymd)
    print("회차: {}".format(no))
    predict_start_ts = time.time()
    deadline_ts = predict_start_ts + self.PREDICT_TIMEOUT_SECONDS

    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
    p_no = p_ball[0]
    p_ball = sorted(p_ball[1:7])

    # Rulesets for the base / tightened / relaxed stages.
    base_ruleset = self._get_base_ruleset()

    strict_enabled = {
        "paper_patterns": True,
        "ban_triples_legacy": True,
        "all_in_previous7": True,
        "previous_neighbors": True,
    }
    strict_allowed = {
        "ac_value": [8, 9],
        "uniq_last_digit_count": [4, 5],
        "even_count": [2, 3, 4],
    }
    stricter_allowed = dict(
        strict_allowed,
        sum=[112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148],
        sum_prev_diff=[13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40],
    )
    tighten_rulesets = [
        self._build_ruleset(
            base_ruleset=base_ruleset,
            enabled_overrides=strict_enabled,
            allowed_overrides=allowed,
        )
        for allowed in (strict_allowed, stricter_allowed)
    ]

    # Each relax stage switches off everything the previous one did, plus more.
    relax_1 = {"paper_patterns": False, "ban_triples_legacy": False}
    relax_2 = dict(relax_1, previous_neighbors=False, all_in_previous7=False)
    relax_3 = dict(
        relax_2,
        weeks_8_count=False,
        weeks_12_count=False,
        weeks_16_count=False,
        weeks_20_count=False,
    )
    relax_rulesets = [
        self._build_ruleset(base_ruleset=base_ruleset, enabled_overrides=disabled)
        for disabled in (relax_1, relax_2, relax_3)
    ]

    min_survivors = self.TARGET_MIN_SURVIVORS
    max_survivors = self.TARGET_MAX_SURVIVORS

    def _collect(ruleset, stage, stop_when_gt=None, stop_when_gte=None):
        return self._collect_candidates(
            candidates=candidates,
            no=no,
            df_ball=df_ball,
            ruleset=ruleset,
            stop_when_gt=stop_when_gt,
            stop_when_gte=stop_when_gte,
            stage_name=stage,
            predict_start_ts=predict_start_ts,
            deadline_ts=deadline_ts,
        )

    def _emit(stage, balls):
        print("candidate_stage: {}, survivors: {}".format(stage, len(balls)))
        result_json.extend(balls)
        return p_no, p_ball

    current_info = _collect(base_ruleset, "base", stop_when_gt=max_survivors)
    current = current_info["candidates"]
    if current_info["timed_out"]:
        return _emit(
            "base_timeout_fallback",
            self._finalize_on_timeout(current, p_ball, min_survivors, max_survivors),
        )

    chosen = current
    stage_name = "base"

    if len(current) > max_survivors:
        stage_name = "base_overflow"
        for idx, rs in enumerate(tighten_rulesets, start=1):
            t_info = _collect(rs, "tighten_{}".format(idx), stop_when_gt=max_survivors)
            t = t_info["candidates"]
            if t_info["timed_out"]:
                chosen = self._finalize_on_timeout(t, p_ball, min_survivors, max_survivors)
                stage_name = "tighten_{}_timeout_fallback".format(idx)
                break
            if len(t) <= max_survivors:
                chosen = t
                stage_name = "tighten_{}".format(idx)
                if len(t) >= min_survivors:
                    break

        if len(chosen) > max_survivors:
            # Enforce the upper bound: keep only the N best-scoring
            # survivors of the strictest ruleset.
            full_info = _collect(tighten_rulesets[-1], "tighten_full_rank")
            full_for_ranking = full_info["candidates"]
            if full_info["timed_out"]:
                chosen = self._finalize_on_timeout(full_for_ranking, p_ball, min_survivors, max_survivors)
                stage_name = "tighten_rank_timeout_fallback"
            else:
                chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
                stage_name = "tighten_rank_trim"
    elif len(current) < min_survivors:
        stage_name = "base_underflow"
        for idx, rs in enumerate(relax_rulesets, start=1):
            # Relaxing only has to reach the lower bound, so stop early.
            r_info = _collect(rs, "relax_{}".format(idx), stop_when_gte=min_survivors)
            r = r_info["candidates"]
            chosen = r
            stage_name = "relax_{}".format(idx)
            if r_info["timed_out"]:
                chosen = self._finalize_on_timeout(r, p_ball, min_survivors, max_survivors)
                stage_name = "relax_{}_timeout_fallback".format(idx)
                break
            if len(r) >= min_survivors:
                break

    # NOTE(review): these guards are placed at function level (after the
    # if/elif chain), inferred from blank-line placement; confirm against
    # the upstream file.
    if len(chosen) == 0:
        # Never return nothing: if even the most relaxed rules leave no
        # survivors, build the minimum count from the previous winning numbers.
        stage_name = "relax_zero_fallback"
        chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
    elif len(chosen) < min_survivors:
        # Lower-bound guard: top up the shortfall with fallback combinations.
        stage_name = "{}_fill".format(stage_name)
        fill = self._fallback_candidates_from_prev(p_ball, min_survivors - len(chosen), exclude=set(tuple(x) for x in chosen))
        chosen.extend(fill)

    return _emit(stage_name, chosen)
|
|
||||||
|
|
||||||
def _get_base_ruleset(self):
    """Return an independent copy of the default ruleset.

    A ``BallFilter`` is built on ``<resources_path>/lotto_history.json`` and
    the ruleset held by its ``m1`` member is deep-copied, so callers can
    modify the result without touching the filter's own configuration.
    """
    history_json = os.path.join(self.resources_path, "lotto_history.json")
    return copy.deepcopy(BallFilter(history_json).m1.ruleset)
|
|
||||||
|
|
||||||
def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
|
|
||||||
ruleset = copy.deepcopy(base_ruleset)
|
|
||||||
ruleset.setdefault("filters", {})
|
|
||||||
enabled_overrides = enabled_overrides or {}
|
|
||||||
allowed_overrides = allowed_overrides or {}
|
|
||||||
for key, value in enabled_overrides.items():
|
|
||||||
ruleset["filters"].setdefault(key, {})
|
|
||||||
ruleset["filters"][key]["enabled"] = bool(value)
|
|
||||||
for key, values in allowed_overrides.items():
|
|
||||||
ruleset["filters"].setdefault(key, {})
|
|
||||||
ruleset["filters"][key]["enabled"] = True
|
|
||||||
ruleset["filters"][key]["allowed"] = list(values)
|
|
||||||
return ruleset
|
|
||||||
|
|
||||||
def _collect_candidates(
    self,
    candidates,
    no,
    df_ball,
    ruleset,
    stop_when_gt=None,
    stop_when_gte=None,
    stage_name="base",
    predict_start_ts=None,
    deadline_ts=None,
):
    """Enumerate 6-number combinations and keep those the filter accepts.

    A ``BallFilter`` is built with ``ruleset``; a combination survives when
    ``filter(...)`` returns an empty collection.

    Args:
        candidates: Pool of numbers the combinations are drawn from.
        no: Draw number passed through to the filter.
        df_ball: Draw-history frame passed through to the filter.
        ruleset: Ruleset handed to ``BallFilter``.
        stop_when_gt: Stop as soon as the survivor count exceeds this value.
        stop_when_gte: Stop as soon as the survivor count reaches this value.
        stage_name: Label used in progress messages.
        predict_start_ts: ``time.time()`` value used to report elapsed time.
        deadline_ts: ``time.time()`` value after which enumeration aborts.

    Returns:
        ``{"candidates": list, "timed_out": bool, "processed": int}``.
    """
    history_json = os.path.join(self.resources_path, "lotto_history.json")
    ball_filter = BallFilter(history_json, ruleset=ruleset)
    survivors = []

    def _elapsed():
        return (time.time() - predict_start_ts) if predict_start_ts is not None else 0.0

    def _outcome(timed_out, processed):
        return {
            "candidates": survivors,
            "timed_out": timed_out,
            "processed": processed,
        }

    processed = 0
    for processed, combo in enumerate(itertools.combinations(candidates, 6), start=1):
        if deadline_ts is not None and deadline_ts <= time.time():
            print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, _elapsed(), len(survivors)))
            return _outcome(True, processed)

        if processed % 1000000 == 0:
            print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, _elapsed(), len(survivors)))

        ball = list(combo)
        if len(ball_filter.filter(ball=ball, no=no, until_end=False, df=df_ball)) != 0:
            continue

        survivors.append(ball)
        # The survivor count only changes here, so the early-exit thresholds
        # need checking only after an append.
        if stop_when_gt is not None and len(survivors) > stop_when_gt:
            return _outcome(False, processed)
        if stop_when_gte is not None and len(survivors) >= stop_when_gte:
            return _outcome(False, processed)

    return _outcome(False, processed)
|
|
||||||
|
|
||||||
def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
|
|
||||||
chosen = list(partial_candidates)
|
|
||||||
if len(chosen) > max_survivors:
|
|
||||||
chosen = self._rank_and_trim(chosen, prev_ball, max_survivors)
|
|
||||||
elif len(chosen) < min_survivors:
|
|
||||||
fill = self._fallback_candidates_from_prev(
|
|
||||||
prev_ball,
|
|
||||||
min_survivors - len(chosen),
|
|
||||||
exclude=set(tuple(x) for x in chosen),
|
|
||||||
)
|
|
||||||
chosen.extend(fill)
|
|
||||||
return chosen
|
|
||||||
|
|
||||||
def _rank_and_trim(self, candidates, prev_ball, limit):
|
|
||||||
scored = [(self._score_candidate(ball, prev_ball), ball) for ball in candidates]
|
|
||||||
scored.sort(key=lambda x: x[0])
|
|
||||||
return [ball for _, ball in scored[:limit]]
|
|
||||||
|
|
||||||
def _score_candidate(self, ball, prev_ball):
|
|
||||||
sum_diff = abs(sum(ball) - sum(prev_ball))
|
|
||||||
even_cnt = len([x for x in ball if x % 2 == 0])
|
|
||||||
uniq_last = len(set([x % 10 for x in ball]))
|
|
||||||
contiguous_penalty = 0
|
|
||||||
s = sorted(ball)
|
|
||||||
for i in range(1, len(s)):
|
|
||||||
if s[i] - s[i - 1] == 1:
|
|
||||||
contiguous_penalty += 1
|
|
||||||
score = 0
|
|
||||||
score += sum_diff
|
|
||||||
score += abs(even_cnt - 3) * 2
|
|
||||||
score += abs(uniq_last - 5) * 2
|
|
||||||
score += contiguous_penalty
|
|
||||||
return score
|
|
||||||
|
|
||||||
def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
|
|
||||||
exclude = exclude or set()
|
|
||||||
seed = sorted(prev_ball)
|
|
||||||
out = []
|
|
||||||
delta_patterns = [
|
|
||||||
(0, 0, 0, 0, 0, 0),
|
|
||||||
(-1, 0, 0, 0, 0, 1),
|
|
||||||
(0, -1, 0, 0, 1, 0),
|
|
||||||
(0, 0, -1, 1, 0, 0),
|
|
||||||
(-2, 0, 0, 0, 0, 2),
|
|
||||||
(0, -2, 0, 0, 2, 0),
|
|
||||||
(0, 0, -2, 2, 0, 0),
|
|
||||||
(-1, -1, 0, 0, 1, 1),
|
|
||||||
(1, 0, -1, 0, 0, 0),
|
|
||||||
(0, 1, 0, -1, 0, 0),
|
|
||||||
(1, -1, 1, -1, 1, -1),
|
|
||||||
(-1, 1, -1, 1, -1, 1),
|
|
||||||
]
|
|
||||||
shift = 0
|
|
||||||
while len(out) < need_count and shift <= 8:
|
|
||||||
for delta in delta_patterns:
|
|
||||||
cand = [seed[i] + delta[i] for i in range(6)]
|
|
||||||
cand = [min(45, max(1, v + shift)) for v in cand]
|
|
||||||
cand = sorted(cand)
|
|
||||||
if len(set(cand)) != 6:
|
|
||||||
continue
|
|
||||||
t = tuple(cand)
|
|
||||||
if t in exclude:
|
|
||||||
continue
|
|
||||||
exclude.add(t)
|
|
||||||
out.append(cand)
|
|
||||||
if len(out) >= need_count:
|
|
||||||
break
|
|
||||||
shift += 1
|
|
||||||
return out
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: work out the target draw date, build the weekly
    # recommendations and send them through the Telegram bot.

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    today = datetime.today()
    weekday = today.weekday()
    # weekday() is 5 on Saturday and 6 on Sunday. On Sunday, or on Saturday
    # once the hour is past 20, target the Saturday of the following week;
    # otherwise target this week's Saturday.
    roll_to_next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=((12 if roll_to_next_week else 5) - weekday))

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: read the history file and remember its last record.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)

    # Crawling the next draw is currently disabled:
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed combinations played every week
    practice.predict1(picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    # Send the report in chunks of 100 recommendations per message.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for position, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Flush the final, partially filled chunk.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
|
||||||
@@ -1,189 +0,0 @@
|
|||||||
# 웹 호출 라이브러리를 호출합니다.
|
|
||||||
import time
|
|
||||||
import requests
|
|
||||||
from DataCrawler import DataCrawler
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from TelegramBot import TelegramBot
|
|
||||||
|
|
||||||
from filter_model_3 import BallFilter
|
|
||||||
|
|
||||||
class Practice:
    """Collects Lotto draw history and builds the weekly recommendations."""

    # Telegram bot used to send results; assigned in __init__.
    bot = None
    # Unused in this class; kept so existing attribute access keeps working.
    preprocessor = None
    predictor = None

    extract_count = None

    # Endpoint returning one draw as JSON; the draw number is appended.
    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='

    def __init__(self, resources_path):
        """Create the Telegram bot. ``resources_path`` is accepted but not used."""
        self.bot = TelegramBot()

    @staticmethod
    def _fetch_draw(draw_no):
        """Request draw ``draw_no`` and return the decoded JSON payload."""
        res = requests.post(Practice._DRAW_URL + str(draw_no))
        return res.json()

    @staticmethod
    def _store_draw(json_fp, text_fp, draw_no, result):
        """Append one draw to both history files, echo it, and return its 7 numbers."""
        numbers = [
            result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
            result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
            result['bnusNo'],
        ]
        row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + numbers)
        json_fp.write(json.dumps(result, ensure_ascii=False) + "\n")
        text_fp.write(row + "\n")
        print(row)
        return numbers

    # Collects Lotto winning data and saves it to files.
    # lottoHistoryFile: path prefix of the history files (".json" / ".txt" are appended)
    def craw(self, lottoHistoryFile, drwNo=None):
        """Download draw results into ``<lottoHistoryFile>.json`` and ``.txt``.

        With ``drwNo`` given, that single draw is appended to the files.
        Without it, both files are rewritten from draw 1 until the service
        stops answering ``success``, pausing 0.5 s between requests.

        Returns:
            The last stored draw as ``[n1..n6, bonus]``, or ``None`` when
            nothing was stored.
        """
        ball = None
        mode = 'a' if drwNo is not None else 'w'

        # ``with`` closes both files on every exit path; the early
        # ``return None`` and request errors used to leak the handles.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if drwNo is not None:
                result = self._fetch_draw(drwNo)
                if result['returnValue'] != 'success':
                    return None
                ball = self._store_draw(jsonFp, textFp, drwNo, result)
            else:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result['returnValue'] != 'success':
                        break
                    ball = self._store_draw(jsonFp, textFp, idx, result)
                    idx += 1
                    time.sleep(0.5)

        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combinations to ``result_json`` in place."""
        result_json.extend([
            [6, 7, 10, 11, 20, 45],
            [2, 7, 17, 28, 35, 39],
            [6, 10, 19, 25, 33, 35],
            [3, 17, 20, 24, 35, 45],
            [5, 15, 18, 29, 36, 41],
            [6, 15, 20, 23, 37, 43],
            [8, 15, 19, 23, 38, 41],
            [5, 11, 19, 24, 40, 45],
            [9, 16, 18, 23, 35, 43],
            [7, 13, 19, 28, 33, 44],
            [7, 11, 18, 29, 37, 42],
        ])
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every combination of 1..45 that passes ``BallFilter`` to ``result_json``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Target draw date (``YYYYMMDD``), passed to ``getNextNo``.
            result_json: List extended in place with the surviving combinations.

        Returns:
            ``(p_no, p_ball)``: number and six main numbers of the draw
            preceding the target draw.
        """
        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Iterate lazily: materializing all 8,145,060 combinations in a list
        # first only costs memory.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)
            # A non-empty filter result means the combination was rejected.
            if len(ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                continue

            result_json.append(ball)

        previous = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return previous[0], previous[1:7]
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: work out the target draw date, build the weekly
    # recommendations and send them through the Telegram bot.

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    today = datetime.today()
    weekday = today.weekday()
    # weekday() is 5 on Saturday and 6 on Sunday. On Sunday, or on Saturday
    # once the hour is past 20, target the Saturday of the following week;
    # otherwise target this week's Saturday.
    roll_to_next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=((12 if roll_to_next_week else 5) - weekday))

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: read the history file and remember its last record.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)

    # Crawling the next draw is currently disabled:
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed combinations played every week
    practice.predict1(picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    # Send the report in chunks of 100 recommendations per message.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for position, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Flush the final, partially filled chunk.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
|
||||||
@@ -1216,3 +1216,4 @@
|
|||||||
{"returnValue": "success", "drwNoDate": "2026-03-21", "drwNo": 1216, "drwtNo1": 3, "drwtNo2": 10, "drwtNo3": 14, "drwtNo4": 15, "drwtNo5": 23, "drwtNo6": 24, "bnusNo": 25}
|
{"returnValue": "success", "drwNoDate": "2026-03-21", "drwNo": 1216, "drwtNo1": 3, "drwtNo2": 10, "drwtNo3": 14, "drwtNo4": 15, "drwtNo5": 23, "drwtNo6": 24, "bnusNo": 25}
|
||||||
{"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41}
|
{"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41}
|
||||||
{"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25}
|
{"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25}
|
||||||
|
{"returnValue": "success", "drwNoDate": "2026-04-11", "drwNo": 1219, "drwtNo1": 1, "drwtNo2": 2, "drwtNo3": 15, "drwtNo4": 28, "drwtNo5": 39, "drwtNo6": 45, "bnusNo": 31}
|
||||||
|
|||||||
@@ -1216,3 +1216,4 @@
|
|||||||
1216,3,10,14,15,23,24,25
|
1216,3,10,14,15,23,24,25
|
||||||
1217,8,10,15,20,29,31,41
|
1217,8,10,15,20,29,31,41
|
||||||
1218,3,28,31,32,42,45,25
|
1218,3,28,31,32,42,45,25
|
||||||
|
1219,1,2,15,28,39,45,31
|
||||||
|
|||||||
99
review_1.py
99
review_1.py
@@ -1,99 +0,0 @@
|
|||||||
import os
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_1 import BallFilter
|
|
||||||
|
|
||||||
class FilterTestReview:
    """Replays the ball filter over all 6-number combinations for chosen draws."""

    # BallFilter instance; assigned in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Count, per draw, which combinations pass the filter and how they would have placed.

        Args:
            df_ball: History frame with columns ``no, b1..b6, bn``.
            nos: Iterable of draw numbers to replay; ``None`` means none.

        Returns:
            ``(win_history, win_history_size)``. ``win_history`` maps a draw
            number to its winning numbers when the winning combination passed
            the filter; ``win_history_size`` maps every replayed draw to the
            number of combinations that passed.
        """
        win_history = {}
        win_history_size = {}

        # The declared default used to fail with "'NoneType' object is not
        # iterable"; treat it as "nothing to replay".
        if nos is None:
            return win_history, win_history_size

        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            answer_set = set(answer)
            bonus = balls[7]

            # Only the survivor count is reported, so the survivors themselves
            # are not stored.
            survivor_count = 0
            # Prize tier -> passing combinations: 1 = six matches,
            # 2 = five matches plus bonus, 3 = five, 4 = four, 5 = three.
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}

            # Iterate lazily instead of building an 8,145,060-element list per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(combo)
                # A non-empty filter result means the combination was rejected.
                if len(self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                    continue

                survivor_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    win_history.setdefault(no, list(answer))
                    win_dic[1].append(ball)
                elif match == 5:
                    # Second prize: five matches and the bonus number.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = survivor_count

            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))

        return win_history, win_history_size
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: replay the filter for a fixed list of draws and
    # report which of them would have been won.

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTestReview = FilterTestReview(resources_path)

    start = time.time()
    # A full sweep would use nos=range(1126, 21, -1) instead of this list.
    win_history, win_history_size = filterTestReview.validate(
        df_ball,
        nos=[1057,1046,1022,900,841,816,696,593,574,426,356,324,303,245,147,139,71])
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print("{} 회 당첨".format(len(win_history)))
    sorted_win_history = sorted(win_history.keys())
    for won_no in sorted_win_history:
        print("\t>{} > {} ({})".format(won_no, win_history[won_no], win_history_size[won_no]))
|
|
||||||
99
review_2.py
99
review_2.py
@@ -1,99 +0,0 @@
|
|||||||
import os
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_2 import BallFilter
|
|
||||||
|
|
||||||
class FilterTestReview:
    """Replays the ball filter over all 6-number combinations for chosen draws."""

    # BallFilter instance; assigned in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Count, per draw, which combinations pass the filter and how they would have placed.

        Args:
            df_ball: History frame with columns ``no, b1..b6, bn``.
            nos: Iterable of draw numbers to replay; ``None`` means none.

        Returns:
            ``(win_history, win_history_size)``. ``win_history`` maps a draw
            number to its winning numbers when the winning combination passed
            the filter; ``win_history_size`` maps every replayed draw to the
            number of combinations that passed.
        """
        win_history = {}
        win_history_size = {}

        # The declared default used to fail with "'NoneType' object is not
        # iterable"; treat it as "nothing to replay".
        if nos is None:
            return win_history, win_history_size

        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            answer_set = set(answer)
            bonus = balls[7]

            # Only the survivor count is reported, so the survivors themselves
            # are not stored.
            survivor_count = 0
            # Prize tier -> passing combinations: 1 = six matches,
            # 2 = five matches plus bonus, 3 = five, 4 = four, 5 = three.
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}

            # Iterate lazily instead of building an 8,145,060-element list per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(combo)
                # A non-empty filter result means the combination was rejected.
                if len(self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                    continue

                survivor_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    win_history.setdefault(no, list(answer))
                    win_dic[1].append(ball)
                elif match == 5:
                    # Second prize: five matches and the bonus number.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = survivor_count

            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))

        return win_history, win_history_size
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: replay the filter for a fixed list of draws and
    # report which of them would have been won.

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTestReview = FilterTestReview(resources_path)

    start = time.time()
    # A full sweep would use nos=range(1126, 21, -1) instead of this list.
    win_history, win_history_size = filterTestReview.validate(
        df_ball,
        nos=[1057,1046,1022,900,841,816,696,593,574,426,356,324,303,245,147,139,71])
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print("{} 회 당첨".format(len(win_history)))
    sorted_win_history = sorted(win_history.keys())
    for won_no in sorted_win_history:
        print("\t>{} > {} ({})".format(won_no, win_history[won_no], win_history_size[won_no]))
|
|
||||||
99
review_3.py
99
review_3.py
@@ -1,99 +0,0 @@
|
|||||||
import os
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_3 import BallFilter
|
|
||||||
|
|
||||||
class FilterTestReview:
    """Replays the ball filter over all 6-number combinations for chosen draws."""

    # BallFilter instance; assigned in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Count, per draw, which combinations pass the filter and how they would have placed.

        Args:
            df_ball: History frame with columns ``no, b1..b6, bn``.
            nos: Iterable of draw numbers to replay; ``None`` means none.

        Returns:
            ``(win_history, win_history_size)``. ``win_history`` maps a draw
            number to its winning numbers when the winning combination passed
            the filter; ``win_history_size`` maps every replayed draw to the
            number of combinations that passed.
        """
        win_history = {}
        win_history_size = {}

        # The declared default used to fail with "'NoneType' object is not
        # iterable"; treat it as "nothing to replay".
        if nos is None:
            return win_history, win_history_size

        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            answer_set = set(answer)
            bonus = balls[7]

            # Only the survivor count is reported, so the survivors themselves
            # are not stored.
            survivor_count = 0
            # Prize tier -> passing combinations: 1 = six matches,
            # 2 = five matches plus bonus, 3 = five, 4 = four, 5 = three.
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}

            # Iterate lazily instead of building an 8,145,060-element list per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(combo)
                # A non-empty filter result means the combination was rejected.
                if len(self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                    continue

                survivor_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    win_history.setdefault(no, list(answer))
                    win_dic[1].append(ball)
                elif match == 5:
                    # Second prize: five matches and the bonus number.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = survivor_count

            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))

        return win_history, win_history_size
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: replay the filter for a fixed list of draws and
    # report which of them would have been won.

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTestReview = FilterTestReview(resources_path)

    start = time.time()
    # A full sweep would use nos=range(1126, 21, -1) instead of this list.
    win_history, win_history_size = filterTestReview.validate(
        df_ball,
        nos=[1057,1046,1022,900,841,816,696,593,574,426,356,324,303,245,147,139,71])
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print("{} 회 당첨".format(len(win_history)))
    sorted_win_history = sorted(win_history.keys())
    for won_no in sorted_win_history:
        print("\t>{} > {} ({})".format(won_no, win_history[won_no], win_history_size[won_no]))
|
|
||||||
236
test_1.py
236
test_1.py
@@ -1,236 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_1 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
|
|
||||||
|
|
||||||
ballFilter = None
|
|
||||||
|
|
||||||
def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
|
|
||||||
# test는 이전회차/최근 N주 윈도우 feature가 필수이므로 전체 히스토리(json)를 사용해야 한다.
|
|
||||||
lottoHistoryFileName = os.path.join(resources_path, history_json)
|
|
||||||
self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
|
|
||||||
win_count = 0
|
|
||||||
|
|
||||||
no_filter_ball = {}
|
|
||||||
|
|
||||||
printLog = True
|
|
||||||
filter_dic = {}
|
|
||||||
filter_dic_len = {}
|
|
||||||
filter_dic_1 = {}
|
|
||||||
filter_dic_2 = {}
|
|
||||||
# df_ball 은 전체 히스토리일 수 있으며, 채점은 [start_no, end_no] 범위만 수행한다.
|
|
||||||
for i in range(len(df_ball) - 1, -1, -1):
|
|
||||||
|
|
||||||
no = df_ball['no'].iloc[i]
|
|
||||||
no = int(no)
|
|
||||||
if no < start_no or end_no < no:
|
|
||||||
continue
|
|
||||||
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
|
|
||||||
answer = answer[1:7]
|
|
||||||
|
|
||||||
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
|
|
||||||
filter_type = list(filter_type)
|
|
||||||
size = len(filter_type)
|
|
||||||
|
|
||||||
if size == 0:
|
|
||||||
win_count += 1
|
|
||||||
no_filter_ball[no] = answer
|
|
||||||
print("\t", no)
|
|
||||||
elif size == 1:
|
|
||||||
key = filter_type[0]
|
|
||||||
if key not in filter_dic_1:
|
|
||||||
filter_dic_1[key] = 1
|
|
||||||
else:
|
|
||||||
filter_dic_1[key] += 1
|
|
||||||
|
|
||||||
if printLog:
|
|
||||||
print("\t", no, filter_type)
|
|
||||||
elif size == 2:
|
|
||||||
key = ','.join(filter_type)
|
|
||||||
if key not in filter_dic_2:
|
|
||||||
filter_dic_2[key] = 1
|
|
||||||
else:
|
|
||||||
filter_dic_2[key] += 1
|
|
||||||
|
|
||||||
if printLog:
|
|
||||||
print("\t", no, filter_type)
|
|
||||||
else:
|
|
||||||
if printLog:
|
|
||||||
print("\t", no, filter_type)
|
|
||||||
|
|
||||||
# 회차별 필터개수가 적은 것을 정렬하기 위함
|
|
||||||
if size not in filter_dic_len:
|
|
||||||
filter_dic_len[size] = []
|
|
||||||
filter_dic_len[size].append(filter_type)
|
|
||||||
|
|
||||||
for f_t in filter_type:
|
|
||||||
if f_t not in filter_dic:
|
|
||||||
filter_dic[f_t] = 1
|
|
||||||
else:
|
|
||||||
filter_dic[f_t] += 1
|
|
||||||
|
|
||||||
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
|
|
||||||
sorted_filter_dic_len = sorted(filter_dic_len.keys())
|
|
||||||
for filter_count in sorted_filter_dic_len:
|
|
||||||
for filter_type in filter_dic_len[filter_count]:
|
|
||||||
print("\t\t>{} > {}".format(filter_count, filter_type))
|
|
||||||
|
|
||||||
print("\n\t[걸러진 유일 필터]")
|
|
||||||
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
|
|
||||||
for i in range(len(sorted_filter_dic_1)):
|
|
||||||
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
|
|
||||||
|
|
||||||
print("\n\t[2개 필터에 걸린 경우]")
|
|
||||||
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
|
|
||||||
for i in range(len(sorted_filter_dic_2)):
|
|
||||||
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
|
|
||||||
|
|
||||||
print("\n\t[Filter 유형 별 걸린 개수]")
|
|
||||||
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
|
|
||||||
for i in range(len(sorted_filter_dic)):
|
|
||||||
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
|
|
||||||
|
|
||||||
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
|
|
||||||
total = max(0, end_no - start_no + 1)
|
|
||||||
rate = (100 * len(no_filter_ball) / total) if total else 0.0
|
|
||||||
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), total, rate))
|
|
||||||
for no in no_filter_ball:
|
|
||||||
print("\t\t>", no, no_filter_ball[no])
|
|
||||||
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), total, rate))
|
|
||||||
|
|
||||||
return win_count
|
|
||||||
|
|
||||||
def find_final_candidates(self, no, df_ball, filter_ball=None):
|
|
||||||
final_candidates = []
|
|
||||||
|
|
||||||
generation_balls = list(range(1, 46))
|
|
||||||
|
|
||||||
nCr = list(itertools.combinations(generation_balls, 6))
|
|
||||||
for idx, ball in enumerate(nCr):
|
|
||||||
|
|
||||||
if idx % 1000000 == 0:
|
|
||||||
print(" - {} processed...".format(idx))
|
|
||||||
|
|
||||||
if filter_ball is not None and 0 < len(set(ball) & set(filter_ball)):
|
|
||||||
continue
|
|
||||||
|
|
||||||
filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
|
|
||||||
filter_size = len(filter_type)
|
|
||||||
|
|
||||||
if filter_size:
|
|
||||||
continue
|
|
||||||
|
|
||||||
final_candidates.append(ball)
|
|
||||||
|
|
||||||
return final_candidates
|
|
||||||
|
|
||||||
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
|
|
||||||
|
|
||||||
win_count = 0
|
|
||||||
for i in range(len(df_ball)-1, 0, -1):
|
|
||||||
|
|
||||||
no = df_ball['no'].iloc[i]
|
|
||||||
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
|
|
||||||
answer = answer[1:7]
|
|
||||||
|
|
||||||
if filter_ball is not None and len(set(answer) & set(filter_ball)):
|
|
||||||
continue
|
|
||||||
|
|
||||||
filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
|
|
||||||
|
|
||||||
if len(filter_type) == 0:
|
|
||||||
win_count += 1
|
|
||||||
print("\t\t>{}. {}".format(no, answer))
|
|
||||||
|
|
||||||
print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def validate(self, df_ball, nos=None):
    """Check, per draw, whether the winning combination survives the filter.

    For each draw number in ``nos`` the 6-of-45 combinations are enumerated
    until the one equal to the winning numbers is reached; that combination
    is then passed to ``self.ballFilter.extract_final_candidates``.

    Args:
        df_ball: Draw-history DataFrame with a ``no`` column followed by the
            six winning balls.
        nos: Iterable of draw numbers to validate. ``None`` (the default) is
            treated as "nothing to validate" instead of raising ``TypeError``.

    Returns:
        dict mapping each validated draw number whose winning numbers passed
        the filter to those winning numbers.
    """
    win_history = {}

    if nos is None:
        return win_history

    for no in nos:
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0]
        answer = answer[1:7]
        answer_set = set(answer)

        # Stream the combinations lazily rather than building a list of
        # millions of tuples for every draw.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            # Only the combination equal to the answer can be a win, so do
            # the cheap comparison first and run the filter on that one only.
            # NOTE(review): assumes extract_final_candidates has no side
            # effects that depend on being called for every combination.
            if len(answer_set & set(combo)) != 6:
                continue

            ball = list(combo)
            filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
            if 0 == len(filter_type):
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
            # No other combination can match the answer; stop either way.
            break

    return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: run the filter evaluation over the draws in
    # [--start-no, --end-no] and print how long it took.

    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources

    # Load the full history txt so previous-draw/window features are computed
    # correctly, but score only the test range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # The string literal below is a disabled "final candidate export" step; it
    # is never executed.
    # NOTE(review): if re-enabled, the write loop stringifies `answer` for
    # every candidate instead of the loop variable `ball` — confirm and fix.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in answer]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
|
||||||
236
test_2.py
236
test_2.py
@@ -1,236 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_2 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
    """Evaluation harness that scores a ``BallFilter`` against historical draws."""

    # Filter under test; replaced per instance in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the ``BallFilter`` from ``<resources_path>/<history_json>``.

        Testing needs previous-draw / recent-N-week window features, so the
        full history (json) must be used.
        """
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

        return

    @staticmethod
    def _print_ranked(title, counts):
        """Print ``title`` followed by ``counts`` entries, highest count first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Run the filter on each winning combination in a draw range and report.

        ``df_ball`` may hold the full history; only draws whose number lies in
        ``[start_no, end_no]`` are scored. Rows are visited last to first.

        Args:
            df_ball: Draw-history DataFrame with a ``no`` column followed by
                the six winning balls.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).
            filter_ball: Accepted for interface compatibility; not used here.

        Returns:
            int: number of scored draws whose winning numbers triggered no
            filter.
        """
        win_count = 0
        no_filter_ball = {}

        print_log = True
        filter_dic = {}      # filter name -> number of draws it rejected
        filter_dic_len = {}  # number of triggered filters -> list of reason lists
        filter_dic_1 = {}    # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}    # "a,b" -> draws rejected by exactly that pair

        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            # Sort so the same pair of reasons always produces the same key;
            # raw set iteration order is not guaranteed to be stable.
            filter_type = sorted(filter_type)
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1

                if print_log:
                    print("\t", no, filter_type)

            # Group per-draw results by how many filters fired so they can be
            # listed smallest first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len.keys()):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ``.2f`` gives two decimals; a bare ``.2`` would switch to general
        # notation and print e.g. ``1e+02`` for 100.0.
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the ball filter does not reject.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw-history DataFrame forwarded to the filter.
            filter_ball: Optional iterable of ball numbers. Combinations that
                contain any of these balls are never considered.

        Returns:
            list of 6-tuples (ascending, in lexicographic order) for which
            the filter returned an empty result.
        """
        final_candidates = []

        # Removing excluded balls from the pool up front yields exactly the
        # combinations that share no ball with ``filter_ball``, in the same
        # order, without visiting the others.
        excluded = set(filter_ball) if filter_ball is not None else set()
        generation_balls = [b for b in range(1, 46) if b not in excluded]

        # Stream lazily instead of materialising millions of tuples; ``idx``
        # counts the combinations actually examined.
        for idx, ball in enumerate(itertools.combinations(generation_balls, 6)):

            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)

            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count the draws whose winning numbers survive ``extract_final_candidates``.

        Rows are visited from the last one down to index 1 (row 0 is never
        checked). Draws sharing a ball with ``filter_ball`` are skipped. The
        tally is printed next to ``p_win_count``; nothing is returned.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):

            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        Args:
            df_ball: Draw-history DataFrame with a ``no`` column followed by
                the six winning balls.
            nos: Iterable of draw numbers to validate. ``None`` (the default)
                is treated as "nothing to validate" instead of raising
                ``TypeError``.

        Returns:
            dict mapping each validated draw number whose winning numbers
            passed ``extract_final_candidates`` to those winning numbers.
        """
        win_history = {}

        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            answer_set = set(answer)

            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                # Only the combination equal to the answer can be a win, so
                # compare first and run the filter on that one only.
                # NOTE(review): assumes extract_final_candidates has no side
                # effects that depend on being called for every combination.
                if len(answer_set & set(combo)) != 6:
                    continue

                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type):
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                # No other combination can match the answer; stop either way.
                break

        return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: run the filter evaluation over the draws in
    # [--start-no, --end-no] and print how long it took.

    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    # NOTE(review): the help text names filter_model_1.py although this file
    # imports BallFilter from filter_model_2 — confirm which default applies.
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources

    # Load the full history txt so previous-draw/window features are computed
    # correctly, but score only the test range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # The string literal below is a disabled "final candidate export" step; it
    # is never executed.
    # NOTE(review): if re-enabled, the write loop stringifies `answer` for
    # every candidate instead of the loop variable `ball` — confirm and fix.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in answer]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
|
||||||
236
test_3.py
236
test_3.py
@@ -1,236 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_3 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
    """Evaluation harness that scores a ``BallFilter`` against historical draws."""

    # Filter under test; replaced per instance in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the ``BallFilter`` from ``<resources_path>/<history_json>``.

        Testing needs previous-draw / recent-N-week window features, so the
        full history (json) must be used.
        """
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

        return

    @staticmethod
    def _print_ranked(title, counts):
        """Print ``title`` followed by ``counts`` entries, highest count first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Run the filter on each winning combination in a draw range and report.

        ``df_ball`` may hold the full history; only draws whose number lies in
        ``[start_no, end_no]`` are scored. Rows are visited last to first.

        Args:
            df_ball: Draw-history DataFrame with a ``no`` column followed by
                the six winning balls.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).
            filter_ball: Accepted for interface compatibility; not used here.

        Returns:
            int: number of scored draws whose winning numbers triggered no
            filter.
        """
        win_count = 0
        no_filter_ball = {}

        print_log = True
        filter_dic = {}      # filter name -> number of draws it rejected
        filter_dic_len = {}  # number of triggered filters -> list of reason lists
        filter_dic_1 = {}    # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}    # "a,b" -> draws rejected by exactly that pair

        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            # Sort so the same pair of reasons always produces the same key;
            # raw set iteration order is not guaranteed to be stable.
            filter_type = sorted(filter_type)
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1

                if print_log:
                    print("\t", no, filter_type)

            # Group per-draw results by how many filters fired so they can be
            # listed smallest first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len.keys()):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ``.2f`` gives two decimals; a bare ``.2`` would switch to general
        # notation and print e.g. ``1e+02`` for 100.0.
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the ball filter does not reject.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw-history DataFrame forwarded to the filter.
            filter_ball: Optional iterable of ball numbers. Combinations that
                contain any of these balls are never considered.

        Returns:
            list of 6-tuples (ascending, in lexicographic order) for which
            the filter returned an empty result.
        """
        final_candidates = []

        # Removing excluded balls from the pool up front yields exactly the
        # combinations that share no ball with ``filter_ball``, in the same
        # order, without visiting the others.
        excluded = set(filter_ball) if filter_ball is not None else set()
        generation_balls = [b for b in range(1, 46) if b not in excluded]

        # Stream lazily instead of materialising millions of tuples; ``idx``
        # counts the combinations actually examined.
        for idx, ball in enumerate(itertools.combinations(generation_balls, 6)):

            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)

            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count the draws whose winning numbers survive ``extract_final_candidates``.

        Rows are visited from the last one down to index 1 (row 0 is never
        checked). Draws sharing a ball with ``filter_ball`` are skipped. The
        tally is printed next to ``p_win_count``; nothing is returned.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):

            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        Args:
            df_ball: Draw-history DataFrame with a ``no`` column followed by
                the six winning balls.
            nos: Iterable of draw numbers to validate. ``None`` (the default)
                is treated as "nothing to validate" instead of raising
                ``TypeError``.

        Returns:
            dict mapping each validated draw number whose winning numbers
            passed ``extract_final_candidates`` to those winning numbers.
        """
        win_history = {}

        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            answer_set = set(answer)

            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                # Only the combination equal to the answer can be a win, so
                # compare first and run the filter on that one only.
                # NOTE(review): assumes extract_final_candidates has no side
                # effects that depend on being called for every combination.
                if len(answer_set & set(combo)) != 6:
                    continue

                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type):
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                # No other combination can match the answer; stop either way.
                break

        return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Script entry point: run the filter evaluation over the draws in
    # [--start-no, --end-no] and print how long it took.

    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    # NOTE(review): the help text names filter_model_1.py although this file
    # imports BallFilter from filter_model_3 — confirm which default applies.
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources

    # Load the full history txt so previous-draw/window features are computed
    # correctly, but score only the test range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # The string literal below is a disabled "final candidate export" step; it
    # is never executed.
    # NOTE(review): if re-enabled, the write loop stringifies `answer` for
    # every candidate instead of the loop variable `ball` — confirm and fix.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in answer]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
|
||||||
@@ -1,405 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
학습 구간(1~800회) 당첨번호로 final_BallFilter.extract_final_candidates 에 쓸 허용 집합을 계산합니다.
|
|
||||||
표준 라이브러리 + pandas(df 호환)만 사용합니다.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import csv
|
|
||||||
import re
|
|
||||||
from collections import defaultdict
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
ROOT = Path(__file__).resolve().parents[1]
|
|
||||||
HISTORY = ROOT / "resources" / "lotto_history.txt"
|
|
||||||
BALLFILTER_SRC = ROOT / "BallFilter_25.py"
|
|
||||||
OUT = ROOT / "final_filter_params.py"
|
|
||||||
|
|
||||||
TRAIN_LO = 1
|
|
||||||
TRAIN_HI = 800
|
|
||||||
|
|
||||||
# 학습 분포에서 너무 넓은 합집합(union)을 피하기 위해 고유값 기준 백분위 밴드 후,
|
|
||||||
# 각 회차 특성값이 밴드 밖이면 해당 값을 다시 포함(학습 당첨 100% 커버).
|
|
||||||
# 좁을수록 필터가 강해짐. 학습·검증 균형은 이 값과 final_filterTest.py 결과로 조정.
|
|
||||||
PCT_LO = 8
|
|
||||||
PCT_HI = 92
|
|
||||||
|
|
||||||
PRIME = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43}
|
|
||||||
COMPOSITE = {4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45}
|
|
||||||
|
|
||||||
|
|
||||||
def load_draws():
    """Read the history CSV at ``HISTORY`` into ``{draw_no: sorted six balls}``.

    Empty CSV rows are ignored. Column 0 is the draw number and columns 1-6
    are the winning balls. Entries are inserted in ascending draw order.
    """
    parsed = []
    with open(HISTORY, newline="", encoding="utf-8") as handle:
        for record in csv.reader(handle):
            if record:
                draw_no = int(record[0])
                parsed.append((draw_no, sorted(map(int, record[1:7]))))
    # Stable sort by draw number; for duplicates the later row wins.
    return dict(sorted(parsed, key=lambda item: item[0]))
|
|
||||||
|
|
||||||
|
|
||||||
def get_ac(ball):
    """Return the AC value of a six-ball draw.

    This is the number of distinct pairwise differences ``ball[i] - ball[j]``
    (``i > j``) minus 5, i.e. minus (ball count - 1).
    """
    differences = {ball[hi] - ball[lo] for hi in range(1, 6) for lo in range(hi)}
    return len(differences) - 5
|
|
||||||
|
|
||||||
|
|
||||||
def interval_sum(ball):
    """Return the sum of the five gaps between consecutive balls of a six-ball draw."""
    total = 0
    for idx in range(1, 6):
        total += ball[idx] - ball[idx - 1]
    return total
|
|
||||||
|
|
||||||
|
|
||||||
def first_letter_sum(ball):
    """Return the sum of the leading digits of the balls written with two characters.

    Balls whose decimal text is not exactly two characters long contribute
    nothing.
    """
    total = 0
    for b in ball:
        text = str(b)
        if len(text) == 2:
            total += int(text[0])
    return total
|
|
||||||
|
|
||||||
|
|
||||||
def last_letter_sum(ball):
    """Return the sum of the final digits of the balls.

    Two-character balls contribute their second digit and one-character balls
    contribute themselves; any other length contributes nothing.
    """
    total = 0
    for b in ball:
        text = str(b)
        if len(text) == 2:
            total += int(text[1])
        elif len(text) == 1:
            total += int(text)
    return total
|
|
||||||
|
|
||||||
|
|
||||||
def uniq_end_digits(ball):
    """Return how many distinct last digits (``b % 10``) occur among the balls."""
    endings = set()
    for b in ball:
        endings.add(b % 10)
    return len(endings)
|
|
||||||
|
|
||||||
|
|
||||||
def high_low(ball):
    """Return ``(low, high)``: counts of balls below 23 and above 23.

    A ball equal to 23 is counted in neither group.
    """
    low = high = 0
    for b in ball:
        if b < 23:
            low += 1
        elif 23 < b:
            high += 1
    return low, high
|
|
||||||
|
|
||||||
|
|
||||||
def section10_count(ball):
    """Return how many distinct tens sections (``int(b / 10)``) the balls fall into."""
    return len({int(b / 10) for b in ball})
|
|
||||||
|
|
||||||
|
|
||||||
def count_mult(ball, m):
    """Return how many balls are exact multiples of ``m``."""
    hits = 0
    for b in ball:
        if b % m == 0:
            hits += 1
    return hits
|
|
||||||
|
|
||||||
|
|
||||||
def continus_max(ball):
    """Return the length of the longest run of consecutive numbers in a six-ball draw.

    Adjacent positions are compared, so a result of 1 means no two
    neighbouring balls differ by exactly one.
    """
    longest = 1
    current = 1
    for idx in range(1, 6):
        if ball[idx] == ball[idx - 1] + 1:
            current += 1
            if current > longest:
                longest = current
        else:
            current = 1
    return longest
|
|
||||||
|
|
||||||
|
|
||||||
def weeks_freq(draws_map, answer, no, week):
    """Count the balls of ``answer`` that appeared in the ``week`` draws before ``no``.

    Args:
        draws_map: Mapping of draw number to that draw's balls.
        answer: Balls to look up.
        no: Reference draw number; draws ``no - 1`` down to ``no - week`` are
            consulted, and missing draw numbers are ignored.
        week: Number of preceding draws to consider.

    Returns:
        int: number of entries of ``answer`` found among those draws' balls.
    """
    recent = set()
    for offset in range(1, week + 1):
        recent.update(draws_map.get(no - offset, ()))
    return len([b for b in answer if b in recent])
|
|
||||||
|
|
||||||
|
|
||||||
def pct_band_unique(values, lo=PCT_LO, hi=PCT_HI):
    """Keep only the distinct values that fall inside a percentile band.

    The distinct values are sorted, and the band limits are the elements at
    positions ``int(lo/100 * (n-1))`` and ``int(hi/100 * (n-1))`` of that
    sorted list. With six or fewer distinct values everything is kept.

    Args:
        values: Collection of comparable values.
        lo: Lower percentile (0-100) of the band.
        hi: Upper percentile (0-100) of the band.

    Returns:
        set of the distinct values within the band (inclusive); an empty set
        when ``values`` is empty.
    """
    if not values:
        return set()
    distinct = sorted(set(values))
    count = len(distinct)
    if count <= 6:
        return set(distinct)
    lower = distinct[int((lo / 100.0) * (count - 1))]
    upper = distinct[int((hi / 100.0) * (count - 1))]
    return set(filter(lambda v: lower <= v <= upper, distinct))
|
|
||||||
|
|
||||||
|
|
||||||
def parse_pair_triple_rules():
    """Extract pair and triple rules from the source text at ``BALLFILTER_SRC``.

    The file is scanned for expressions of the form
    ``len(set_ball & {a, b}) == 2`` and ``len(set_ball & {a, b, c}) == 3``.

    Returns:
        ``(pairs, triples)``: two lists of frozensets, in order of appearance,
        keeping only matches whose brace content has exactly 2 (respectively
        3) comma-separated integers.
    """
    source = BALLFILTER_SRC.read_text(encoding="utf-8")

    def collect(pattern, size):
        # Gather every brace group matched by ``pattern`` that lists exactly
        # ``size`` numbers.
        found = []
        for match in re.finditer(pattern, source):
            numbers = [int(token.strip()) for token in match.group(1).split(",")]
            if len(numbers) == size:
                found.append(frozenset(numbers))
        return found

    pairs = collect(r"len\(set_ball & \{([^}]+)\}\) == 2", 2)
    triples = collect(r"len\(set_ball & \{([^}]+)\}\) == 3", 3)
    return pairs, triples
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
draws = load_draws()
|
|
||||||
pair_rules, triple_rules = parse_pair_triple_rules()
|
|
||||||
|
|
||||||
train_draws = {n: draws[n] for n in range(TRAIN_LO, TRAIN_HI + 1) if n in draws}
|
|
||||||
|
|
||||||
# 블랙리스트: 학습 당첨 6개에 함께 등장한 쌍/삼은 제외(당첨을 막지 않음)
|
|
||||||
train_pairs_seen = set()
|
|
||||||
train_triples_seen = set()
|
|
||||||
for b in train_draws.values():
|
|
||||||
for i in range(6):
|
|
||||||
for j in range(i + 1, 6):
|
|
||||||
train_pairs_seen.add(frozenset((b[i], b[j])))
|
|
||||||
for i in range(6):
|
|
||||||
for j in range(i + 1, 6):
|
|
||||||
for k in range(j + 1, 6):
|
|
||||||
train_triples_seen.add(frozenset((b[i], b[j], b[k])))
|
|
||||||
|
|
||||||
pair_block = [p for p in pair_rules if p not in train_pairs_seen]
|
|
||||||
triple_block = [t for t in triple_rules if t not in train_triples_seen]
|
|
||||||
|
|
||||||
sets = defaultdict(set)
|
|
||||||
flags_prev = {"need_relax_previous": False, "need_relax_prev7": False}
|
|
||||||
|
|
||||||
for no in range(2, TRAIN_HI + 1):
|
|
||||||
if no not in draws or (no - 1) not in draws:
|
|
||||||
continue
|
|
||||||
ball = draws[no]
|
|
||||||
p_ball = draws[no - 1]
|
|
||||||
|
|
||||||
s = sum(ball)
|
|
||||||
sets["sum6"].add(s)
|
|
||||||
sets["sum6_diff"].add(abs(s - sum(p_ball)))
|
|
||||||
|
|
||||||
avg = s // 6
|
|
||||||
pavg = sum(p_ball) // 6
|
|
||||||
sets["avg6"].add(avg)
|
|
||||||
sets["avg6_diff"].add(abs(avg - pavg))
|
|
||||||
|
|
||||||
s3f = ball[0] + ball[1] + ball[2]
|
|
||||||
ps3f = p_ball[0] + p_ball[1] + p_ball[2]
|
|
||||||
sets["sum3f"].add(s3f)
|
|
||||||
sets["sum3f_diff"].add(abs(s3f - ps3f))
|
|
||||||
|
|
||||||
s3b = ball[3] + ball[4] + ball[5]
|
|
||||||
ps3b = p_ball[3] + p_ball[4] + p_ball[5]
|
|
||||||
sets["sum3b"].add(s3b)
|
|
||||||
sets["sum3b_diff"].add(abs(s3b - ps3b))
|
|
||||||
|
|
||||||
l, h = high_low(ball)
|
|
||||||
sets["hl_allowed"].add((l, h))
|
|
||||||
|
|
||||||
gh = ball[0] + ball[5]
|
|
||||||
pgh = p_ball[0] + p_ball[5]
|
|
||||||
sets["go_sum"].add(gh)
|
|
||||||
sets["go_sum_diff"].add(abs(gh - pgh))
|
|
||||||
|
|
||||||
iv = interval_sum(ball)
|
|
||||||
piv = interval_sum(p_ball)
|
|
||||||
sets["interval"].add(iv)
|
|
||||||
sets["interval_diff"].add(abs(iv - piv))
|
|
||||||
|
|
||||||
fl = first_letter_sum(ball)
|
|
||||||
pfl = first_letter_sum(p_ball)
|
|
||||||
sets["first_letter"].add(fl)
|
|
||||||
sets["first_letter_diff"].add(abs(fl - pfl))
|
|
||||||
|
|
||||||
ll = last_letter_sum(ball)
|
|
||||||
pll = last_letter_sum(p_ball)
|
|
||||||
sets["last_letter"].add(ll)
|
|
||||||
sets["last_letter_diff"].add(abs(ll - pll))
|
|
||||||
|
|
||||||
sets["b0"].add(ball[0])
|
|
||||||
sets["b0_diff"].add(abs(ball[0] - p_ball[0]))
|
|
||||||
sets["b5"].add(ball[5])
|
|
||||||
sets["b5_diff"].add(abs(ball[5] - p_ball[5]))
|
|
||||||
|
|
||||||
sets["uniq_end"].add(uniq_end_digits(ball))
|
|
||||||
sets["uniq_end_diff"].add(abs(uniq_end_digits(ball) - uniq_end_digits(p_ball)))
|
|
||||||
|
|
||||||
ac = get_ac(ball)
|
|
||||||
pac = get_ac(p_ball)
|
|
||||||
sets["ac"].add(ac)
|
|
||||||
sets["ac_diff"].add(abs(ac - pac))
|
|
||||||
|
|
||||||
for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
|
|
||||||
sets[f"mul{m}"].add(count_mult(ball, m))
|
|
||||||
sets[f"mul{m}_diff"].add(abs(count_mult(ball, m) - count_mult(p_ball, m)))
|
|
||||||
|
|
||||||
pn = len(set(ball) & PRIME)
|
|
||||||
sets["prime_n"].add(pn)
|
|
||||||
|
|
||||||
cn = len(set(ball) & COMPOSITE)
|
|
||||||
sets["composite_n"].add(cn)
|
|
||||||
sets["composite_diff"].add(abs(cn - len(set(p_ball) & COMPOSITE)))
|
|
||||||
|
|
||||||
ev = sum(1 for b in ball if b % 2 == 0)
|
|
||||||
pev = sum(1 for b in p_ball if b % 2 == 0)
|
|
||||||
sets["even_n"].add(ev)
|
|
||||||
sets["even_diff"].add(abs(ev - pev))
|
|
||||||
|
|
||||||
sc = section10_count(ball)
|
|
||||||
psc = section10_count(p_ball)
|
|
||||||
sets["sec10"].add(sc)
|
|
||||||
sets["sec10_diff"].add(abs(sc - psc))
|
|
||||||
|
|
||||||
for wk in (8, 12, 16, 20):
|
|
||||||
ex = weeks_freq(draws, ball, no, wk)
|
|
||||||
pex = weeks_freq(draws, p_ball, no, wk)
|
|
||||||
sets[f"w{wk}"].add(ex)
|
|
||||||
sets[f"w{wk}_diff"].add(abs(ex - pex))
|
|
||||||
|
|
||||||
sets["continus_max"].add(continus_max(ball))
|
|
||||||
|
|
||||||
# filterPreviousNumber (원본과 동일)
|
|
||||||
pb_set = set(p_ball)
|
|
||||||
bad_prev = True
|
|
||||||
for i in range(6):
|
|
||||||
bi = ball[i]
|
|
||||||
if bi in pb_set or bi - 1 in pb_set or bi + 1 in pb_set:
|
|
||||||
bad_prev = False
|
|
||||||
break
|
|
||||||
if bad_prev:
|
|
||||||
flags_prev["need_relax_previous"] = True
|
|
||||||
|
|
||||||
# filterAllPreivous7
|
|
||||||
pb7 = set()
|
|
||||||
for i in range(no - 1, no - 8, -1):
|
|
||||||
if i in draws:
|
|
||||||
for x in draws[i]:
|
|
||||||
pb7.add(x)
|
|
||||||
if len(set(ball) & pb7) == 6:
|
|
||||||
flags_prev["need_relax_prev7"] = True
|
|
||||||
|
|
||||||
# 백분위로 타이트닝 후 학습 각 회차 특성 보강
|
|
||||||
keys_numeric = [
|
|
||||||
"sum6",
|
|
||||||
"sum6_diff",
|
|
||||||
"avg6",
|
|
||||||
"avg6_diff",
|
|
||||||
"sum3f",
|
|
||||||
"sum3f_diff",
|
|
||||||
"sum3b",
|
|
||||||
"sum3b_diff",
|
|
||||||
"go_sum",
|
|
||||||
"go_sum_diff",
|
|
||||||
"interval",
|
|
||||||
"interval_diff",
|
|
||||||
"first_letter",
|
|
||||||
"first_letter_diff",
|
|
||||||
"last_letter",
|
|
||||||
"last_letter_diff",
|
|
||||||
"b0",
|
|
||||||
"b0_diff",
|
|
||||||
"b5",
|
|
||||||
"b5_diff",
|
|
||||||
"uniq_end",
|
|
||||||
"uniq_end_diff",
|
|
||||||
"ac",
|
|
||||||
"ac_diff",
|
|
||||||
"prime_n",
|
|
||||||
"composite_n",
|
|
||||||
"composite_diff",
|
|
||||||
"even_n",
|
|
||||||
"even_diff",
|
|
||||||
"sec10",
|
|
||||||
"sec10_diff",
|
|
||||||
]
|
|
||||||
for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
|
|
||||||
keys_numeric.extend([f"mul{m}", f"mul{m}_diff"])
|
|
||||||
for wk in (8, 12, 16, 20):
|
|
||||||
keys_numeric.extend([f"w{wk}", f"w{wk}_diff"])
|
|
||||||
keys_numeric.append("continus_max")
|
|
||||||
|
|
||||||
for k in keys_numeric:
|
|
||||||
sets[k] = pct_band_unique(sets[k])
|
|
||||||
|
|
||||||
# 고저: (0,1)/(1,0) 만 제외하는 기존 로직 유지 + 학습에 나온 (l,h) 항상 허용
|
|
||||||
hl_skip = {(l, h) for l in (0, 1) for h in (0, 1)}
|
|
||||||
|
|
||||||
def emit():
    """Render the generated parameter module and return its source text.

    Reads ``TRAIN_LO``, ``TRAIN_HI``, ``flags_prev``, ``sets``, ``hl_skip``,
    ``pair_block`` and ``triple_block`` from the enclosing scope.  The
    output consists of a header, one sorted ``ALLOW_*`` list per feature
    key, the high/low tables, the pair/triple block lists and finally one
    ``_F_*`` frozenset alias per ``ALLOW_*`` list.
    """
    multiples = (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23)
    windows = (8, 12, 16, 20)

    # Feature keys in the exact order they appear in the generated module.
    keys_before_hl = (
        "sum6", "sum6_diff",
        "avg6", "avg6_diff",
        "sum3f", "sum3f_diff",
        "sum3b", "sum3b_diff",
    )
    keys_after_hl = [
        "go_sum", "go_sum_diff",
        "interval", "interval_diff",
        "first_letter", "first_letter_diff",
        "last_letter", "last_letter_diff",
        "b0", "b0_diff",
        "b5", "b5_diff",
        "uniq_end", "uniq_end_diff",
        "ac", "ac_diff",
    ]
    for m in multiples:
        keys_after_hl += ["mul{}".format(m), "mul{}_diff".format(m)]
    keys_after_hl += [
        "prime_n",
        "composite_n", "composite_diff",
        "even_n", "even_diff",
        "sec10", "sec10_diff",
    ]
    for wk in windows:
        keys_after_hl += ["w{}".format(wk), "w{}_diff".format(wk)]
    keys_after_hl.append("continus_max")

    out = [
        "# -*- coding: utf-8 -*-",
        '"""학습 구간 {}~{}회 기준 자동 생성 — tools/compute_final_filter_params.py"""'.format(
            TRAIN_LO, TRAIN_HI
        ),
        "",
        "TRAIN_RANGE = ({}, {})".format(TRAIN_LO, TRAIN_HI),
        "DISABLE_FILTER_PREVIOUS_NUMBER = {}".format(
            str(flags_prev["need_relax_previous"])
        ),
        "DISABLE_FILTER_ALL_PREVIOUS_7 = {}".format(str(flags_prev["need_relax_prev7"])),
        "",
    ]

    # Names of every ALLOW_* constant written, in emission order; each one
    # later receives a matching _F_* frozenset alias.
    allow_names = []

    def write_allow(key):
        # The constant name is the upper-cased feature key behind "ALLOW_".
        const = "ALLOW_" + key.upper()
        out.append("{} = {}".format(const, repr(sorted(sets[key]))))
        allow_names.append(const)

    for key in keys_before_hl:
        write_allow(key)

    out.append("HL_SKIP = {}".format(repr(sorted(hl_skip))))
    out.append("HL_SEEN = {}".format(repr(sorted(sets['hl_allowed']))))

    for key in keys_after_hl:
        write_allow(key)

    out.append("PAIR_BLOCKLIST = {}".format(repr([sorted(x) for x in pair_block])))
    out.append("TRIPLE_BLOCKLIST = {}".format(repr([sorted(x) for x in triple_block])))

    out += ["", "# frozenset 캐시", ""]
    for const in allow_names:
        alias = const.replace("ALLOW_", "", 1)
        out.append("_F_{} = frozenset({})".format(alias, const))
    out.append("_F_HL_SEEN = frozenset(HL_SEEN)")
    out.append("")

    return "\n".join(out) + "\n"
|
|
||||||
|
|
||||||
OUT.write_text(emit(), encoding="utf-8")
|
|
||||||
print("Wrote", OUT)
|
|
||||||
print("pair rules:", len(pair_rules), "-> block", len(pair_block))
|
|
||||||
print("triple rules:", len(triple_rules), "-> block", len(triple_block))
|
|
||||||
print("DISABLE_FILTER_PREVIOUS_NUMBER", flags_prev["need_relax_previous"])
|
|
||||||
print("DISABLE_FILTER_ALL_PREVIOUS_7", flags_prev["need_relax_prev7"])
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
231
train_1.py
231
train_1.py
@@ -1,231 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_1 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws.

    The filter returns a collection of failure reasons for a six-number
    combination; an empty collection means the combination passes.
    """

    # Filter under test; set per instance in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter from ``<resources_path>/lotto_history.json``.

        Args:
            resources_path: directory holding the history file.
            ruleset_path: optional ruleset path forwarded to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on each winning draw in range and print a report.

        Rows are visited from the last one down to positional index 20, so
        the first 20 rows of ``df_ball`` are never scored.

        Args:
            df_ball: history frame with a ``no`` column followed by the six
                winning numbers (columns 1..6 are read positionally).
            start_no: first draw number to score (inclusive).
            end_no: last draw number to score (inclusive).

        Returns:
            The number of scored draws whose winning numbers triggered no
            filter.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers that passed
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # triggered-filter count -> list of filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by it alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many filters they triggered so the report
            # can list the nearly-passing draws first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        def by_count_desc(counter):
            # Stable sort: ties keep first-seen order.
            return sorted(counter.items(), key=lambda x: x[1], reverse=True)

        print("\n\t[걸러진 유일 필터]")
        for name, count in by_count_desc(filter_dic_1):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in by_count_desc(filter_dic_2):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in by_count_desc(filter_dic):
            print("\t\t>", name, "->", count)

        # "{:.2f}" gives a fixed two-decimal percentage; the bare "{:.2}"
        # spec means two significant digits and renders 20.0 as "2e+01".
        # The denominator is the whole frame, not only the scored range.
        total = len(df_ball)
        percent = 100 * len(no_filter_ball) / total if total else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(
            len(no_filter_ball), total, percent
        )

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that passes the filter for ``no``.

        Args:
            no: draw number the candidates are generated for.
            df_ball: history frame forwarded to the filter.
            filter_ball: optional numbers to exclude; any combination that
                contains one of them is skipped without calling the filter.

        Returns:
            List of passing combinations as ascending 6-tuples.
        """
        final_candidates = []
        excluded = set(filter_ball) if filter_ball is not None else None

        # Iterate lazily: the full list holds 8,145,060 tuples.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if excluded and not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print each historical draw that ``extract_final_candidates`` accepts.

        Rows are visited from the last one down to positional index 1; the
        first row is not checked.

        Args:
            df_ball: history frame.
            p_win_count: previously computed win count, echoed in the summary.
            filter_ball: optional numbers; draws containing any are skipped.
        """
        win_count = 0
        excluded = set(filter_ball) if filter_ball is not None else None

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if excluded and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Brute-force check which of the draws ``nos`` the filter would hit.

        For each draw, combinations are enumerated in ascending order and
        the filter is called on each until the winning combination is
        reached and accepted.
        NOTE(review): if ``extract_final_candidates`` is side-effect free,
        calling it only on the winning combination would give the same
        result far faster; confirm before changing.

        Args:
            df_ball: history frame.
            nos: iterable of draw numbers; ``None`` is treated as empty.

        Returns:
            Dict mapping each hit draw number to its winning numbers.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            wanted = set(answer)

            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(wanted & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file and
    # the inclusive range of draw numbers to score.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # --- Disabled: STEP #0, final candidate selection for the latest draw ---
    # Kept as comments rather than a bare string statement.  The original
    # disabled code wrote `answer` on every output row; it is corrected here
    # to write each candidate `ball`.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    #
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    #
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    #
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # --- Disabled: STEP #2, confirm the number of wins ---
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
|
||||||
231
train_2.py
231
train_2.py
@@ -1,231 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_2 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws.

    The filter returns a collection of failure reasons for a six-number
    combination; an empty collection means the combination passes.
    """

    # Filter under test; set per instance in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter from ``<resources_path>/lotto_history.json``.

        Args:
            resources_path: directory holding the history file.
            ruleset_path: optional ruleset path forwarded to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on each winning draw in range and print a report.

        Rows are visited from the last one down to positional index 20, so
        the first 20 rows of ``df_ball`` are never scored.

        Args:
            df_ball: history frame with a ``no`` column followed by the six
                winning numbers (columns 1..6 are read positionally).
            start_no: first draw number to score (inclusive).
            end_no: last draw number to score (inclusive).

        Returns:
            The number of scored draws whose winning numbers triggered no
            filter.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers that passed
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # triggered-filter count -> list of filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by it alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many filters they triggered so the report
            # can list the nearly-passing draws first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        def by_count_desc(counter):
            # Stable sort: ties keep first-seen order.
            return sorted(counter.items(), key=lambda x: x[1], reverse=True)

        print("\n\t[걸러진 유일 필터]")
        for name, count in by_count_desc(filter_dic_1):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in by_count_desc(filter_dic_2):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in by_count_desc(filter_dic):
            print("\t\t>", name, "->", count)

        # "{:.2f}" gives a fixed two-decimal percentage; the bare "{:.2}"
        # spec means two significant digits and renders 20.0 as "2e+01".
        # The denominator is the whole frame, not only the scored range.
        total = len(df_ball)
        percent = 100 * len(no_filter_ball) / total if total else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(
            len(no_filter_ball), total, percent
        )

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that passes the filter for ``no``.

        Args:
            no: draw number the candidates are generated for.
            df_ball: history frame forwarded to the filter.
            filter_ball: optional numbers to exclude; any combination that
                contains one of them is skipped without calling the filter.

        Returns:
            List of passing combinations as ascending 6-tuples.
        """
        final_candidates = []
        excluded = set(filter_ball) if filter_ball is not None else None

        # Iterate lazily: the full list holds 8,145,060 tuples.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if excluded and not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print each historical draw that ``extract_final_candidates`` accepts.

        Rows are visited from the last one down to positional index 1; the
        first row is not checked.

        Args:
            df_ball: history frame.
            p_win_count: previously computed win count, echoed in the summary.
            filter_ball: optional numbers; draws containing any are skipped.
        """
        win_count = 0
        excluded = set(filter_ball) if filter_ball is not None else None

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if excluded and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Brute-force check which of the draws ``nos`` the filter would hit.

        For each draw, combinations are enumerated in ascending order and
        the filter is called on each until the winning combination is
        reached and accepted.
        NOTE(review): if ``extract_final_candidates`` is side-effect free,
        calling it only on the winning combination would give the same
        result far faster; confirm before changing.

        Args:
            df_ball: history frame.
            nos: iterable of draw numbers; ``None`` is treated as empty.

        Returns:
            Dict mapping each hit draw number to its winning numbers.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            wanted = set(answer)

            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(wanted & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file and
    # the inclusive range of draw numbers to score.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # --- Disabled: STEP #0, final candidate selection for the latest draw ---
    # Kept as comments rather than a bare string statement.  The original
    # disabled code wrote `answer` on every output row; it is corrected here
    # to write each candidate `ball`.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    #
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    #
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    #
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # --- Disabled: STEP #2, confirm the number of wins ---
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
|
||||||
231
train_3.py
231
train_3.py
@@ -1,231 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_3 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws.

    The filter returns a collection of failure reasons for a six-number
    combination; an empty collection means the combination passes.
    """

    # Filter under test; set per instance in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter from ``<resources_path>/lotto_history.json``.

        Args:
            resources_path: directory holding the history file.
            ruleset_path: optional ruleset path forwarded to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on each winning draw in range and print a report.

        Rows are visited from the last one down to positional index 20, so
        the first 20 rows of ``df_ball`` are never scored.

        Args:
            df_ball: history frame with a ``no`` column followed by the six
                winning numbers (columns 1..6 are read positionally).
            start_no: first draw number to score (inclusive).
            end_no: last draw number to score (inclusive).

        Returns:
            The number of scored draws whose winning numbers triggered no
            filter.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers that passed
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # triggered-filter count -> list of filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by it alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many filters they triggered so the report
            # can list the nearly-passing draws first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        def by_count_desc(counter):
            # Stable sort: ties keep first-seen order.
            return sorted(counter.items(), key=lambda x: x[1], reverse=True)

        print("\n\t[걸러진 유일 필터]")
        for name, count in by_count_desc(filter_dic_1):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in by_count_desc(filter_dic_2):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in by_count_desc(filter_dic):
            print("\t\t>", name, "->", count)

        # "{:.2f}" gives a fixed two-decimal percentage; the bare "{:.2}"
        # spec means two significant digits and renders 20.0 as "2e+01".
        # The denominator is the whole frame, not only the scored range.
        total = len(df_ball)
        percent = 100 * len(no_filter_ball) / total if total else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(
            len(no_filter_ball), total, percent
        )

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that passes the filter for ``no``.

        Args:
            no: draw number the candidates are generated for.
            df_ball: history frame forwarded to the filter.
            filter_ball: optional numbers to exclude; any combination that
                contains one of them is skipped without calling the filter.

        Returns:
            List of passing combinations as ascending 6-tuples.
        """
        final_candidates = []
        excluded = set(filter_ball) if filter_ball is not None else None

        # Iterate lazily: the full list holds 8,145,060 tuples.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if excluded and not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print each historical draw that ``extract_final_candidates`` accepts.

        Rows are visited from the last one down to positional index 1; the
        first row is not checked.

        Args:
            df_ball: history frame.
            p_win_count: previously computed win count, echoed in the summary.
            filter_ball: optional numbers; draws containing any are skipped.
        """
        win_count = 0
        excluded = set(filter_ball) if filter_ball is not None else None

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if excluded and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Brute-force check which of the draws ``nos`` the filter would hit.

        For each draw, combinations are enumerated in ascending order and
        the filter is called on each until the winning combination is
        reached and accepted.
        NOTE(review): if ``extract_final_candidates`` is side-effect free,
        calling it only on the winning combination would give the same
        result far faster; confirm before changing.

        Args:
            df_ball: history frame.
            nos: iterable of draw numbers; ``None`` is treated as empty.

        Returns:
            Dict mapping each hit draw number to its winning numbers.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            wanted = set(answer)

            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(wanted & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file and
    # the inclusive range of draw numbers to score.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # --- Disabled: STEP #0, final candidate selection for the latest draw ---
    # Kept as comments rather than a bare string statement.  The original
    # disabled code wrote `answer` on every output row; it is corrected here
    # to write each candidate `ball`.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    #
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    #
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    #
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # --- Disabled: STEP #2, confirm the number of wins ---
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
|
||||||
234
valid_1.py
234
valid_1.py
@@ -1,234 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_1 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
    """Harness that scores a ``BallFilter`` against historical draws.

    Every method takes ``df_ball``, a DataFrame with a ``no`` column (the
    draw number); elements 1..6 of a row are read as that draw's winning
    combination.
    """

    # Filter under evaluation; set per instance in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the ``BallFilter`` from ``resources_path/history_json``.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the history inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Report which filter rules reject the real winning numbers.

        Walks ``df_ball`` from the last row to the first, runs the filter on
        the winning combination of every draw whose number lies in
        ``[start_no, end_no]`` and prints per-rule statistics.

        Args:
            df_ball: Draw history; may contain draws outside the range.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            int: Number of scored draws whose winning combination triggered
            no rule at all.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning combination that passed
        printLog = True
        filter_dic = {}       # rule -> number of draws it rejected
        filter_dic_len = {}   # rule count -> list of rule lists
        filter_dic_1 = {}     # rule -> draws rejected by that rule alone
        filter_dic_2 = {}     # "rule_a,rule_b" -> draws rejected by exactly that pair

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            # Sort the reasons: set iteration order is arbitrary, and an
            # unsorted pair could be counted under two different keys below.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many rules rejected them, so the report can
            # list the draws with the fewest rejections first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ``{:.2f}`` gives two decimals; the former ``{:.2}`` meant two
        # significant digits and rendered e.g. 22.2 as "2.2e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter accepts.

        Args:
            no: Draw number handed to the filter.
            df_ball: Draw history handed to the filter.
            filter_ball: Optional iterable of ball numbers; combinations that
                contain any of them are skipped without calling the filter.

        Returns:
            list[tuple[int, ...]]: Accepted combinations in generation order.
        """
        # Build the exclusion set once instead of once per combination.
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []

        # Iterate lazily: materialising all 8,145,060 tuples is unnecessary.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Re-count the draws whose winning numbers survive the filter.

        Rows are visited from the last one down to index 1; the row at
        index 0 is not scored. The result is only printed, next to
        ``p_win_count`` for comparison.

        Args:
            df_ball: Draw history.
            p_win_count: Previously obtained win count, printed alongside.
            filter_ball: Optional iterable of ball numbers; draws whose
                winning combination contains any of them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, draw by draw, whether the winning combination is accepted.

        For each draw number the 6-of-45 combinations are enumerated in
        order and passed to the filter until the accepted combination that
        equals the winning numbers is found.

        Args:
            df_ball: Draw history.
            nos: Iterable of draw numbers to validate; ``None`` is treated
                as "nothing to validate".

        Returns:
            dict: Draw number -> winning combination, for every draw whose
            winning combination was accepted.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            # Lazy generator: no 8M-element list is rebuilt for every draw.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file and
    # the inclusive range of draw numbers to score.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step: enumerate the final candidates for the latest draw and
    # dump them to a file. Each written line is the candidate itself (the
    # earlier draft wrote the winning numbers on every line by mistake).
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    #
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    #
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    #
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # Disabled step: cross-check the win count.
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned in the feature file): 22 wins
|
|
||||||
234
valid_2.py
234
valid_2.py
@@ -1,234 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_2 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
    """Harness that scores a ``BallFilter`` against historical draws.

    Every method takes ``df_ball``, a DataFrame with a ``no`` column (the
    draw number); elements 1..6 of a row are read as that draw's winning
    combination.
    """

    # Filter under evaluation; set per instance in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the ``BallFilter`` from ``resources_path/history_json``.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the history inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Report which filter rules reject the real winning numbers.

        Walks ``df_ball`` from the last row to the first, runs the filter on
        the winning combination of every draw whose number lies in
        ``[start_no, end_no]`` and prints per-rule statistics.

        Args:
            df_ball: Draw history; may contain draws outside the range.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            int: Number of scored draws whose winning combination triggered
            no rule at all.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning combination that passed
        printLog = True
        filter_dic = {}       # rule -> number of draws it rejected
        filter_dic_len = {}   # rule count -> list of rule lists
        filter_dic_1 = {}     # rule -> draws rejected by that rule alone
        filter_dic_2 = {}     # "rule_a,rule_b" -> draws rejected by exactly that pair

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            # Sort the reasons: set iteration order is arbitrary, and an
            # unsorted pair could be counted under two different keys below.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many rules rejected them, so the report can
            # list the draws with the fewest rejections first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ``{:.2f}`` gives two decimals; the former ``{:.2}`` meant two
        # significant digits and rendered e.g. 22.2 as "2.2e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter accepts.

        Args:
            no: Draw number handed to the filter.
            df_ball: Draw history handed to the filter.
            filter_ball: Optional iterable of ball numbers; combinations that
                contain any of them are skipped without calling the filter.

        Returns:
            list[tuple[int, ...]]: Accepted combinations in generation order.
        """
        # Build the exclusion set once instead of once per combination.
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []

        # Iterate lazily: materialising all 8,145,060 tuples is unnecessary.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Re-count the draws whose winning numbers survive the filter.

        Rows are visited from the last one down to index 1; the row at
        index 0 is not scored. The result is only printed, next to
        ``p_win_count`` for comparison.

        Args:
            df_ball: Draw history.
            p_win_count: Previously obtained win count, printed alongside.
            filter_ball: Optional iterable of ball numbers; draws whose
                winning combination contains any of them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, draw by draw, whether the winning combination is accepted.

        For each draw number the 6-of-45 combinations are enumerated in
        order and passed to the filter until the accepted combination that
        equals the winning numbers is found.

        Args:
            df_ball: Draw history.
            nos: Iterable of draw numbers to validate; ``None`` is treated
                as "nothing to validate".

        Returns:
            dict: Draw number -> winning combination, for every draw whose
            winning combination was accepted.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            # Lazy generator: no 8M-element list is rebuilt for every draw.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file and
    # the inclusive range of draw numbers to score.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step: enumerate the final candidates for the latest draw and
    # dump them to a file. Each written line is the candidate itself (the
    # earlier draft wrote the winning numbers on every line by mistake).
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    #
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    #
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    #
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # Disabled step: cross-check the win count.
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned in the feature file): 22 wins
|
|
||||||
234
valid_3.py
234
valid_3.py
@@ -1,234 +0,0 @@
|
|||||||
import os
|
|
||||||
import argparse
|
|
||||||
import pandas as pd
|
|
||||||
import itertools
|
|
||||||
from filter_model_3 import BallFilter
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
class FilterTest:
    """Harness that scores a ``BallFilter`` against historical draws.

    Every method takes ``df_ball``, a DataFrame with a ``no`` column (the
    draw number); elements 1..6 of a row are read as that draw's winning
    combination.
    """

    # Filter under evaluation; set per instance in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the ``BallFilter`` from ``resources_path/history_json``.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the history inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Report which filter rules reject the real winning numbers.

        Walks ``df_ball`` from the last row to the first, runs the filter on
        the winning combination of every draw whose number lies in
        ``[start_no, end_no]`` and prints per-rule statistics.

        Args:
            df_ball: Draw history; may contain draws outside the range.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            int: Number of scored draws whose winning combination triggered
            no rule at all.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning combination that passed
        printLog = True
        filter_dic = {}       # rule -> number of draws it rejected
        filter_dic_len = {}   # rule count -> list of rule lists
        filter_dic_1 = {}     # rule -> draws rejected by that rule alone
        filter_dic_2 = {}     # "rule_a,rule_b" -> draws rejected by exactly that pair

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            # Sort the reasons: set iteration order is arbitrary, and an
            # unsorted pair could be counted under two different keys below.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many rules rejected them, so the report can
            # list the draws with the fewest rejections first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ``{:.2f}`` gives two decimals; the former ``{:.2}`` meant two
        # significant digits and rendered e.g. 22.2 as "2.2e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter accepts.

        Args:
            no: Draw number handed to the filter.
            df_ball: Draw history handed to the filter.
            filter_ball: Optional iterable of ball numbers; combinations that
                contain any of them are skipped without calling the filter.

        Returns:
            list[tuple[int, ...]]: Accepted combinations in generation order.
        """
        # Build the exclusion set once instead of once per combination.
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []

        # Iterate lazily: materialising all 8,145,060 tuples is unnecessary.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Re-count the draws whose winning numbers survive the filter.

        Rows are visited from the last one down to index 1; the row at
        index 0 is not scored. The result is only printed, next to
        ``p_win_count`` for comparison.

        Args:
            df_ball: Draw history.
            p_win_count: Previously obtained win count, printed alongside.
            filter_ball: Optional iterable of ball numbers; draws whose
                winning combination contains any of them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, draw by draw, whether the winning combination is accepted.

        For each draw number the 6-of-45 combinations are enumerated in
        order and passed to the filter until the accepted combination that
        equals the winning numbers is found.

        Args:
            df_ball: Draw history.
            nos: Iterable of draw numbers to validate; ``None`` is treated
                as "nothing to validate".

        Returns:
            dict: Draw number -> winning combination, for every draw whose
            winning combination was accepted.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            # Lazy generator: no 8M-element list is rebuilt for every draw.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file and
    # the inclusive range of draw numbers to score.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step: enumerate the final candidates for the latest draw and
    # dump them to a file. Each written line is the candidate itself (the
    # earlier draft wrote the winning numbers on every line by mistake).
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    #
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    #
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    #
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # Disabled step: cross-check the win count.
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned in the feature file): 22 wins
|
|
||||||
Reference in New Issue
Block a user