init
This commit is contained in:
12
PROMPT.txt
12
PROMPT.txt
@@ -1,11 +1,12 @@
|
||||
데이터는 다음과 같습니다.
|
||||
(학습 데이터)
|
||||
- train.json, train.txt
|
||||
- 1회차부터 800회차
|
||||
- lotto_history.txt에서 1회차부터 800회차
|
||||
|
||||
(검증 데이터)
|
||||
- valid.json, valid.txt
|
||||
- 801회차부터 1000회차
|
||||
- lotto_history.txt에서 801회차부터 1000회차
|
||||
|
||||
(테스트 데이터)
|
||||
- lotto_history.txt에서 1001회차부터 이후 모두
|
||||
|
||||
파일 구조를 먼저 이해하세요.
|
||||
|
||||
@@ -110,3 +111,6 @@
|
||||
그리고 요구사항에 대해서 시도 방법을 설계하세요.
|
||||
그리고 반복적으로 실행해서 최적화된 방법을 찾아서 적용해주세요.
|
||||
(최적화는 항상 학습 데이터로만 수행해야 합니다. 검증 데이터로는 테스트만 수행하세요.)
|
||||
|
||||
당첨번호에 대한 추천 개수가 100개 미만이어야 합니다.
|
||||
1_FilterTest_25.py, BallFilter_25.py를 참고해서 최적의 final_filterTest.py, final_BallFilter.py를 작성해 주세요.
|
||||
4439
filter_model_1.py
4439
filter_model_1.py
File diff suppressed because it is too large
Load Diff
1256
filter_model_2.py
1256
filter_model_2.py
File diff suppressed because it is too large
Load Diff
@@ -1,94 +0,0 @@
|
||||
"""
|
||||
filter_model_3.py
|
||||
|
||||
OR-composed BallFilter:
|
||||
- A candidate ball is ACCEPTED if it passes EITHER filter_model_1 OR filter_model_2.
|
||||
- A candidate ball is REJECTED only if it fails BOTH.
|
||||
|
||||
This keeps the same public interface used across the project:
|
||||
BallFilter(lottoHistoryFileName, ruleset_path=..., ruleset=...)
|
||||
.filter(ball, no, until_end=False, df=None, filter_ball=None) -> set[str]
|
||||
.extract_final_candidates(ball, no=None, until_end=False, df=None) -> set[str]
|
||||
|
||||
Notes:
|
||||
- The underlying filters return a non-empty set of failure reasons when rejected.
|
||||
- Callers treat "len(result) == 0" as PASS.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import filter_model_1 as fm1
|
||||
import filter_model_2 as fm2
|
||||
|
||||
|
||||
class BallFilter:
    """
    Combine filter_model_1.BallFilter and filter_model_2.BallFilter with OR semantics.

    A combination passes (empty set returned) as soon as one wrapped model
    returns an empty result. Only when both models report failure reasons is a
    non-empty set returned; each reason is tagged with "m1:" or "m2:" so the
    originating model can be identified while debugging.
    """

    def __init__(
        self,
        lottoHistoryFileName: Optional[str] = None,
        # Backward compatible single ruleset knobs (applied to both if specific ones not provided)
        ruleset_path: Optional[str] = None,
        ruleset: Optional[Dict[str, Any]] = None,
        # Optional per-model overrides
        ruleset_path_1: Optional[str] = None,
        ruleset_path_2: Optional[str] = None,
        ruleset_1: Optional[Dict[str, Any]] = None,
        ruleset_2: Optional[Dict[str, Any]] = None,
    ):
        """Build both wrapped models; a per-model argument wins over the shared one."""

        def resolve(specific, shared):
            # Only an explicit None falls back to the shared value.
            return shared if specific is None else specific

        self.m1 = fm1.BallFilter(
            lottoHistoryFileName,
            ruleset_path=resolve(ruleset_path_1, ruleset_path),
            ruleset=resolve(ruleset_1, ruleset),
        )
        self.m2 = fm2.BallFilter(
            lottoHistoryFileName,
            ruleset_path=resolve(ruleset_path_2, ruleset_path),
            ruleset=resolve(ruleset_2, ruleset),
        )

    # ------------------------------------------------------------------
    # History helpers: always answered by model 1.
    # ------------------------------------------------------------------
    def getBall(self, no):
        """Forward to model 1's getBall."""
        return self.m1.getBall(no)

    def getLastNo(self, YMD):
        """Forward to model 1's getLastNo."""
        return self.m1.getLastNo(YMD)

    def getNextNo(self, YMD):
        """Forward to model 1's getNextNo."""
        return self.m1.getNextNo(YMD)

    def getYMD(self, no):
        """Forward to model 1's getYMD."""
        return self.m1.getYMD(no)

    def _prefixed(self, prefix: str, reasons: set) -> set:
        """Return a new set with `prefix` prepended to the text of every reason."""
        return {f"{prefix}{reason}" for reason in reasons}

    def extract_final_candidates(self, ball, no=None, until_end: bool = False, df=None):
        """
        Evaluate `ball` against model 1 and, only if that fails, against model 2.

        Returns an empty set when either model returns an empty result,
        otherwise the union of both models' reasons with their model tags.
        """
        shared_kwargs = dict(ball=ball, no=no, until_end=until_end, df=df)

        first = self.m1.extract_final_candidates(**shared_kwargs)
        if len(first) == 0:
            return set()

        # Model 2 is consulted lazily: it runs only after model 1 rejected.
        second = self.m2.extract_final_candidates(**shared_kwargs)
        if len(second) == 0:
            return set()

        tagged_first = self._prefixed("m1:", set(first))
        tagged_second = self._prefixed("m2:", set(second))
        return tagged_first | tagged_second

    def filter(self, ball, no, until_end: bool = False, df=None, filter_ball=None):
        """
        Signature-compatible entry point used by existing callers.

        `filter_ball` is accepted but not used; the call is forwarded to
        extract_final_candidates with the remaining arguments.
        """
        return self.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)
|
||||
|
||||
1081
practice_0.py
1081
practice_0.py
File diff suppressed because it is too large
Load Diff
179
practice_1.py
179
practice_1.py
@@ -1,179 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
from DataCrawler import DataCrawler
|
||||
|
||||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_1 import BallFilter
|
||||
|
||||
class Practice:
    """Lotto helper: crawls draw history and produces filtered number recommendations."""

    # Telegram client; replaced per instance in __init__.
    bot = None
    # Placeholders that are not used by the code in this class.
    preprocessor = None
    predictor = None

    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram bot client. `resources_path` is accepted but not stored."""
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """
        Download draw results from dhlottery.co.kr and persist them.

        Results are written to `<lottoHistoryFile>.json` (one JSON document per
        line) and `<lottoHistoryFile>.txt` (CSV: round, six numbers, bonus).

        Args:
            lottoHistoryFile: path prefix of the two history files.
            drwNo: when given, only that round is fetched and appended to the
                files; when None, both files are rewritten starting at round 1
                and rounds are fetched until the service stops reporting
                'success'.

        Returns:
            The last stored draw as [n1, ..., n6, bonus], or None when nothing
            was stored.
        """
        url_prefix = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
        single_draw = drwNo is not None
        # Append for a single round, truncate for a full re-crawl.
        mode = 'a' if single_draw else 'w'
        ball = None

        # The context manager closes both files on every exit path, including
        # the early return for an unknown round and request/JSON errors.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:

            def fetch_and_store(no):
                """Fetch one round; store and return its numbers, or None on failure."""
                result = requests.post(url_prefix + str(no)).json()
                if result['returnValue'] != 'success':
                    return None
                numbers = [result['drwtNo%d' % i] for i in range(1, 7)] + [result['bnusNo']]
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([no] + numbers)
                textFp.write(row + "\n")
                print(row)
                return numbers

            if single_draw:
                return fetch_and_store(drwNo)

            idx = 1
            while True:
                numbers = fetch_and_store(idx)
                if numbers is None:
                    break
                ball = numbers
                idx += 1
                # Pause between requests.
                time.sleep(0.5)

        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combination to `result_json` (mutated in place)."""
        result_json.append([6, 7, 10, 11, 20, 45])

    def predict2(self, resources_path, ymd, result_json):
        """
        Append every 6-number combination of 1..45 that passes BallFilter.

        Args:
            resources_path: directory containing lotto_history.json / .txt.
            ymd: date string handed to BallFilter.getNextNo to obtain the
                target round number.
            result_json: list that receives the accepted combinations.

        Returns:
            (p_no, p_ball): the round number and the six main numbers of the
            row whose 'no' column equals the target round minus one.
        """
        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Iterate the combinations lazily instead of building a list of all
        # 8,145,060 tuples first; the visiting order is unchanged.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)
            # A non-empty result lists the reasons the combination was rejected.
            if len(ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                continue
            result_json.append(ball)

        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return prev_row[0], prev_row[1:7]
|
||||
|
||||
if __name__ == '__main__':
    # Weekly job: work out the target Saturday, build the recommendations and
    # send them through the Telegram bot in messages of at most 100 entries.

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    today = datetime.today()
    weekday = today.weekday()  # Monday == 0, Saturday == 5, Sunday == 6
    # On Saturday from 21:00 onwards and on Sunday the target is the following
    # Saturday; otherwise it is the Saturday of the current week.
    roll_over = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=((12 if roll_over else 5) - weekday))

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Predictor
    practice = Practice(resources_path)

    # History files
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as history:
        for raw in history:
            if raw == '\n':
                continue
            # After the loop this holds the last non-blank record of the file.
            last_json = json.loads(raw)

    # Fetching the newest draw is currently disabled:
    # ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed weekly pick
    practice.predict1(picks)
    # Filter-based picks
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    pending = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(
        last_weekend, p_no, str(p_ball), ymd, (p_no + 1)
    )
    for rank, ball in enumerate(picks, start=1):
        pending += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            # Flush a full batch of 100 entries and start a new message.
            practice.bot.sendMsg(pending)
            pending = ''

    # Send the trailing partial batch, if there is one.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg(pending)

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
179
practice_2.py
179
practice_2.py
@@ -1,179 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
from DataCrawler import DataCrawler
|
||||
|
||||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_2 import BallFilter
|
||||
|
||||
class Practice:
    """Lotto helper: crawls draw history and produces filtered number recommendations."""

    # Telegram client; replaced per instance in __init__.
    bot = None
    # Placeholders that are not used by the code in this class.
    preprocessor = None
    predictor = None

    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram bot client. `resources_path` is accepted but not stored."""
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """
        Download draw results from dhlottery.co.kr and persist them.

        Results are written to `<lottoHistoryFile>.json` (one JSON document per
        line) and `<lottoHistoryFile>.txt` (CSV: round, six numbers, bonus).

        Args:
            lottoHistoryFile: path prefix of the two history files.
            drwNo: when given, only that round is fetched and appended to the
                files; when None, both files are rewritten starting at round 1
                and rounds are fetched until the service stops reporting
                'success'.

        Returns:
            The last stored draw as [n1, ..., n6, bonus], or None when nothing
            was stored.
        """
        url_prefix = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
        single_draw = drwNo is not None
        # Append for a single round, truncate for a full re-crawl.
        mode = 'a' if single_draw else 'w'
        ball = None

        # The context manager closes both files on every exit path, including
        # the early return for an unknown round and request/JSON errors.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:

            def fetch_and_store(no):
                """Fetch one round; store and return its numbers, or None on failure."""
                result = requests.post(url_prefix + str(no)).json()
                if result['returnValue'] != 'success':
                    return None
                numbers = [result['drwtNo%d' % i] for i in range(1, 7)] + [result['bnusNo']]
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([no] + numbers)
                textFp.write(row + "\n")
                print(row)
                return numbers

            if single_draw:
                return fetch_and_store(drwNo)

            idx = 1
            while True:
                numbers = fetch_and_store(idx)
                if numbers is None:
                    break
                ball = numbers
                idx += 1
                # Pause between requests.
                time.sleep(0.5)

        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combination to `result_json` (mutated in place)."""
        result_json.append([6, 7, 10, 11, 20, 45])

    def predict2(self, resources_path, ymd, result_json):
        """
        Append every 6-number combination of 1..45 that passes BallFilter.

        Args:
            resources_path: directory containing lotto_history.json / .txt.
            ymd: date string handed to BallFilter.getNextNo to obtain the
                target round number.
            result_json: list that receives the accepted combinations.

        Returns:
            (p_no, p_ball): the round number and the six main numbers of the
            row whose 'no' column equals the target round minus one.
        """
        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Iterate the combinations lazily instead of building a list of all
        # 8,145,060 tuples first; the visiting order is unchanged.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)
            # A non-empty result lists the reasons the combination was rejected.
            if len(ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                continue
            result_json.append(ball)

        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return prev_row[0], prev_row[1:7]
|
||||
|
||||
if __name__ == '__main__':
    # Weekly job: work out the target Saturday, build the recommendations and
    # send them through the Telegram bot in messages of at most 100 entries.

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    today = datetime.today()
    weekday = today.weekday()  # Monday == 0, Saturday == 5, Sunday == 6
    # On Saturday from 21:00 onwards and on Sunday the target is the following
    # Saturday; otherwise it is the Saturday of the current week.
    roll_over = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=((12 if roll_over else 5) - weekday))

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Predictor
    practice = Practice(resources_path)

    # History files
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as history:
        for raw in history:
            if raw == '\n':
                continue
            # After the loop this holds the last non-blank record of the file.
            last_json = json.loads(raw)

    # Fetching the newest draw is currently disabled:
    # ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed weekly pick
    practice.predict1(picks)
    # Filter-based picks
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    pending = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(
        last_weekend, p_no, str(p_ball), ymd, (p_no + 1)
    )
    for rank, ball in enumerate(picks, start=1):
        pending += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            # Flush a full batch of 100 entries and start a new message.
            practice.bot.sendMsg(pending)
            pending = ''

    # Send the trailing partial batch, if there is one.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg(pending)

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
546
practice_3.py
546
practice_3.py
@@ -1,546 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
from DataCrawler import DataCrawler
|
||||
|
||||
import json
|
||||
import os
|
||||
import copy
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_3 import BallFilter
|
||||
|
||||
class Practice:
|
||||
|
||||
bot = None
|
||||
preprocessor = None
|
||||
predictor = None
|
||||
|
||||
extract_count = None
|
||||
TARGET_MIN_SURVIVORS = 30
|
||||
TARGET_MAX_SURVIVORS = 150
|
||||
PREDICT_TIMEOUT_SECONDS = 180
|
||||
|
||||
def __init__(self, resources_path):
    """Create the Telegram bot client and remember the resources directory."""
    self.bot = TelegramBot()
    # Read later by _get_base_ruleset and _collect_candidates.
    self.resources_path = resources_path
|
||||
|
||||
# 로또 당첨 데이터를 수집해서 파일로 저장합니다.
|
||||
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일
|
||||
def craw(self, lottoHistoryFile, drwNo=None):
    """
    Download draw results from dhlottery.co.kr and persist them.

    Results are written to `<lottoHistoryFile>.json` (one JSON document per
    line) and `<lottoHistoryFile>.txt` (CSV: round, six numbers, bonus).

    Args:
        lottoHistoryFile: path prefix of the two history files.
        drwNo: when given, only that round is fetched and appended to the
            files; when None, both files are rewritten starting at round 1
            and rounds are fetched until the service stops reporting
            'success'.

    Returns:
        The last stored draw as [n1, ..., n6, bonus], or None when nothing
        was stored.
    """
    url_prefix = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    single_draw = drwNo is not None
    # Append for a single round, truncate for a full re-crawl.
    mode = 'a' if single_draw else 'w'
    ball = None

    # The context manager closes both files on every exit path, including the
    # early return for an unknown round and request/JSON errors.
    with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
            open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:

        def fetch_and_store(no):
            """Fetch one round; store and return its numbers, or None on failure."""
            result = requests.post(url_prefix + str(no)).json()
            if result['returnValue'] != 'success':
                return None
            numbers = [result['drwtNo%d' % i] for i in range(1, 7)] + [result['bnusNo']]
            jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
            row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([no] + numbers)
            textFp.write(row + "\n")
            print(row)
            return numbers

        if single_draw:
            return fetch_and_store(drwNo)

        idx = 1
        while True:
            numbers = fetch_and_store(idx)
            if numbers is None:
                break
            ball = numbers
            idx += 1
            # Pause between requests.
            time.sleep(0.5)

    return ball
|
||||
|
||||
def predict1(self, result_json):
    """Append the eleven fixed combinations to `result_json` (mutated in place)."""
    fixed_picks = (
        (6, 7, 10, 11, 20, 45),
        (2, 7, 17, 28, 35, 39),
        (6, 10, 19, 25, 33, 35),
        (3, 17, 20, 24, 35, 45),
        (5, 15, 18, 29, 36, 41),
        (6, 15, 20, 23, 37, 43),
        (8, 15, 19, 23, 38, 41),
        (5, 11, 19, 24, 40, 45),
        (9, 16, 18, 23, 35, 43),
        (7, 13, 19, 28, 33, 44),
        (7, 11, 18, 29, 37, 42),
    )
    # Each call appends fresh list objects, in the order given above.
    result_json.extend(list(pick) for pick in fixed_picks)
    print("회차(predict1)")
|
||||
|
||||
def predict2(self, resources_path, ymd, result_json):
    """
    Append every 6-number combination of 1..45 that passes BallFilter.

    Args:
        resources_path: directory containing lotto_history.json / .txt.
        ymd: date string handed to BallFilter.getNextNo to obtain the target
            round number.
        result_json: list that receives the accepted combinations.

    Returns:
        (p_no, p_ball): the round number and the six main numbers of the row
        whose 'no' column equals the target round minus one.
    """
    ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
    no = ballFilter.getNextNo(ymd)
    print("회차(predict2): {}".format(no))

    df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # Iterate the combinations lazily instead of building a list of all
    # 8,145,060 tuples first; the visiting order is unchanged.
    for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))

        ball = list(combo)
        # A non-empty result lists the reasons the combination was rejected.
        if len(ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
            continue
        result_json.append(ball)

    prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
    return prev_row[0], prev_row[1:7]
|
||||
|
||||
def predict3(self, resources_path, ymd, result_json):
    """
    Adaptive filter-based prediction bounded by a wall-clock budget.

    The base ruleset is tried first. If it yields more than
    TARGET_MAX_SURVIVORS combinations, stricter rulesets are tried and, as a
    last step, the strictest result is ranked and trimmed. If it yields fewer
    than TARGET_MIN_SURVIVORS, progressively looser rulesets are tried. A
    result that is still too small is topped up with combinations derived
    from the previous draw. Whenever a collection pass hits the deadline, its
    partial result is finalised immediately.

    Args:
        resources_path: directory containing lotto_history.json / .txt.
        ymd: date string handed to BallFilter.getNextNo.
        result_json: list that receives the selected combinations.

    Returns:
        (p_no, p_ball): the round number and the sorted six main numbers of
        the row whose 'no' column equals the target round minus one.
    """
    number_pool = list(range(1, 46))

    round_no = BallFilter(os.path.join(resources_path, 'lotto_history.json')).getNextNo(ymd)
    print("회차(predict3): {}".format(round_no))
    # The time budget starts here and is shared by every collection pass.
    started_at = time.time()
    deadline = started_at + self.PREDICT_TIMEOUT_SECONDS

    df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    prev_row = df_ball[df_ball['no'] == round_no - 1].values.tolist()[0]
    p_no = prev_row[0]
    p_ball = sorted(prev_row[1:7])

    base_ruleset = self._get_base_ruleset()

    # Stricter variants, used when the base ruleset lets too many through.
    strict_switches = {
        "paper_patterns": True,
        "ban_triples_legacy": True,
        "all_in_previous7": True,
        "previous_neighbors": True,
    }
    strict_allowed = {
        "ac_value": [8, 9],
        "uniq_last_digit_count": [4, 5],
        "even_count": [2, 3, 4],
    }
    tighten_rulesets = [
        self._build_ruleset(
            base_ruleset=base_ruleset,
            enabled_overrides=dict(strict_switches),
            allowed_overrides=dict(strict_allowed),
        ),
        self._build_ruleset(
            base_ruleset=base_ruleset,
            enabled_overrides=dict(strict_switches),
            allowed_overrides=dict(
                strict_allowed,
                sum=[112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148],
                sum_prev_diff=[13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40],
            ),
        ),
    ]

    # Looser variants, each switching off more filters than the one before.
    relax_level_1 = {"paper_patterns": False, "ban_triples_legacy": False}
    relax_level_2 = dict(relax_level_1, previous_neighbors=False, all_in_previous7=False)
    relax_level_3 = dict(
        relax_level_2,
        weeks_8_count=False,
        weeks_12_count=False,
        weeks_16_count=False,
        weeks_20_count=False,
    )
    relax_rulesets = [
        self._build_ruleset(base_ruleset=base_ruleset, enabled_overrides=switches)
        for switches in (relax_level_1, relax_level_2, relax_level_3)
    ]

    min_survivors = self.TARGET_MIN_SURVIVORS
    max_survivors = self.TARGET_MAX_SURVIVORS

    def collect(ruleset, stage, **stops):
        # One pass over all combinations with the shared round/time context.
        return self._collect_candidates(
            candidates=number_pool,
            no=round_no,
            df_ball=df_ball,
            ruleset=ruleset,
            stage_name=stage,
            predict_start_ts=started_at,
            deadline_ts=deadline,
            **stops
        )

    def on_timeout(partial):
        return self._finalize_on_timeout(partial, p_ball, min_survivors, max_survivors)

    def publish(stage, picks):
        print("predict3 stage: {}, survivors: {}".format(stage, len(picks)))
        result_json.extend(picks)
        return p_no, p_ball

    base_info = collect(base_ruleset, "base", stop_when_gt=max_survivors)
    current = base_info["candidates"]
    if base_info["timed_out"]:
        # The budget ran out during the very first pass: finish right away.
        return publish("base_timeout_fallback", on_timeout(current))

    chosen = current
    stage_name = "base"

    if len(current) > max_survivors:
        stage_name = "base_overflow"
        for idx, rs in enumerate(tighten_rulesets, start=1):
            info = collect(rs, "tighten_{}".format(idx), stop_when_gt=max_survivors)
            tightened = info["candidates"]
            if info["timed_out"]:
                chosen = on_timeout(tightened)
                stage_name = "tighten_{}_timeout_fallback".format(idx)
                break
            if len(tightened) > max_survivors:
                # Still too many: keep the previous choice, try the next ruleset.
                continue
            chosen = tightened
            stage_name = "tighten_{}".format(idx)
            if len(tightened) >= min_survivors:
                break

        if len(chosen) > max_survivors:
            # No ruleset got under the cap: enumerate the strictest one fully
            # and keep the best-scoring combinations.
            full_info = collect(tighten_rulesets[-1], "tighten_full_rank", stop_when_gt=None)
            full_for_ranking = full_info["candidates"]
            if full_info["timed_out"]:
                chosen = on_timeout(full_for_ranking)
                stage_name = "tighten_rank_timeout_fallback"
            else:
                chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
                stage_name = "tighten_rank_trim"
    elif len(current) < min_survivors:
        stage_name = "base_underflow"
        for idx, rs in enumerate(relax_rulesets, start=1):
            info = collect(
                rs,
                "relax_{}".format(idx),
                stop_when_gt=None,
                stop_when_gte=min_survivors,
            )
            relaxed = info["candidates"]
            if info["timed_out"]:
                chosen = on_timeout(relaxed)
                stage_name = "relax_{}_timeout_fallback".format(idx)
                break
            chosen = relaxed
            stage_name = "relax_{}".format(idx)
            if len(relaxed) >= min_survivors:
                break

    if len(chosen) == 0:
        stage_name = "relax_zero_fallback"
        chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
    elif len(chosen) < min_survivors:
        stage_name = "{}_fill".format(stage_name)
        already_chosen = set(tuple(x) for x in chosen)
        chosen.extend(
            self._fallback_candidates_from_prev(
                p_ball,
                min_survivors - len(chosen),
                exclude=already_chosen,
            )
        )

    return publish(stage_name, chosen)
|
||||
|
||||
def _get_base_ruleset(self):
    """Return an independent deep copy of the ruleset held by model 1 of a default BallFilter."""
    history_path = os.path.join(self.resources_path, "lotto_history.json")
    default_filter = BallFilter(history_path)
    # Deep copy so later edits never touch the filter's own ruleset.
    return copy.deepcopy(default_filter.m1.ruleset)
|
||||
|
||||
def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
|
||||
ruleset = copy.deepcopy(base_ruleset)
|
||||
ruleset.setdefault("filters", {})
|
||||
enabled_overrides = enabled_overrides or {}
|
||||
allowed_overrides = allowed_overrides or {}
|
||||
for key, value in enabled_overrides.items():
|
||||
ruleset["filters"].setdefault(key, {})
|
||||
ruleset["filters"][key]["enabled"] = bool(value)
|
||||
for key, values in allowed_overrides.items():
|
||||
ruleset["filters"].setdefault(key, {})
|
||||
ruleset["filters"][key]["enabled"] = True
|
||||
ruleset["filters"][key]["allowed"] = list(values)
|
||||
return ruleset
|
||||
|
||||
def _collect_candidates(
    self,
    candidates,
    no,
    df_ball,
    ruleset,
    stop_when_gt=None,
    stop_when_gte=None,
    stage_name="base",
    predict_start_ts=None,
    deadline_ts=None,
):
    """
    Enumerate 6-number combinations of `candidates` and keep those a BallFilter accepts.

    Args:
        candidates: pool of numbers to combine.
        no: round number passed to the filter.
        df_ball: history DataFrame passed to the filter.
        ruleset: ruleset used to construct the BallFilter.
        stop_when_gt: stop as soon as more than this many were accepted.
        stop_when_gte: stop as soon as at least this many were accepted.
        stage_name: label used in progress output.
        predict_start_ts: start time used to report elapsed seconds.
        deadline_ts: absolute time.time() value after which the pass aborts.

    Returns:
        dict with "candidates" (accepted combinations as lists), "timed_out"
        (bool) and "processed" (1-based index of the last combination reached).
    """
    history_path = os.path.join(self.resources_path, "lotto_history.json")
    ball_filter = BallFilter(history_path, ruleset=ruleset)
    accepted = []

    def elapsed_seconds():
        if predict_start_ts is None:
            return 0.0
        return time.time() - predict_start_ts

    def outcome(timed_out, processed_count):
        return {"candidates": accepted, "timed_out": timed_out, "processed": processed_count}

    processed = 0
    for processed, combo in enumerate(itertools.combinations(candidates, 6), start=1):
        # The deadline is checked before each combination is evaluated.
        if deadline_ts is not None and deadline_ts <= time.time():
            print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, elapsed_seconds(), len(accepted)))
            return outcome(True, processed)

        if processed % 1000000 == 0:
            print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, elapsed_seconds(), len(accepted)))

        numbers = list(combo)
        # A non-empty filter result means the combination was rejected.
        if len(ball_filter.filter(ball=numbers, no=no, until_end=False, df=df_ball)) != 0:
            continue

        accepted.append(numbers)
        if stop_when_gt is not None and len(accepted) > stop_when_gt:
            return outcome(False, processed)
        if stop_when_gte is not None and len(accepted) >= stop_when_gte:
            return outcome(False, processed)

    return outcome(False, processed)
|
||||
|
||||
def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
|
||||
chosen = list(partial_candidates)
|
||||
if len(chosen) > max_survivors:
|
||||
chosen = self._rank_and_trim(chosen, prev_ball, max_survivors)
|
||||
elif len(chosen) < min_survivors:
|
||||
fill = self._fallback_candidates_from_prev(
|
||||
prev_ball,
|
||||
min_survivors - len(chosen),
|
||||
exclude=set(tuple(x) for x in chosen),
|
||||
)
|
||||
chosen.extend(fill)
|
||||
return chosen
|
||||
|
||||
def _rank_and_trim(self, candidates, prev_ball, limit):
|
||||
scored = [(self._score_candidate(ball, prev_ball), ball) for ball in candidates]
|
||||
scored.sort(key=lambda x: x[0])
|
||||
return [ball for _, ball in scored[:limit]]
|
||||
|
||||
def _score_candidate(self, ball, prev_ball):
|
||||
sum_diff = abs(sum(ball) - sum(prev_ball))
|
||||
even_cnt = len([x for x in ball if x % 2 == 0])
|
||||
uniq_last = len(set([x % 10 for x in ball]))
|
||||
contiguous_penalty = 0
|
||||
s = sorted(ball)
|
||||
for i in range(1, len(s)):
|
||||
if s[i] - s[i - 1] == 1:
|
||||
contiguous_penalty += 1
|
||||
score = 0
|
||||
score += sum_diff
|
||||
score += abs(even_cnt - 3) * 2
|
||||
score += abs(uniq_last - 5) * 2
|
||||
score += contiguous_penalty
|
||||
return score
|
||||
|
||||
def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
|
||||
exclude = exclude or set()
|
||||
seed = sorted(prev_ball)
|
||||
out = []
|
||||
delta_patterns = [
|
||||
(0, 0, 0, 0, 0, 0),
|
||||
(-1, 0, 0, 0, 0, 1),
|
||||
(0, -1, 0, 0, 1, 0),
|
||||
(0, 0, -1, 1, 0, 0),
|
||||
(-2, 0, 0, 0, 0, 2),
|
||||
(0, -2, 0, 0, 2, 0),
|
||||
(0, 0, -2, 2, 0, 0),
|
||||
(-1, -1, 0, 0, 1, 1),
|
||||
(1, 0, -1, 0, 0, 0),
|
||||
(0, 1, 0, -1, 0, 0),
|
||||
(1, -1, 1, -1, 1, -1),
|
||||
(-1, 1, -1, 1, -1, 1),
|
||||
]
|
||||
shift = 0
|
||||
while len(out) < need_count and shift <= 8:
|
||||
for delta in delta_patterns:
|
||||
cand = [seed[i] + delta[i] for i in range(6)]
|
||||
cand = [min(45, max(1, v + shift)) for v in cand]
|
||||
cand = sorted(cand)
|
||||
if len(set(cand)) != 6:
|
||||
continue
|
||||
t = tuple(cand)
|
||||
if t in exclude:
|
||||
continue
|
||||
exclude.add(t)
|
||||
out.append(cand)
|
||||
if len(out) >= need_count:
|
||||
break
|
||||
shift += 1
|
||||
return out
|
||||
|
||||
def _merge_unique_balls(self, base_balls, extra_balls):
|
||||
seen = set(tuple(sorted(x)) for x in base_balls)
|
||||
for ball in extra_balls:
|
||||
key = tuple(sorted(ball))
|
||||
if key not in seen:
|
||||
base_balls.append(list(ball))
|
||||
seen.add(key)
|
||||
return base_balls
|
||||
|
||||
def _sorted_unique_balls(self, balls):
|
||||
"""
|
||||
Normalize (sort within ball), de-duplicate, then sort lexicographically.
|
||||
Returns List[List[int]].
|
||||
"""
|
||||
uniq = {}
|
||||
for b in balls:
|
||||
key = tuple(sorted(b))
|
||||
uniq[key] = list(key)
|
||||
return [list(t) for t in sorted(uniq.keys())]
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target Saturday: the coming one, except on Saturday after 20:59 and on
    # Sunday, where weekday 12 (the Saturday of the following week) is used.
    today = datetime.today()
    weekday = today.weekday()
    rolls_over = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=((12 if rolls_over else 5) - weekday))

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: read the history file and remember its last record.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)

    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed picks played every week
    practice.predict1(picks)

    # Filter-based predictions (existing/new): merge both, sort them, and
    # append them after the predict1 picks.
    predict2_json = []
    p_no, p_ball = practice.predict2(resources_path, ymd, predict2_json)

    predict3_json = []
    p_no3, p_ball3 = practice.predict3(resources_path, ymd, predict3_json)

    merged_predict = []
    for source in (predict2_json, predict3_json):
        practice._merge_unique_balls(merged_predict, source)
    merged_predict = practice._sorted_unique_balls(merged_predict)

    # Append merged_predict to the predict1 picks in sorted order (de-duplicated)
    practice._merge_unique_balls(picks, merged_predict)
    if p_no3 == p_no:
        p_ball = p_ball3

    # The report is sent in chunks of 100 picks; the header goes with the
    # first chunk.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for position, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
@@ -1,216 +0,0 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Back-test helper that runs ``BallFilter`` over historical draws.

    ``df_ball`` arguments are frames with a ``no`` column followed by the six
    drawn numbers (columns 1..6 of each row are used as the answer).
    """

    ballFilter = None

    def __init__(self, resources_path):
        """Create the filter from ``lotto_history.json`` under ``resources_path``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def _answer_for(self, df_ball, no):
        """Return the six drawn numbers of the first row whose ``no`` matches."""
        return df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

    @staticmethod
    def _print_counts(title, counts):
        """Print ``counts`` (name -> hits) under ``title``, most frequent first."""
        print(title)
        for name, hits in sorted(counts.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", hits)

    def find_filter_method(self, df_ball, filter_ball=None):
        """Report which filters reject each historical winning draw.

        Rows from the last one down to index 20 are checked. Statistics are
        printed; ``filter_ball`` is accepted but not used.

        Returns:
            Number of draws whose winning numbers passed every filter.
        """
        win_count = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}
        filter_dic_len = {}
        filter_dic_1 = {}
        filter_dic_2 = {}

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            answer = self._answer_for(df_ball, no)

            # Sorted so that the joined two-filter key below does not depend
            # on set iteration order.
            filter_type = sorted(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Grouped by rejection count so draws hit by few filters are
            # listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_counts("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_counts("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_counts("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-combination of 1..45 that passes the filter.

        Combinations sharing a number with ``filter_ball`` (when given) are
        skipped without being filtered.

        Returns:
            List of 6-tuples.
        """
        final_candidates = []
        banned = set(filter_ball) if filter_ball is not None else None

        # Iterate lazily: materialising all 8,145,060 tuples is unnecessary.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if banned is not None and 0 < len(set(ball) & banned):
                continue

            if len(self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows from the last one down to index 1 are checked; draws sharing a
        number with ``filter_ball`` (when given) are skipped.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = self._answer_for(df_ball, no)

            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )

    def validate(self, df_ball, nos=None):
        """Check, per draw in ``nos``, whether the winning combination survives.

        The combinations of 1..45 are walked in order until the one equal to
        the winning numbers is reached; only that combination is passed to
        ``extract_final_candidates``.
        NOTE(review): this assumes the filter result for one ball does not
        depend on earlier calls for other balls — confirm against BallFilter.

        Args:
            df_ball: Draw history frame.
            nos: Draw numbers to validate; ``None`` validates nothing.

        Returns:
            Dict mapping each winning draw number to its six numbers.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = self._answer_for(df_ball, no)
            answer_set = set(answer)
            # A combination can only match an answer of six distinct numbers.
            target = tuple(sorted(answer_set)) if len(answer_set) == 6 else None

            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                if combo != target:
                    continue
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type):
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                # No later combination can equal the answer.
                break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':

    resources_path = 'resources'

    # Draw history as CSV: draw number, six numbers, bonus number.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball = []
    filterTest = FilterTest(resources_path)

    print("STEP #1. 필터 방법 추출")
    started_at = time.time()
    win_count = filterTest.find_filter_method(df_ball, filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - started_at)
    print("process_time: ", process_time)

    # Disabled step kept as a bare string literal (never executed).
    # NOTE(review): inside it, every written line is built from `answer`
    # rather than from the loop variable `ball`.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in answer]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
||||
@@ -1,490 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
|
||||
import json
|
||||
import os
|
||||
import copy
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_3 import BallFilter
|
||||
|
||||
class Practice:
    """Weekly lotto pipeline: crawl draw history, build picks, report them.

    ``predict1`` contributes fixed picks; ``predict2`` enumerates every
    6-combination of 1..45 through ``BallFilter`` and tightens or relaxes the
    ruleset until the survivor count falls inside the target range or the
    time budget runs out.
    """

    bot = None
    preprocessor = None
    predictor = None

    extract_count = None
    # Survivor-count range predict2 aims for.
    TARGET_MIN_SURVIVORS = 30
    TARGET_MAX_SURVIVORS = 150
    # Wall-clock budget (seconds) shared by all enumeration passes of predict2.
    PREDICT_TIMEOUT_SECONDS = 180

    # Response keys of one draw, in output order (six numbers, then bonus).
    _DRAW_KEYS = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')

    def __init__(self, resources_path):
        """Create the Telegram bot and remember the resources directory."""
        self.bot = TelegramBot()
        self.resources_path = resources_path

    def _fetch_draw(self, draw_no):
        """POST the lottery API for one draw and return the decoded JSON."""
        url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw_no)
        res = requests.post(url)
        return res.json()

    def _store_draw(self, jsonFp, textFp, draw_no, result):
        """Write one draw to both history files, echo it, and return its numbers."""
        ball = [result[key] for key in self._DRAW_KEYS]
        csv_line = ",".join("%d" % value for value in [draw_no] + ball)
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        textFp.write(csv_line + "\n")
        print(csv_line)
        return ball

    def craw(self, lottoHistoryFile, drwNo=None):
        """Collect winning numbers and store them in the history files.

        With ``drwNo`` given, that single draw is appended to
        ``<lottoHistoryFile>.json`` / ``.txt``. Without it, both files are
        rewritten with every draw from 1 upward until the API stops
        answering ``success`` (0.5 s pause between requests).

        Args:
            lottoHistoryFile: Path prefix of the history files (no extension).
            drwNo: Draw number to append, or ``None`` for a full crawl.

        Returns:
            The last stored draw as ``[n1..n6, bonus]``, or ``None`` when
            nothing was stored.
        """
        ball = None
        mode = 'w' if drwNo is None else 'a'

        # Context managers close both files on the early return and on
        # errors too; the original leaked the handles in those cases.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if drwNo is not None:
                result = self._fetch_draw(drwNo)
                if result['returnValue'] != 'success':
                    return None
                ball = self._store_draw(jsonFp, textFp, drwNo, result)
            else:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result['returnValue'] != 'success':
                        break
                    ball = self._store_draw(jsonFp, textFp, idx, result)
                    idx += 1
                    time.sleep(0.5)

        return ball

    def predict1(self, result_json):
        """Append the fixed weekly picks to ``result_json`` (in place)."""
        result_json.extend([
            [6, 7, 10, 11, 20, 45],
            [2, 7, 17, 28, 35, 39],
            [6, 10, 19, 25, 33, 35],
            [3, 17, 20, 24, 35, 45],
            [5, 15, 18, 29, 36, 41],
            [6, 15, 20, 23, 37, 43],
            [8, 15, 19, 23, 38, 41],
            [5, 11, 19, 24, 40, 45],
            [9, 16, 18, 23, 35, 43],
            [7, 13, 19, 28, 33, 44],
            [7, 11, 18, 29, 37, 42],
        ])

    def predict2(self, resources_path, ymd, result_json):
        """Append filter-based picks for the draw of ``ymd`` to ``result_json``.

        The base ruleset is tried first. Too many survivors trigger the
        tightened rulesets (and finally a rank-and-trim); too few trigger the
        relaxed rulesets. A timeout at any stage finalises whatever was
        collected so far.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Draw date string handed to ``BallFilter.getNextNo``.
            result_json: List extended in place with the chosen balls.

        Returns:
            ``(p_no, p_ball)``: number and sorted numbers of the previous draw.
        """
        candidates = list(range(1, 46))
        history_json = os.path.join(resources_path, 'lotto_history.json')
        no = BallFilter(history_json).getNextNo(ymd)
        print("회차: {}".format(no))
        predict_start_ts = time.time()
        deadline_ts = predict_start_ts + self.PREDICT_TIMEOUT_SECONDS

        history_txt = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(history_txt, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = p_ball[0]
        p_ball = sorted(p_ball[1:7])

        # Rulesets per stage: base / tightened / relaxed.
        base_ruleset = self._get_base_ruleset()

        tighten_enabled = {
            "paper_patterns": True,
            "ban_triples_legacy": True,
            "all_in_previous7": True,
            "previous_neighbors": True,
        }
        tighten_allowed = {
            "ac_value": [8, 9],
            "uniq_last_digit_count": [4, 5],
            "even_count": [2, 3, 4],
        }
        tighten_allowed_strict = dict(
            tighten_allowed,
            sum=[112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148],
            sum_prev_diff=[13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40],
        )
        tighten_rulesets = [
            self._build_ruleset(
                base_ruleset=base_ruleset,
                enabled_overrides=tighten_enabled,
                allowed_overrides=allowed,
            )
            for allowed in (tighten_allowed, tighten_allowed_strict)
        ]

        relax_1 = {"paper_patterns": False, "ban_triples_legacy": False}
        relax_2 = dict(relax_1, previous_neighbors=False, all_in_previous7=False)
        relax_3 = dict(
            relax_2,
            weeks_8_count=False,
            weeks_12_count=False,
            weeks_16_count=False,
            weeks_20_count=False,
        )
        relax_rulesets = [
            self._build_ruleset(base_ruleset=base_ruleset, enabled_overrides=overrides)
            for overrides in (relax_1, relax_2, relax_3)
        ]

        min_survivors = self.TARGET_MIN_SURVIVORS
        max_survivors = self.TARGET_MAX_SURVIVORS

        def collect(ruleset, stage, stop_when_gt=None, stop_when_gte=None):
            # Every pass shares the same candidates, draw, history and deadline.
            return self._collect_candidates(
                candidates=candidates,
                no=no,
                df_ball=df_ball,
                ruleset=ruleset,
                stop_when_gt=stop_when_gt,
                stop_when_gte=stop_when_gte,
                stage_name=stage,
                predict_start_ts=predict_start_ts,
                deadline_ts=deadline_ts,
            )

        def finalize(partial):
            return self._finalize_on_timeout(partial, p_ball, min_survivors, max_survivors)

        chosen = []
        stage_name = "base"

        current_info = collect(base_ruleset, "base", stop_when_gt=max_survivors)
        current = current_info["candidates"]
        if current_info["timed_out"]:
            chosen = finalize(current)
            stage_name = "base_timeout_fallback"
            print("candidate_stage: {}, survivors: {}".format(stage_name, len(chosen)))
            result_json.extend(chosen)
            return p_no, p_ball

        if min_survivors <= len(current) <= max_survivors:
            chosen = current
        elif len(current) > max_survivors:
            chosen = current
            stage_name = "base_overflow"
            for idx, rs in enumerate(tighten_rulesets, start=1):
                t_info = collect(rs, "tighten_{}".format(idx), stop_when_gt=max_survivors)
                t = t_info["candidates"]
                if t_info["timed_out"]:
                    chosen = finalize(t)
                    stage_name = "tighten_{}_timeout_fallback".format(idx)
                    break
                if min_survivors <= len(t) <= max_survivors:
                    chosen = t
                    stage_name = "tighten_{}".format(idx)
                    break
                if len(t) <= max_survivors:
                    chosen = t
                    stage_name = "tighten_{}".format(idx)
            if len(chosen) > max_survivors:
                # Enforce the upper bound: enumerate fully with the tightest
                # ruleset and keep only the best-scored balls.
                full_info = collect(tighten_rulesets[-1], "tighten_full_rank")
                full_for_ranking = full_info["candidates"]
                if full_info["timed_out"]:
                    chosen = finalize(full_for_ranking)
                    stage_name = "tighten_rank_timeout_fallback"
                else:
                    chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
                    stage_name = "tighten_rank_trim"
        else:
            chosen = current
            stage_name = "base_underflow"
            for idx, rs in enumerate(relax_rulesets, start=1):
                # Relaxing only has to reach the lower bound, so stop early.
                r_info = collect(rs, "relax_{}".format(idx), stop_when_gte=min_survivors)
                r = r_info["candidates"]
                chosen = r
                stage_name = "relax_{}".format(idx)
                if r_info["timed_out"]:
                    chosen = finalize(r)
                    stage_name = "relax_{}_timeout_fallback".format(idx)
                    break
                if len(r) >= min_survivors:
                    break

        if len(chosen) == 0:
            # Never end with zero picks: fall back to combinations close to
            # the previous draw.
            stage_name = "relax_zero_fallback"
            chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
        elif len(chosen) < min_survivors:
            # Lower-bound guard: top up the shortfall with fallback balls.
            stage_name = "{}_fill".format(stage_name)
            fill = self._fallback_candidates_from_prev(p_ball, min_survivors - len(chosen), exclude=set(tuple(x) for x in chosen))
            chosen.extend(fill)

        print("candidate_stage: {}, survivors: {}".format(stage_name, len(chosen)))
        result_json.extend(chosen)
        return p_no, p_ball

    def _get_base_ruleset(self):
        """Return a deep copy of the ``m1`` ruleset of a default ``BallFilter``."""
        history_json = os.path.join(self.resources_path, "lotto_history.json")
        base_filter = BallFilter(history_json)
        return copy.deepcopy(base_filter.m1.ruleset)

    def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
        """Return a copy of ``base_ruleset`` with per-filter overrides applied.

        Args:
            base_ruleset: Ruleset dict; it is deep-copied, never modified.
            enabled_overrides: Filter name -> value stored (as bool) under
                ``enabled``.
            allowed_overrides: Filter name -> values stored as a list under
                ``allowed``; such filters are also switched on.
        """
        ruleset = copy.deepcopy(base_ruleset)
        filters = ruleset.setdefault("filters", {})

        for key, value in (enabled_overrides or {}).items():
            filters.setdefault(key, {})["enabled"] = bool(value)

        for key, values in (allowed_overrides or {}).items():
            entry = filters.setdefault(key, {})
            entry["enabled"] = True
            entry["allowed"] = list(values)

        return ruleset

    def _collect_candidates(
        self,
        candidates,
        no,
        df_ball,
        ruleset,
        stop_when_gt=None,
        stop_when_gte=None,
        stage_name="base",
        predict_start_ts=None,
        deadline_ts=None,
    ):
        """Enumerate 6-number combinations and keep the ones the filter accepts.

        A combination survives when ``BallFilter.filter`` returns an empty
        collection for it.

        Args:
            candidates: Numbers to draw the 6-combinations from.
            no: Draw number forwarded to the filter.
            df_ball: Draw-history frame forwarded to the filter.
            ruleset: Ruleset handed to ``BallFilter``.
            stop_when_gt: Stop as soon as the survivor count exceeds this.
            stop_when_gte: Stop as soon as the survivor count reaches this.
            stage_name: Label used in progress/timeout log lines.
            predict_start_ts: Start timestamp, used only for elapsed-time logs.
            deadline_ts: ``time.time()`` value after which enumeration stops.

        Returns:
            Dict with ``candidates``, ``timed_out`` and ``processed`` (1-based
            index of the last combination reached, 0 when there were none).
        """
        lottoHistoryFileName = os.path.join(self.resources_path, "lotto_history.json")
        ballFilter = BallFilter(lottoHistoryFileName, ruleset=ruleset)
        result = []
        last_idx = 0

        def elapsed_seconds():
            return (time.time() - predict_start_ts) if predict_start_ts is not None else 0.0

        def outcome(timed_out):
            return {"candidates": result, "timed_out": timed_out, "processed": last_idx}

        for last_idx, ball in enumerate(itertools.combinations(candidates, 6), start=1):
            if deadline_ts is not None and deadline_ts <= time.time():
                print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, last_idx, elapsed_seconds(), len(result)))
                return outcome(True)
            if last_idx % 1000000 == 0:
                print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, last_idx, elapsed_seconds(), len(result)))

            b = list(ball)
            if len(ballFilter.filter(ball=b, no=no, until_end=False, df=df_ball)) != 0:
                continue

            result.append(b)
            if stop_when_gt is not None and len(result) > stop_when_gt:
                return outcome(False)
            if stop_when_gte is not None and len(result) >= stop_when_gte:
                return outcome(False)

        return outcome(False)

    def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
        """Fit a partial survivor list into ``[min_survivors, max_survivors]``.

        Too many survivors are ranked and trimmed; too few are topped up with
        fallback balls derived from ``prev_ball``. Returns a new list.
        """
        chosen = list(partial_candidates)
        if len(chosen) > max_survivors:
            chosen = self._rank_and_trim(chosen, prev_ball, max_survivors)
        elif len(chosen) < min_survivors:
            fill = self._fallback_candidates_from_prev(
                prev_ball,
                min_survivors - len(chosen),
                exclude=set(tuple(x) for x in chosen),
            )
            chosen.extend(fill)
        return chosen

    def _rank_and_trim(self, candidates, prev_ball, limit):
        """Return the ``limit`` lowest-scored candidates (stable for ties)."""
        ranked = sorted(candidates, key=lambda ball: self._score_candidate(ball, prev_ball))
        return ranked[:limit]

    def _score_candidate(self, ball, prev_ball):
        """Score a ball; lower is better.

        Sum of: distance of the ball's sum from ``prev_ball``'s sum, twice
        the distance of the even count from 3, twice the distance of the
        distinct-last-digit count from 5, and the number of consecutive pairs.
        """
        s = sorted(ball)
        contiguous_penalty = sum(1 for low, high in zip(s, s[1:]) if high - low == 1)
        even_cnt = sum(1 for x in ball if x % 2 == 0)
        uniq_last = len({x % 10 for x in ball})

        return (
            abs(sum(ball) - sum(prev_ball))
            + abs(even_cnt - 3) * 2
            + abs(uniq_last - 5) * 2
            + contiguous_penalty
        )

    def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
        """Build up to ``need_count`` balls by nudging the previous draw.

        Each delta pattern is added to the sorted previous draw, shifted by
        0..8, clamped to 1..45, and kept only when it still has six distinct
        numbers and was not seen before.

        Args:
            prev_ball: Numbers of the previous draw (six are used).
            need_count: Maximum number of balls to produce.
            exclude: Optional collection of tuples that must not be produced.
                It is not modified by this call.

        Returns:
            List of sorted 6-number lists, possibly shorter than ``need_count``.
        """
        # Private copy: the original mutated the caller's set whenever it was
        # non-empty, but not when it was empty.
        seen = set(exclude) if exclude else set()
        seed = sorted(prev_ball)
        out = []
        delta_patterns = [
            (0, 0, 0, 0, 0, 0),
            (-1, 0, 0, 0, 0, 1),
            (0, -1, 0, 0, 1, 0),
            (0, 0, -1, 1, 0, 0),
            (-2, 0, 0, 0, 0, 2),
            (0, -2, 0, 0, 2, 0),
            (0, 0, -2, 2, 0, 0),
            (-1, -1, 0, 0, 1, 1),
            (1, 0, -1, 0, 0, 0),
            (0, 1, 0, -1, 0, 0),
            (1, -1, 1, -1, 1, -1),
            (-1, 1, -1, 1, -1, 1),
        ]

        shift = 0
        while len(out) < need_count and shift <= 8:
            for delta in delta_patterns:
                cand = sorted(
                    min(45, max(1, value + step + shift))
                    for value, step in zip(seed, delta)
                )
                if len(set(cand)) != 6:
                    continue
                t = tuple(cand)
                if t in seen:
                    continue
                seen.add(t)
                out.append(cand)
                if len(out) >= need_count:
                    break
            shift += 1

        return out
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target Saturday: the coming one, except on Saturday after 20:59 and on
    # Sunday, where weekday 12 (the Saturday of the following week) is used.
    today = datetime.today()
    weekday = today.weekday()
    if weekday == 6 or (weekday == 5 and today.hour > 20):
        days_ahead = 12 - weekday
    else:
        days_ahead = 5 - weekday
    this_weekend = today + timedelta(days=days_ahead)

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: read the history file and remember its last record.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)

    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed picks played every week
    practice.predict1(picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    # The report is sent in chunks of 100 picks; the header goes with the
    # first chunk.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for position, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
@@ -1,189 +0,0 @@
|
||||
# 웹 호출 라이브러리를 호출합니다.
|
||||
import time
|
||||
import requests
|
||||
from DataCrawler import DataCrawler
|
||||
|
||||
import json
|
||||
import os
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from datetime import datetime, timedelta
|
||||
from TelegramBot import TelegramBot
|
||||
|
||||
from filter_model_3 import BallFilter
|
||||
|
||||
class Practice:
    """Weekly lotto helper: crawls draw history and builds the recommended picks.

    The entry-point script uses ``bot`` to push the resulting picks to Telegram.
    """

    # Telegram client created in __init__.
    bot = None
    # The three attributes below are declared but never referenced in this class.
    preprocessor = None
    predictor = None

    extract_count = None

    # Lottery endpoint returning one draw as JSON; the draw number is appended.
    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    # JSON keys of the six winning numbers followed by the bonus number.
    _BALL_KEYS = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')

    def __init__(self, resources_path):
        """Create the Telegram bot. ``resources_path`` is accepted but not used here."""
        self.bot = TelegramBot()

        return

    def _fetch_draw(self, draw_no):
        """Request one draw; return its JSON dict, or None when the API does not report success."""
        res = requests.post(self._DRAW_URL + str(draw_no))
        result = res.json()
        if result['returnValue'] != 'success':
            return None
        return result

    def _store_draw(self, draw_no, result, jsonFp, textFp):
        """Write one draw to the JSON-lines and CSV files, echo it, and return its 7 numbers."""
        ball = [result[key] for key in self._BALL_KEYS]
        row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + ball)
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        textFp.write(row + "\n")
        print(row)
        return ball

    def craw(self, lottoHistoryFile, drwNo=None):
        """Collect winning numbers and save them to ``<lottoHistoryFile>.json`` / ``.txt``.

        Args:
            lottoHistoryFile: Path prefix (without extension) of the history files.
            drwNo: When given, only that draw is fetched and appended to the
                existing files. When None, both files are rewritten from draw 1
                onward until the API stops reporting success.

        Returns:
            ``[n1, n2, n3, n4, n5, n6, bonus]`` of the last stored draw, or None
            when nothing could be fetched.
        """
        mode = 'a' if drwNo is not None else 'w'
        ball = None

        # The context manager closes both files on every exit path, including
        # the early return for a draw that has not been published yet.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:

            if drwNo is not None:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                return self._store_draw(drwNo, result, jsonFp, textFp)

            # Full crawl: walk draw numbers from 1 until the first failure.
            idx = 1
            while True:
                result = self._fetch_draw(idx)
                if result is None:
                    break
                ball = self._store_draw(idx, result, jsonFp, textFp)
                idx += 1
                # Pause between requests.
                time.sleep(0.5)

        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combinations to ``result_json`` (mutated in place)."""
        fixed_picks = (
            (6, 7, 10, 11, 20, 45),
            (2, 7, 17, 28, 35, 39),
            (6, 10, 19, 25, 33, 35),
            (3, 17, 20, 24, 35, 45),
            (5, 15, 18, 29, 36, 41),
            (6, 15, 20, 23, 37, 43),
            (8, 15, 19, 23, 38, 41),
            (5, 11, 19, 24, 40, 45),
            (9, 16, 18, 23, 35, 43),
            (7, 13, 19, 28, 33, 44),
            (7, 11, 18, 29, 37, 42),
        )
        for pick in fixed_picks:
            result_json.append(list(pick))
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every 6-number combination that passes the ball filter.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Target draw date string, handed to ``BallFilter.getNextNo``.
            result_json: List that receives the surviving combinations (mutated).

        Returns:
            ``(p_no, p_ball)``: the draw number preceding the target draw and its
            six winning numbers, read from ``lotto_history.txt``.
        """
        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Stream the 8,145,060 combinations instead of building them all in memory.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):

            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            ball = list(combo)

            # A non-empty result means at least one rule rejected the combination.
            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if 0 < len(filter_type):
                continue

            result_json.append(ball)

        # Row layout is [no, b1..b6, bn]; raises IndexError if draw no-1 is absent.
        previous_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = previous_row[0]
        p_ball = previous_row[1:7]

        return p_no, p_ball
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Pick the Saturday to predict for: on Saturday after 20:59 and on Sunday
    # roll over to next week's Saturday, otherwise use this week's Saturday.
    today = datetime.today()
    weekday = today.weekday()
    roll_to_next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=((12 if roll_to_next_week else 5) - weekday))

    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')

    print("ymd: {}".format(ymd))

    # Lotto prediction
    practice = Practice(resources_path)

    # Data collection: remember the last non-blank record of the history file.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)

    # Crawling of the next draw is currently disabled:
    # ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]

    # Fixed weekly picks
    practice.predict1(picks)
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))

    # Send the picks in messages of 100 entries each.
    for position, ball in enumerate(picks, start=1):
        p_str += " {}. {}\n".format(position, str(ball))
        if position % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''

    # Flush the final, partially filled message.
    if len(picks) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(picks)
    print("size: {}".format(size))

    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA

    print("done...")
|
||||
@@ -1216,3 +1216,4 @@
|
||||
{"returnValue": "success", "drwNoDate": "2026-03-21", "drwNo": 1216, "drwtNo1": 3, "drwtNo2": 10, "drwtNo3": 14, "drwtNo4": 15, "drwtNo5": 23, "drwtNo6": 24, "bnusNo": 25}
|
||||
{"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41}
|
||||
{"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25}
|
||||
{"returnValue": "success", "drwNoDate": "2026-04-11", "drwNo": 1219, "drwtNo1": 1, "drwtNo2": 2, "drwtNo3": 15, "drwtNo4": 28, "drwtNo5": 39, "drwtNo6": 45, "bnusNo": 31}
|
||||
|
||||
@@ -1216,3 +1216,4 @@
|
||||
1216,3,10,14,15,23,24,25
|
||||
1217,8,10,15,20,29,31,41
|
||||
1218,3,28,31,32,42,45,25
|
||||
1219,1,2,15,28,39,45,31
|
||||
|
||||
99
review_1.py
99
review_1.py
@@ -1,99 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_1 import BallFilter
|
||||
|
||||
class FilterTestReview:
    """Back-test harness that replays the ball filter against past draws.

    For each requested draw, every 6-of-45 combination is run through the
    filter and the survivors are scored against the real winning numbers.
    """

    # Filter instance created in __init__ from the BallFilter this file imports.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the ball filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

        return

    def validate(self, df_ball, nos=None):
        """Score the filter's surviving combinations for each draw in ``nos``.

        Args:
            df_ball: DataFrame with columns ``no, b1..b6, bn`` (one row per draw).
            nos: Iterable of draw numbers to replay. None means nothing to do.

        Returns:
            ``(win_history, win_history_size)``. ``win_history`` maps each draw
            whose winning combination survived the filter to its six numbers;
            ``win_history_size`` maps every replayed draw to its survivor count.
        """
        win_history = {}
        win_history_size = {}

        # The default previously crashed in the loop header; treat it as empty.
        if nos is None:
            nos = ()

        for no in nos:

            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7].copy()
            bonus = balls[7]
            answer_set = set(answer)

            # Only the survivor count is reported, so keep a counter, not a list.
            candidate_count = 0
            # Prize rank -> surviving combinations that would have won that rank.
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}

            # Stream the combinations lazily instead of materialising them per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):

                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(combo)

                # A non-empty result means the combination was rejected.
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue

                candidate_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    if no not in win_history:
                        win_history[no] = answer.copy()
                    if ball not in win_dic[1]:
                        win_dic[1].append(ball.copy())
                elif match == 5:
                    # 2nd prize: five matches plus the bonus number; else 3rd.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = candidate_count

            print("no: {}, answer: {}, size: {}".format(no, answer, candidate_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))

        return win_history, win_history_size
|
||||
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Load the draw history: draw number, six winning numbers, bonus number.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTestReview = FilterTestReview(resources_path)

    # Replay a hand-picked list of draws and time the run.
    # (A full sweep would use nos=range(1126, 21, -1) instead.)
    review_nos = [1057, 1046, 1022, 900, 841, 816, 696, 593, 574, 426, 356, 324, 303, 245, 147, 139, 71]
    start = time.time()
    win_history, win_history_size = filterTestReview.validate(df_ball, nos=review_nos)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Report the winning draws in ascending order with their survivor counts.
    print("{} 회 당첨".format(len(win_history)))
    sorted_win_history = sorted(win_history.keys())
    for won_no in sorted_win_history:
        print("\t>{} > {} ({})".format(won_no, win_history[won_no], win_history_size[won_no]))
|
||||
99
review_2.py
99
review_2.py
@@ -1,99 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_2 import BallFilter
|
||||
|
||||
class FilterTestReview:
    """Back-test harness that replays the ball filter against past draws.

    For each requested draw, every 6-of-45 combination is run through the
    filter and the survivors are scored against the real winning numbers.
    """

    # Filter instance created in __init__ from the BallFilter this file imports.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the ball filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

        return

    def validate(self, df_ball, nos=None):
        """Score the filter's surviving combinations for each draw in ``nos``.

        Args:
            df_ball: DataFrame with columns ``no, b1..b6, bn`` (one row per draw).
            nos: Iterable of draw numbers to replay. None means nothing to do.

        Returns:
            ``(win_history, win_history_size)``. ``win_history`` maps each draw
            whose winning combination survived the filter to its six numbers;
            ``win_history_size`` maps every replayed draw to its survivor count.
        """
        win_history = {}
        win_history_size = {}

        # The default previously crashed in the loop header; treat it as empty.
        if nos is None:
            nos = ()

        for no in nos:

            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7].copy()
            bonus = balls[7]
            answer_set = set(answer)

            # Only the survivor count is reported, so keep a counter, not a list.
            candidate_count = 0
            # Prize rank -> surviving combinations that would have won that rank.
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}

            # Stream the combinations lazily instead of materialising them per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):

                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(combo)

                # A non-empty result means the combination was rejected.
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue

                candidate_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    if no not in win_history:
                        win_history[no] = answer.copy()
                    if ball not in win_dic[1]:
                        win_dic[1].append(ball.copy())
                elif match == 5:
                    # 2nd prize: five matches plus the bonus number; else 3rd.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = candidate_count

            print("no: {}, answer: {}, size: {}".format(no, answer, candidate_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))

        return win_history, win_history_size
|
||||
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Load the draw history: draw number, six winning numbers, bonus number.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTestReview = FilterTestReview(resources_path)

    # Replay a hand-picked list of draws and time the run.
    # (A full sweep would use nos=range(1126, 21, -1) instead.)
    review_nos = [1057, 1046, 1022, 900, 841, 816, 696, 593, 574, 426, 356, 324, 303, 245, 147, 139, 71]
    start = time.time()
    win_history, win_history_size = filterTestReview.validate(df_ball, nos=review_nos)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Report the winning draws in ascending order with their survivor counts.
    print("{} 회 당첨".format(len(win_history)))
    sorted_win_history = sorted(win_history.keys())
    for won_no in sorted_win_history:
        print("\t>{} > {} ({})".format(won_no, win_history[won_no], win_history_size[won_no]))
|
||||
99
review_3.py
99
review_3.py
@@ -1,99 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
|
||||
class FilterTestReview:
    """Back-test harness that replays the ball filter against past draws.

    For each requested draw, every 6-of-45 combination is run through the
    filter and the survivors are scored against the real winning numbers.
    """

    # Filter instance created in __init__ from the BallFilter this file imports.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the ball filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

        return

    def validate(self, df_ball, nos=None):
        """Score the filter's surviving combinations for each draw in ``nos``.

        Args:
            df_ball: DataFrame with columns ``no, b1..b6, bn`` (one row per draw).
            nos: Iterable of draw numbers to replay. None means nothing to do.

        Returns:
            ``(win_history, win_history_size)``. ``win_history`` maps each draw
            whose winning combination survived the filter to its six numbers;
            ``win_history_size`` maps every replayed draw to its survivor count.
        """
        win_history = {}
        win_history_size = {}

        # The default previously crashed in the loop header; treat it as empty.
        if nos is None:
            nos = ()

        for no in nos:

            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7].copy()
            bonus = balls[7]
            answer_set = set(answer)

            # Only the survivor count is reported, so keep a counter, not a list.
            candidate_count = 0
            # Prize rank -> surviving combinations that would have won that rank.
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}

            # Stream the combinations lazily instead of materialising them per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):

                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(combo)

                # A non-empty result means the combination was rejected.
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue

                candidate_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    if no not in win_history:
                        win_history[no] = answer.copy()
                    if ball not in win_dic[1]:
                        win_dic[1].append(ball.copy())
                elif match == 5:
                    # 2nd prize: five matches plus the bonus number; else 3rd.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = candidate_count

            print("no: {}, answer: {}, size: {}".format(no, answer, candidate_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))

        return win_history, win_history_size
|
||||
|
||||
|
||||
if __name__ == '__main__':

    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Load the draw history: draw number, six winning numbers, bonus number.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTestReview = FilterTestReview(resources_path)

    # Replay a hand-picked list of draws and time the run.
    # (A full sweep would use nos=range(1126, 21, -1) instead.)
    review_nos = [1057, 1046, 1022, 900, 841, 816, 696, 593, 574, 426, 356, 324, 303, 245, 147, 139, 71]
    start = time.time()
    win_history, win_history_size = filterTestReview.validate(df_ball, nos=review_nos)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Report the winning draws in ascending order with their survivor counts.
    print("{} 회 당첨".format(len(win_history)))
    sorted_win_history = sorted(win_history.keys())
    for won_no in sorted_win_history:
        print("\t>{} > {} ({})".format(won_no, win_history[won_no], win_history_size[won_no]))
|
||||
236
test_1.py
236
test_1.py
@@ -1,236 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_1 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Evaluation harness for a BallFilter over a range of historical draws."""

    # Filter instance created in __init__ from the BallFilter this file imports.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from ``<resources_path>/<history_json>``.

        The full history (json) must be used because the tests rely on
        previous-draw and recent-N-week window features.
        """
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

        return

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Run the filter on each real winning combination and print rule statistics.

        Args:
            df_ball: DataFrame with columns ``no, b1..b6, bn``. It may hold the
                whole history; only draws in ``[start_no, end_no]`` are scored.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).
            filter_ball: Accepted for signature compatibility; not used here.

        Returns:
            Number of scored draws whose winning combination passed every rule.
        """
        win_count = 0

        no_filter_ball = {}

        printLog = True
        filter_dic = {}      # rule name -> number of draws it rejected
        filter_dic_len = {}  # number of rejecting rules -> list of rule lists
        filter_dic_1 = {}    # rule that was the only rejecting rule -> count
        filter_dic_2 = {}    # "ruleA,ruleB" pair of rejecting rules -> count

        # Newest draw first, matching the original reverse index walk.
        for no in reversed(df_ball['no'].tolist()):

            no = int(no)
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            # The filter returns an unordered collection of rule names. Sorting
            # gives stable output and one key per pair ("a,b" and "b,a" used to
            # be counted separately depending on iteration order).
            filter_type = sorted(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1

                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many rules rejected them, so the draws that are
            # closest to passing can be reviewed first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len.keys()):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" prints two decimals; the former "{:.2}" meant two significant
        # digits and rendered e.g. 66.67 as "6.7e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination (as a tuple) that passes the filter.

        Args:
            no: Draw number the candidates are generated for.
            df_ball: Draw history DataFrame handed to the filter.
            filter_ball: Optional numbers to exclude; a combination containing
                any of them is skipped without consulting the filter.
        """
        final_candidates = []

        excluded = set(filter_ball) if filter_ball is not None else set()

        # Stream the combinations instead of materialising all 8,145,060.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):

            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if not excluded.isdisjoint(ball):
                continue

            # NOTE(review): the filter receives a tuple here, while every other
            # caller passes a list - confirm the filter accepts both.
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            filter_size = len(filter_type)

            if filter_size:
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count and print the draws whose winning numbers pass ``extract_final_candidates``.

        Args:
            df_ball: Draw history DataFrame (columns ``no, b1..b6, bn``).
            p_win_count: Win count from a previous step, printed for comparison.
            filter_ball: Optional numbers; draws whose winning numbers contain
                any of them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()

        win_count = 0
        # Walks rows from the last down to index 1. Row 0 is not evaluated,
        # presumably because the first draw has no previous draw - confirm.
        for i in range(len(df_ball) - 1, 0, -1):

            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            if not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        Args:
            df_ball: Draw history DataFrame (columns ``no, b1..b6, bn``).
            nos: Iterable of draw numbers to check. None means nothing to do.

        Returns:
            Dict mapping each draw whose winning combination passed
            ``extract_final_candidates`` to its six winning numbers.
        """
        win_history = {}

        # The default previously crashed in the loop header; treat it as empty.
        if nos is None:
            nos = ()

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            answer_set = set(answer)

            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                # Only the combination equal to the winning numbers can count as
                # a win, so do the cheap set test first and consult the filter
                # for that single combination instead of for all of them.
                if len(answer_set.intersection(combo)) != 6:
                    continue

                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type):
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                # No later combination can match all six numbers.
                break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':

    # Command-line options: resource directory, optional ruleset, scored range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources

    # Load the full history txt so previous-draw / window features are computed
    # normally; scoring itself is limited to the requested test range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball = []
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(
        df_ball,
        start_no=args.start_no,
        end_no=args.end_no,
        filter_ball=filter_ball,
    )
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # A disabled "STEP #0" (final candidate selection for the latest draw, written
    # to resources/final_candidates.biz_a1.txt) and a disabled "STEP #2"
    # (filterTest.check_filter_method(df_ball, win_count)) used to follow here.

    # Original version (pinned to the feature file): 22 wins
|
||||
236
test_2.py
236
test_2.py
@@ -1,236 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_2 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
|
||||
|
||||
ballFilter = None
|
||||
|
||||
def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
|
||||
# test는 이전회차/최근 N주 윈도우 feature가 필수이므로 전체 히스토리(json)를 사용해야 한다.
|
||||
lottoHistoryFileName = os.path.join(resources_path, history_json)
|
||||
self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)
|
||||
|
||||
return
|
||||
|
||||
def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
|
||||
win_count = 0
|
||||
|
||||
no_filter_ball = {}
|
||||
|
||||
printLog = True
|
||||
filter_dic = {}
|
||||
filter_dic_len = {}
|
||||
filter_dic_1 = {}
|
||||
filter_dic_2 = {}
|
||||
# df_ball 은 전체 히스토리일 수 있으며, 채점은 [start_no, end_no] 범위만 수행한다.
|
||||
for i in range(len(df_ball) - 1, -1, -1):
|
||||
|
||||
no = df_ball['no'].iloc[i]
|
||||
no = int(no)
|
||||
if no < start_no or end_no < no:
|
||||
continue
|
||||
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
|
||||
answer = answer[1:7]
|
||||
|
||||
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
|
||||
filter_type = list(filter_type)
|
||||
size = len(filter_type)
|
||||
|
||||
if size == 0:
|
||||
win_count += 1
|
||||
no_filter_ball[no] = answer
|
||||
print("\t", no)
|
||||
elif size == 1:
|
||||
key = filter_type[0]
|
||||
if key not in filter_dic_1:
|
||||
filter_dic_1[key] = 1
|
||||
else:
|
||||
filter_dic_1[key] += 1
|
||||
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
elif size == 2:
|
||||
key = ','.join(filter_type)
|
||||
if key not in filter_dic_2:
|
||||
filter_dic_2[key] = 1
|
||||
else:
|
||||
filter_dic_2[key] += 1
|
||||
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
else:
|
||||
if printLog:
|
||||
print("\t", no, filter_type)
|
||||
|
||||
# 회차별 필터개수가 적은 것을 정렬하기 위함
|
||||
if size not in filter_dic_len:
|
||||
filter_dic_len[size] = []
|
||||
filter_dic_len[size].append(filter_type)
|
||||
|
||||
for f_t in filter_type:
|
||||
if f_t not in filter_dic:
|
||||
filter_dic[f_t] = 1
|
||||
else:
|
||||
filter_dic[f_t] += 1
|
||||
|
||||
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
|
||||
sorted_filter_dic_len = sorted(filter_dic_len.keys())
|
||||
for filter_count in sorted_filter_dic_len:
|
||||
for filter_type in filter_dic_len[filter_count]:
|
||||
print("\t\t>{} > {}".format(filter_count, filter_type))
|
||||
|
||||
print("\n\t[걸러진 유일 필터]")
|
||||
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic_1)):
|
||||
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
|
||||
|
||||
print("\n\t[2개 필터에 걸린 경우]")
|
||||
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic_2)):
|
||||
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
|
||||
|
||||
print("\n\t[Filter 유형 별 걸린 개수]")
|
||||
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
|
||||
for i in range(len(sorted_filter_dic)):
|
||||
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
|
||||
|
||||
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
|
||||
total = max(0, end_no - start_no + 1)
|
||||
rate = (100 * len(no_filter_ball) / total) if total else 0.0
|
||||
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), total, rate))
|
||||
for no in no_filter_ball:
|
||||
print("\t\t>", no, no_filter_ball[no])
|
||||
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), total, rate))
|
||||
|
||||
return win_count
|
||||
|
||||
def find_final_candidates(self, no, df_ball, filter_ball=None):
|
||||
final_candidates = []
|
||||
|
||||
generation_balls = list(range(1, 46))
|
||||
|
||||
nCr = list(itertools.combinations(generation_balls, 6))
|
||||
for idx, ball in enumerate(nCr):
|
||||
|
||||
if idx % 1000000 == 0:
|
||||
print(" - {} processed...".format(idx))
|
||||
|
||||
if filter_ball is not None and 0 < len(set(ball) & set(filter_ball)):
|
||||
continue
|
||||
|
||||
filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
|
||||
filter_size = len(filter_type)
|
||||
|
||||
if filter_size:
|
||||
continue
|
||||
|
||||
final_candidates.append(ball)
|
||||
|
||||
return final_candidates
|
||||
|
||||
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
|
||||
|
||||
win_count = 0
|
||||
for i in range(len(df_ball)-1, 0, -1):
|
||||
|
||||
no = df_ball['no'].iloc[i]
|
||||
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
|
||||
answer = answer[1:7]
|
||||
|
||||
if filter_ball is not None and len(set(answer) & set(filter_ball)):
|
||||
continue
|
||||
|
||||
filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
|
||||
|
||||
if len(filter_type) == 0:
|
||||
win_count += 1
|
||||
print("\t\t>{}. {}".format(no, answer))
|
||||
|
||||
print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
|
||||
|
||||
return
|
||||
|
||||
def validate(self, df_ball, nos=None):
    """Check, per draw, whether the real winning numbers survive the filter.

    Args:
        df_ball: Draw history; column ``no`` is the draw number and
            positions 1..6 of a row are the six winning balls.
        nos: Iterable of draw numbers to check. ``None`` (the default) is
            treated as "nothing to check" instead of raising ``TypeError``.

    Returns:
        dict mapping each draw number whose winning combination passed
        ``extract_final_candidates`` to that winning combination.
    """
    win_history = {}

    if nos is None:
        return win_history

    for no in nos:
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
        answer_set = set(answer)

        # Lazy enumeration: no need to hold all 8,145,060 tuples in memory.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            # Cheap test first: only the combination identical to the answer
            # can be a hit, so the expensive filter is called at most once.
            # NOTE(review): assumes extract_final_candidates has no side
            # effects that later calls rely on - confirm.
            if len(answer_set.intersection(ball)) != 6:
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                list(ball), no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
            # No other combination can contain all six winning balls.
            break

    return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: score the filter on the draws in
    # [--start-no, --end-no] and report how long that took.

    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources

    # Load the full history txt so previous-draw/window features can be
    # computed, but only the test range is scored.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled candidate-export step, kept as an inert string literal.
    # Fixed inside it: each output line now writes the candidate `ball`
    # (it used to write `answer` once per candidate).
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

# Original version (pinned to the feature file): 22 wins
|
||||
236
test_3.py
236
test_3.py
@@ -1,236 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Evaluation harness around a ``BallFilter`` instance.

    The filter is consulted through ``filter`` / ``extract_final_candidates``;
    both are treated as returning a collection of rejection reasons where an
    empty collection means "passed".
    """

    # Set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the underlying filter from ``resources_path/history_json``.

        Args:
            resources_path: Directory holding the history files.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: History file name inside ``resources_path``.
        """
        # Testing needs previous-draw / recent-N-week window features, so the
        # full history (json) must be used.
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

        return

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Run the filter on each winning draw in ``[start_no, end_no]`` and report.

        Prints one line per scored draw followed by several frequency tables
        (reasons grouped by how many fired, single-reason hits, two-reason
        hits, per-reason totals) and the list of draws that passed.

        Args:
            df_ball: Draw history (may be the full history); column ``no`` is
                the draw number, row positions 1..6 are the winning balls.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).
            filter_ball: Accepted for interface compatibility; not used here.

        Returns:
            int: number of scored draws that no filter rejected.
        """
        win_count = 0
        no_filter_ball = {}
        printLog = True

        hits_by_reason = {}
        reasons_by_size = {}
        single_hits = {}
        double_hits = {}

        # df_ball may be the whole history; only [start_no, end_no] is scored.
        for i in range(len(df_ball) - 1, -1, -1):

            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            # Sort the reasons: the filter result is iterated in arbitrary
            # order, which made the joined two-reason key order-dependent
            # ("a,b" vs "b,a") and could split one pair over two counters.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    double_hits[key] = double_hits.get(key, 0) + 1

                if printLog:
                    print("\t", no, filter_type)

            # Group the reason lists by how many reasons fired, so draws
            # rejected by the fewest filters can be listed first.
            reasons_by_size.setdefault(size, []).append(filter_type)

            for reason in filter_type:
                hits_by_reason[reason] = hits_by_reason.get(reason, 0) + 1

        def print_ranked(title, counts):
            # Most frequent first; ties keep first-seen order (stable sort).
            print(title)
            for name, hits in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
                print("\t\t>", name, "->", hits)

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(reasons_by_size):
            for reasons in reasons_by_size[filter_count]:
                print("\t\t>{} > {}".format(filter_count, reasons))

        print_ranked("\n\t[걸러진 유일 필터]", single_hits)
        print_ranked("\n\t[2개 필터에 걸린 경우]", double_hits)
        print_ranked("\n\t[Filter 유형 별 걸린 개수]", hits_by_reason)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" = two decimals; the former "{:.2}" meant two significant
        # digits and rendered e.g. 100.0 as "1e+02".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Enumerate every 6-of-45 combination and keep those no filter rejects.

        Args:
            no: Draw number the candidates are generated for.
            df_ball: Draw history, forwarded unchanged to the ball filter.
            filter_ball: Optional iterable of ball numbers; combinations
                containing any of them are skipped without calling the filter.

        Returns:
            list of 6-tuples for which ``self.ballFilter.filter`` returned an
            empty result.
        """
        # Build the exclusion set once instead of once per combination.
        blocked = None if filter_ball is None else set(filter_ball)

        final_candidates = []

        # Lazy enumeration avoids holding all 8,145,060 tuples in memory.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):

            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if blocked is not None and not blocked.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count how many historical draws pass ``extract_final_candidates``.

        Rows are visited from the last one down to index 1 (row 0 is never
        checked). Passing draws and a summary line are printed.

        Args:
            df_ball: Draw history frame.
            p_win_count: Reference count, only echoed in the summary line.
            filter_ball: Optional iterable of ball numbers; draws containing
                any of them are skipped.

        Returns:
            None.
        """
        win_count = 0
        for i in range(len(df_ball)-1, 0, -1):

            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )

        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the real winning numbers survive the filter.

        Args:
            df_ball: Draw history frame.
            nos: Iterable of draw numbers to check. ``None`` (the default) is
                treated as "nothing to check" instead of raising ``TypeError``.

        Returns:
            dict mapping each draw number whose winning combination passed
            ``extract_final_candidates`` to that winning combination.
        """
        win_history = {}

        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                # Cheap test first: only the combination identical to the
                # answer can be a hit, so the filter is called at most once.
                # NOTE(review): assumes extract_final_candidates has no side
                # effects that later calls rely on - confirm.
                if len(answer_set.intersection(ball)) != 6:
                    continue

                filter_type = self.ballFilter.extract_final_candidates(
                    list(ball), no=no, until_end=True, df=df_ball
                )
                if len(filter_type) == 0:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                # No other combination can contain all six winning balls.
                break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: score the filter on the draws in
    # [--start-no, --end-no] and report how long that took.

    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources

    # Load the full history txt so previous-draw/window features can be
    # computed, but only the test range is scored.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled candidate-export step, kept as an inert string literal.
    # Fixed inside it: each output line now writes the candidate `ball`
    # (it used to write `answer` once per candidate).
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

# Original version (pinned to the feature file): 22 wins
|
||||
@@ -1,405 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
학습 구간(1~800회) 당첨번호로 final_BallFilter.extract_final_candidates 에 쓸 허용 집합을 계산합니다.
|
||||
표준 라이브러리 + pandas(df 호환)만 사용합니다.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
HISTORY = ROOT / "resources" / "lotto_history.txt"
|
||||
BALLFILTER_SRC = ROOT / "BallFilter_25.py"
|
||||
OUT = ROOT / "final_filter_params.py"
|
||||
|
||||
TRAIN_LO = 1
|
||||
TRAIN_HI = 800
|
||||
|
||||
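# To avoid an overly wide union of training values, each numeric feature is reduced to a
# percentile band over its distinct values (see pct_band_unique).
# NOTE(review): the original note said that values of training draws falling outside the band
# are added back (100% coverage of training winners), but no such step is visible in main() -
# confirm which of the two is intended.
# A narrower band makes the filter stronger; tune PCT_LO / PCT_HI against the train/validation
# results reported by final_filterTest.py.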
|
||||
PCT_LO = 8
|
||||
PCT_HI = 92
|
||||
|
||||
PRIME = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43}
|
||||
COMPOSITE = {4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45}
|
||||
|
||||
|
||||
def load_draws():
    """Read the history CSV and return ``{draw_no: sorted six main balls}``.

    Empty CSV rows are skipped. Column 0 is the draw number and columns 1..6
    are the balls; anything after that is ignored. The mapping is built in
    ascending draw-number order.
    """
    parsed = []
    with open(HISTORY, newline="", encoding="utf-8") as handle:
        for record in csv.reader(handle):
            if record:
                draw_no = int(record[0])
                parsed.append((draw_no, sorted(int(cell) for cell in record[1:7])))
    return dict(sorted(parsed, key=lambda item: item[0]))
|
||||
|
||||
|
||||
def get_ac(ball):
    """Return the number of distinct pairwise differences minus five.

    Differences are taken as ``ball[i] - ball[j]`` for every ``j < i`` over
    the first six entries of ``ball``.
    """
    gaps = {ball[hi] - ball[lo] for hi in range(1, 6) for lo in range(hi)}
    return len(gaps) - (6 - 1)
|
||||
|
||||
|
||||
def interval_sum(ball):
    """Return the sum of the five gaps between neighbouring entries of ``ball``."""
    total = 0
    for pos in range(1, 6):
        total += ball[pos] - ball[pos - 1]
    return total
|
||||
|
||||
|
||||
def first_letter_sum(ball):
    """Return the sum of the leading digits of the two-character numbers in ``ball``.

    Numbers whose decimal text is not exactly two characters long contribute
    nothing.
    """
    total = 0
    for number in ball:
        text = str(number)
        if len(text) == 2:
            total += int(text[0])
    return total
|
||||
|
||||
|
||||
def last_letter_sum(ball):
    """Return the sum of the final digits of the one- and two-character numbers.

    Two-character numbers contribute their second digit, one-character
    numbers contribute themselves; any other length contributes nothing.
    """
    total = 0
    for number in ball:
        text = str(number)
        if len(text) == 2:
            total += int(text[1])
        elif len(text) == 1:
            total += int(text)
    return total
|
||||
|
||||
|
||||
def uniq_end_digits(ball):
    """Return how many different values ``b % 10`` takes over ``ball``."""
    endings = set()
    for number in ball:
        endings.add(number % 10)
    return len(endings)
|
||||
|
||||
|
||||
def high_low(ball):
    """Return ``(low, high)``: counts of numbers below 23 and above 23.

    The value 23 itself is counted in neither group.
    """
    low = 0
    high = 0
    for number in ball:
        if number < 23:
            low += 1
        elif 23 < number:
            high += 1
    return low, high
|
||||
|
||||
|
||||
def section10_count(ball):
    """Return how many different decades (``int(b / 10)``) occur in ``ball``."""
    return len({int(number / 10) for number in ball})
|
||||
|
||||
|
||||
def count_mult(ball, m):
    """Return how many entries of ``ball`` are divisible by ``m``."""
    return len([number for number in ball if number % m == 0])
|
||||
|
||||
|
||||
def continus_max(ball):
    """Return the length of the longest run of consecutive numbers.

    Only the first six entries of ``ball`` are inspected, in the order given;
    a run grows whenever an entry is exactly one more than its predecessor.
    The result is at least 1.
    """
    longest = 1
    current = 1
    for pos in range(1, 6):
        # Either extend the current run or start a new one of length 1.
        current = current + 1 if ball[pos] == ball[pos - 1] + 1 else 1
        if current > longest:
            longest = current
    return longest
|
||||
|
||||
|
||||
def weeks_freq(draws_map, answer, no, week):
    """Count entries of ``answer`` that appeared in the ``week`` draws before ``no``.

    Args:
        draws_map: Mapping ``draw_no -> list of balls``.
        answer: Balls to look up.
        no: Reference draw number; draws ``no - 1`` down to ``no - week`` are
            consulted, and draw numbers missing from ``draws_map`` are ignored.
        week: Size of the look-back window in draws.

    Returns:
        int: number of entries of ``answer`` found in the window.
    """
    recent = set()
    for back in range(1, week + 1):
        recent.update(draws_map.get(no - back, ()))
    hits = 0
    for number in answer:
        if number in recent:
            hits += 1
    return hits
|
||||
|
||||
|
||||
def pct_band_unique(values, lo=PCT_LO, hi=PCT_HI):
    """Keep only the distinct values lying inside a percentile band.

    The distinct values are sorted and the band limits are read at positions
    ``int(lo/100 * (n-1))`` and ``int(hi/100 * (n-1))``. When there are six
    or fewer distinct values, all of them are kept.

    Returns:
        set of the retained values (empty for empty input).
    """
    if not values:
        return set()

    ordered = sorted(set(values))
    count = len(ordered)
    if count <= 6:
        return set(ordered)

    span = count - 1
    floor_value = ordered[int((lo / 100.0) * span)]
    ceil_value = ordered[int((hi / 100.0) * span)]

    kept = set()
    for value in ordered:
        if floor_value <= value <= ceil_value:
            kept.add(value)
    return kept
|
||||
|
||||
|
||||
def parse_pair_triple_rules():
    """Extract pair and triple rules from the text of ``BALLFILTER_SRC``.

    The source is scanned for ``len(set_ball & {...}) == 2`` and
    ``... == 3`` expressions. A match is kept only when the braces hold
    exactly two (respectively three) comma-separated integers.

    Returns:
        tuple ``(pairs, triples)``: two lists of frozensets, in source order.
    """
    source = BALLFILTER_SRC.read_text(encoding="utf-8")

    def collect(pattern, expected_size):
        # One pass over the source for a single rule arity.
        found = []
        for match in re.finditer(pattern, source):
            numbers = [int(token.strip()) for token in match.group(1).split(",")]
            if len(numbers) == expected_size:
                found.append(frozenset(numbers))
        return found

    pairs = collect(r"len\(set_ball & \{([^}]+)\}\) == 2", 2)
    triples = collect(r"len\(set_ball & \{([^}]+)\}\) == 3", 3)
    return pairs, triples
|
||||
|
||||
|
||||
def main():
    """Derive allow-sets from the training draws and write them to ``OUT``.

    Steps:
      1. Load all draws and the pair/triple rules parsed from the reference
         filter source.
      2. Drop every rule whose pair/triple occurred together in a training
         draw; the remainder become the block lists.
      3. For each training draw that has a predecessor, record feature values
         and their absolute difference to the previous draw's value.
      4. Reduce each numeric feature to its percentile band.
      5. Emit a Python module with the resulting constants and print a summary.
    """
    draws = load_draws()
    pair_rules, triple_rules = parse_pair_triple_rules()

    train_draws = {n: draws[n] for n in range(TRAIN_LO, TRAIN_HI + 1) if n in draws}

    # Pairs/triples seen together in a training winner must not be blocked.
    train_pairs_seen = set()
    train_triples_seen = set()
    for balls in train_draws.values():
        for i in range(6):
            for j in range(i + 1, 6):
                train_pairs_seen.add(frozenset((balls[i], balls[j])))
                for k in range(j + 1, 6):
                    train_triples_seen.add(frozenset((balls[i], balls[j], balls[k])))

    pair_block = [rule for rule in pair_rules if rule not in train_pairs_seen]
    triple_block = [rule for rule in triple_rules if rule not in train_triples_seen]

    sets = defaultdict(set)
    flags_prev = {"need_relax_previous": False, "need_relax_prev7": False}

    multiples = (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23)
    windows = (8, 12, 16, 20)

    def record(key, current, previous):
        # Store a feature value under `key` and its absolute change under `key_diff`.
        sets[key].add(current)
        sets[key + "_diff"].add(abs(current - previous))

    for no in range(2, TRAIN_HI + 1):
        if no not in draws or (no - 1) not in draws:
            continue
        ball = draws[no]
        p_ball = draws[no - 1]

        total = sum(ball)
        p_total = sum(p_ball)
        record("sum6", total, p_total)
        record("avg6", total // 6, p_total // 6)
        record("sum3f", sum(ball[:3]), sum(p_ball[:3]))
        record("sum3b", sum(ball[3:6]), sum(p_ball[3:6]))

        sets["hl_allowed"].add(high_low(ball))

        record("go_sum", ball[0] + ball[5], p_ball[0] + p_ball[5])
        record("interval", interval_sum(ball), interval_sum(p_ball))
        record("first_letter", first_letter_sum(ball), first_letter_sum(p_ball))
        record("last_letter", last_letter_sum(ball), last_letter_sum(p_ball))
        record("b0", ball[0], p_ball[0])
        record("b5", ball[5], p_ball[5])
        record("uniq_end", uniq_end_digits(ball), uniq_end_digits(p_ball))
        record("ac", get_ac(ball), get_ac(p_ball))

        for m in multiples:
            record(f"mul{m}", count_mult(ball, m), count_mult(p_ball, m))

        sets["prime_n"].add(len(set(ball) & PRIME))

        # These two use "<name>_diff" keys without the "_n" part, so they are
        # stored explicitly instead of through record().
        composite_now = len(set(ball) & COMPOSITE)
        sets["composite_n"].add(composite_now)
        sets["composite_diff"].add(abs(composite_now - len(set(p_ball) & COMPOSITE)))

        even_now = sum(1 for b in ball if b % 2 == 0)
        even_before = sum(1 for b in p_ball if b % 2 == 0)
        sets["even_n"].add(even_now)
        sets["even_diff"].add(abs(even_now - even_before))

        record("sec10", section10_count(ball), section10_count(p_ball))

        for wk in windows:
            # NOTE(review): the previous draw is scored against the window
            # ending at `no - 1`, which contains that draw itself, so its
            # count always equals the number of balls - confirm this mirrors
            # the reference filter.
            record(f"w{wk}", weeks_freq(draws, ball, no, wk), weeks_freq(draws, p_ball, no, wk))

        sets["continus_max"].add(continus_max(ball))

        # filterPreviousNumber: the draw is "bad" when none of its balls is
        # equal or adjacent to a ball of the previous draw.
        pb_set = set(p_ball)
        if not any(b in pb_set or b - 1 in pb_set or b + 1 in pb_set for b in ball):
            flags_prev["need_relax_previous"] = True

        # filterAllPreivous7: all six balls already appeared in the previous
        # seven draws.
        pb7 = set()
        for earlier in range(no - 1, no - 8, -1):
            if earlier in draws:
                pb7.update(draws[earlier])
        if len(set(ball) & pb7) == 6:
            flags_prev["need_relax_prev7"] = True

    # Feature keys in the order they are written to the output module; the
    # high/low lines are emitted between the two groups.
    keys_before_hl = [
        "sum6", "sum6_diff", "avg6", "avg6_diff",
        "sum3f", "sum3f_diff", "sum3b", "sum3b_diff",
    ]
    keys_after_hl = [
        "go_sum", "go_sum_diff", "interval", "interval_diff",
        "first_letter", "first_letter_diff", "last_letter", "last_letter_diff",
        "b0", "b0_diff", "b5", "b5_diff",
        "uniq_end", "uniq_end_diff", "ac", "ac_diff",
    ]
    for m in multiples:
        keys_after_hl.extend([f"mul{m}", f"mul{m}_diff"])
    keys_after_hl.extend([
        "prime_n", "composite_n", "composite_diff",
        "even_n", "even_diff", "sec10", "sec10_diff",
    ])
    for wk in windows:
        keys_after_hl.extend([f"w{wk}", f"w{wk}_diff"])
    keys_after_hl.append("continus_max")

    # Tighten every numeric feature to its percentile band.
    for key in keys_before_hl + keys_after_hl:
        sets[key] = pct_band_unique(sets[key])

    # NOTE(review): this holds (0,0), (0,1), (1,0) and (1,1); the original
    # comment mentioned only (0,1)/(1,0) - confirm which is intended.
    hl_skip = {(l, h) for l in (0, 1) for h in (0, 1)}

    lines = [
        "# -*- coding: utf-8 -*-",
        '"""학습 구간 {}~{}회 기준 자동 생성 — tools/compute_final_filter_params.py"""'.format(
            TRAIN_LO, TRAIN_HI
        ),
        "",
        "TRAIN_RANGE = ({}, {})".format(TRAIN_LO, TRAIN_HI),
        "DISABLE_FILTER_PREVIOUS_NUMBER = {}".format(
            str(flags_prev["need_relax_previous"])
        ),
        "DISABLE_FILTER_ALL_PREVIOUS_7 = {}".format(str(flags_prev["need_relax_prev7"])),
        "",
    ]

    allow_names = []

    def emit_allow(key):
        # Every constant is named ALLOW_<KEY IN UPPER CASE>.
        name = "ALLOW_" + key.upper()
        allow_names.append(name)
        lines.append("{} = {}".format(name, repr(sorted(sets[key]))))

    for key in keys_before_hl:
        emit_allow(key)
    lines.append("HL_SKIP = {}".format(repr(sorted(hl_skip))))
    lines.append("HL_SEEN = {}".format(repr(sorted(sets['hl_allowed']))))
    for key in keys_after_hl:
        emit_allow(key)

    lines.append("PAIR_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in pair_block])))
    lines.append("TRIPLE_BLOCKLIST = {}".format(repr([sorted(list(x)) for x in triple_block])))
    lines.extend(["", "# frozenset 캐시", ""])
    for name in allow_names:
        lines.append("_F_{} = frozenset({})".format(name.replace("ALLOW_", "", 1), name))
    lines.append("_F_HL_SEEN = frozenset(HL_SEEN)")
    lines.append("")

    OUT.write_text("\n".join(lines) + "\n", encoding="utf-8")

    print("Wrote", OUT)
    print("pair rules:", len(pair_rules), "-> block", len(pair_block))
    print("triple rules:", len(triple_rules), "-> block", len(triple_block))
    print("DISABLE_FILTER_PREVIOUS_NUMBER", flags_prev["need_relax_previous"])
    print("DISABLE_FILTER_ALL_PREVIOUS_7", flags_prev["need_relax_prev7"])


if __name__ == "__main__":
    main()
|
||||
231
train_1.py
231
train_1.py
@@ -1,231 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_1 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Evaluation harness around a ``BallFilter`` instance (training range).

    The filter is consulted through ``filter`` / ``extract_final_candidates``;
    both are treated as returning a collection of rejection reasons where an
    empty collection means "passed".
    """

    # Set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Create the underlying filter from ``resources_path/lotto_history.json``.

        Args:
            resources_path: Directory holding the history files.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

        return

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on each winning draw in ``[start_no, end_no]`` and report.

        The first 20 rows of ``df_ball`` are never scored. Prints one line per
        scored draw followed by frequency tables and the draws that passed.

        Args:
            df_ball: Draw history; column ``no`` is the draw number, row
                positions 1..6 are the winning balls.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            int: number of scored draws that no filter rejected.
        """
        win_count = 0
        scored = 0
        no_filter_ball = {}
        printLog = True

        hits_by_reason = {}
        reasons_by_size = {}
        single_hits = {}
        double_hits = {}

        for i in range(len(df_ball)-1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue
            scored += 1

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            # Sort the reasons: the filter result is iterated in arbitrary
            # order, which made the joined two-reason key order-dependent
            # ("a,b" vs "b,a") and could split one pair over two counters.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    double_hits[key] = double_hits.get(key, 0) + 1

                if printLog:
                    print("\t", no, filter_type)

            # Group the reason lists by how many reasons fired, so draws
            # rejected by the fewest filters can be listed first.
            reasons_by_size.setdefault(size, []).append(filter_type)

            for reason in filter_type:
                hits_by_reason[reason] = hits_by_reason.get(reason, 0) + 1

        def print_ranked(title, counts):
            # Most frequent first; ties keep first-seen order (stable sort).
            print(title)
            for name, hits in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
                print("\t\t>", name, "->", hits)

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(reasons_by_size):
            for reasons in reasons_by_size[filter_count]:
                print("\t\t>{} > {}".format(filter_count, reasons))

        print_ranked("\n\t[걸러진 유일 필터]", single_hits)
        print_ranked("\n\t[2개 필터에 걸린 경우]", double_hits)
        print_ranked("\n\t[Filter 유형 별 걸린 개수]", hits_by_reason)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The rate is relative to the draws actually scored, not to the whole
        # history frame, and uses "{:.2f}" (two decimals); the former "{:.2}"
        # meant two significant digits and rendered e.g. 100.0 as "1e+02".
        rate = (100 * len(no_filter_ball) / scored) if scored else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), scored, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Enumerate every 6-of-45 combination and keep those no filter rejects.

        Args:
            no: Draw number the candidates are generated for.
            df_ball: Draw history, forwarded unchanged to the ball filter.
            filter_ball: Optional iterable of ball numbers; combinations
                containing any of them are skipped without calling the filter.

        Returns:
            list of 6-tuples for which ``self.ballFilter.filter`` returned an
            empty result.
        """
        # Build the exclusion set once instead of once per combination.
        blocked = None if filter_ball is None else set(filter_ball)

        final_candidates = []

        # Lazy enumeration avoids holding all 8,145,060 tuples in memory.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):

            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if blocked is not None and not blocked.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count how many historical draws pass ``extract_final_candidates``.

        Rows are visited from the last one down to index 1 (row 0 is never
        checked). Passing draws and a summary line are printed.

        Args:
            df_ball: Draw history frame.
            p_win_count: Reference count, only echoed in the summary line.
            filter_ball: Optional iterable of ball numbers; draws containing
                any of them are skipped.

        Returns:
            None.
        """
        win_count = 0
        for i in range(len(df_ball)-1, 0, -1):

            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue

            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)

            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )

        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the real winning numbers survive the filter.

        Args:
            df_ball: Draw history frame.
            nos: Iterable of draw numbers to check. ``None`` (the default) is
                treated as "nothing to check" instead of raising ``TypeError``.

        Returns:
            dict mapping each draw number whose winning combination passed
            ``extract_final_candidates`` to that winning combination.
        """
        win_history = {}

        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)

            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                # Cheap test first: only the combination identical to the
                # answer can be a hit, so the filter is called at most once.
                # NOTE(review): assumes extract_final_candidates has no side
                # effects that later calls rely on - confirm.
                if len(answer_set.intersection(ball)) != 6:
                    continue

                filter_type = self.ballFilter.extract_final_candidates(
                    list(ball), no=no, until_end=True, df=df_ball
                )
                if len(filter_type) == 0:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                # No other combination can contain all six winning balls.
                break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: score the filter on the draws in
    # [--start-no, --end-no] and report how long that took.

    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled candidate-export step, kept as an inert string literal.
    # Fixed inside it: each output line now writes the candidate `ball`
    # (it used to write `answer` once per candidate).
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

# Original version (pinned to the feature file): 22 wins
|
||||
231
train_2.py
231
train_2.py
@@ -1,231 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_2 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws.

    A draw "wins" when the filter returns an empty collection of failure
    reasons for the real winning numbers.  ``df_ball`` arguments are
    DataFrames whose first column is the draw number ``no`` followed by the
    six main balls and the bonus ball.
    """

    # Filter under test; assigned in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter from ``<resources_path>/lotto_history.json``.

        Args:
            resources_path: Directory holding the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on every real draw in ``[start_no, end_no]`` and report.

        Prints, per draw, the filters that rejected the winning numbers,
        followed by several frequency tables and the pass rate.

        Args:
            df_ball: Full draw history (may extend beyond the scored range).
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            Number of scored draws that no filter rejected.
        """
        win_count = 0
        scored = 0                # draws actually evaluated
        no_filter_ball = {}       # draw no -> winning balls, for clean draws
        print_log = True
        hits_per_filter = {}      # filter name -> draws it rejected
        draws_by_hit_count = {}   # number of filters tripped -> list of reason lists
        single_hits = {}          # filter name -> draws rejected by it alone
        pair_hits = {}            # "a,b" -> draws rejected by exactly that pair

        # Walk from the newest row backwards.  Rows 0-19 are never scored
        # (presumably to leave history for window features - TODO confirm).
        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue
            scored += 1

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            # Sorted so that log lines and two-filter keys do not depend on
            # set iteration order.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    pair_hits[key] = pair_hits.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)

            # Recorded for every evaluated draw (clean ones included) so the
            # report can list draws ordered by how many filters they tripped.
            draws_by_hit_count.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                hits_per_filter[f_t] = hits_per_filter.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(draws_by_hit_count):
            for reasons in draws_by_hit_count[filter_count]:
                print("\t\t>{} > {}".format(filter_count, reasons))

        # The three tables below are ordered by descending hit count.
        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(single_hits.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(pair_hits.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(hits_per_filter.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # Rate is relative to the draws that were scored, and uses a
        # fixed-point format ("{:.2}" would render 12.0 as "1.2e+01").
        rate = (100 * len(no_filter_ball) / scored) if scored else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), scored, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter does not reject.

        Args:
            no: Draw number passed through to the filter.
            df_ball: Draw history passed through to the filter.
            filter_ball: Optional balls to exclude; any combination containing
                one of them is skipped without calling the filter.

        Returns:
            List of 6-tuples, in ``itertools.combinations`` order.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []

        # Stream the 8,145,060 combinations instead of building a list.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count real draws accepted by ``extract_final_candidates`` and print them.

        Rows are visited from the last one down to index 1; index 0 is not
        checked.

        Args:
            df_ball: Draw history.
            p_win_count: Win count from a previous step, printed for comparison.
            filter_ball: Optional balls to exclude; draws containing any of
                them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, for each draw in ``nos``, whether the real numbers survive.

        Combinations are fed to ``extract_final_candidates`` in order until the
        one equal to the winning numbers is accepted.

        Args:
            df_ball: Draw history.
            nos: Iterable of draw numbers to validate; ``None`` validates nothing.

        Returns:
            Dict mapping each winning draw number to its six balls.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            target = set(answer)

            # Lazy iteration: avoids materialising all combinations per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(target & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: score the filter on the training range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step, kept as a string literal: builds the final candidate list
    # for the latest draw and writes one candidate per line.  The writer now
    # serialises each candidate (`ball`); it previously wrote `answer` on
    # every line.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled step 2 (re-check the win count), kept verbatim:
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
||||
231
train_3.py
231
train_3.py
@@ -1,231 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws.

    A draw "wins" when the filter returns an empty collection of failure
    reasons for the real winning numbers.  ``df_ball`` arguments are
    DataFrames whose first column is the draw number ``no`` followed by the
    six main balls and the bonus ball.
    """

    # Filter under test; assigned in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter from ``<resources_path>/lotto_history.json``.

        Args:
            resources_path: Directory holding the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on every real draw in ``[start_no, end_no]`` and report.

        Prints, per draw, the filters that rejected the winning numbers,
        followed by several frequency tables and the pass rate.

        Args:
            df_ball: Full draw history (may extend beyond the scored range).
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            Number of scored draws that no filter rejected.
        """
        win_count = 0
        scored = 0                # draws actually evaluated
        no_filter_ball = {}       # draw no -> winning balls, for clean draws
        print_log = True
        hits_per_filter = {}      # filter name -> draws it rejected
        draws_by_hit_count = {}   # number of filters tripped -> list of reason lists
        single_hits = {}          # filter name -> draws rejected by it alone
        pair_hits = {}            # "a,b" -> draws rejected by exactly that pair

        # Walk from the newest row backwards.  Rows 0-19 are never scored
        # (presumably to leave history for window features - TODO confirm).
        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue
            scored += 1

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            # Sorted so that log lines and two-filter keys do not depend on
            # set iteration order.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    pair_hits[key] = pair_hits.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)

            # Recorded for every evaluated draw (clean ones included) so the
            # report can list draws ordered by how many filters they tripped.
            draws_by_hit_count.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                hits_per_filter[f_t] = hits_per_filter.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(draws_by_hit_count):
            for reasons in draws_by_hit_count[filter_count]:
                print("\t\t>{} > {}".format(filter_count, reasons))

        # The three tables below are ordered by descending hit count.
        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(single_hits.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(pair_hits.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(hits_per_filter.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # Rate is relative to the draws that were scored, and uses a
        # fixed-point format ("{:.2}" would render 12.0 as "1.2e+01").
        rate = (100 * len(no_filter_ball) / scored) if scored else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), scored, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter does not reject.

        Args:
            no: Draw number passed through to the filter.
            df_ball: Draw history passed through to the filter.
            filter_ball: Optional balls to exclude; any combination containing
                one of them is skipped without calling the filter.

        Returns:
            List of 6-tuples, in ``itertools.combinations`` order.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []

        # Stream the 8,145,060 combinations instead of building a list.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count real draws accepted by ``extract_final_candidates`` and print them.

        Rows are visited from the last one down to index 1; index 0 is not
        checked.

        Args:
            df_ball: Draw history.
            p_win_count: Win count from a previous step, printed for comparison.
            filter_ball: Optional balls to exclude; draws containing any of
                them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, for each draw in ``nos``, whether the real numbers survive.

        Combinations are fed to ``extract_final_candidates`` in order until the
        one equal to the winning numbers is accepted.

        Args:
            df_ball: Draw history.
            nos: Iterable of draw numbers to validate; ``None`` validates nothing.

        Returns:
            Dict mapping each winning draw number to its six balls.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            target = set(answer)

            # Lazy iteration: avoids materialising all combinations per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(target & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: score the filter on the training range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step, kept as a string literal: builds the final candidate list
    # for the latest draw and writes one candidate per line.  The writer now
    # serialises each candidate (`ball`); it previously wrote `answer` on
    # every line.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled step 2 (re-check the win count), kept verbatim:
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
||||
234
valid_1.py
234
valid_1.py
@@ -1,234 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_1 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws (validation run).

    A draw "wins" when the filter returns an empty collection of failure
    reasons for the real winning numbers.  ``df_ball`` arguments are
    DataFrames whose first column is the draw number ``no`` followed by the
    six main balls and the bonus ball.
    """

    # Filter under test; assigned in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from ``<resources_path>/<history_json>``.

        Args:
            resources_path: Directory holding the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: History file name inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on every real draw in ``[start_no, end_no]`` and report.

        Prints, per draw, the filters that rejected the winning numbers,
        followed by several frequency tables and the pass rate.

        Args:
            df_ball: Full draw history (may extend beyond the scored range).
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            Number of scored draws that no filter rejected.
        """
        win_count = 0
        no_filter_ball = {}       # draw no -> winning balls, for clean draws
        print_log = True
        hits_per_filter = {}      # filter name -> draws it rejected
        draws_by_hit_count = {}   # number of filters tripped -> list of reason lists
        single_hits = {}          # filter name -> draws rejected by it alone
        pair_hits = {}            # "a,b" -> draws rejected by exactly that pair

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            # Sorted so that log lines and two-filter keys do not depend on
            # set iteration order.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    pair_hits[key] = pair_hits.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)

            # Recorded for every evaluated draw (clean ones included) so the
            # report can list draws ordered by how many filters they tripped.
            draws_by_hit_count.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                hits_per_filter[f_t] = hits_per_filter.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(draws_by_hit_count):
            for reasons in draws_by_hit_count[filter_count]:
                print("\t\t>{} > {}".format(filter_count, reasons))

        # The three tables below are ordered by descending hit count.
        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(single_hits.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(pair_hits.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(hits_per_filter.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # Denominator is the width of the requested range.  Fixed-point
        # format: "{:.2}" would render 12.0 as "1.2e+01".
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter does not reject.

        Args:
            no: Draw number passed through to the filter.
            df_ball: Draw history passed through to the filter.
            filter_ball: Optional balls to exclude; any combination containing
                one of them is skipped without calling the filter.

        Returns:
            List of 6-tuples, in ``itertools.combinations`` order.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []

        # Stream the 8,145,060 combinations instead of building a list.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count real draws accepted by ``extract_final_candidates`` and print them.

        Rows are visited from the last one down to index 1; index 0 is not
        checked.

        Args:
            df_ball: Draw history.
            p_win_count: Win count from a previous step, printed for comparison.
            filter_ball: Optional balls to exclude; draws containing any of
                them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, for each draw in ``nos``, whether the real numbers survive.

        Combinations are fed to ``extract_final_candidates`` in order until the
        one equal to the winning numbers is accepted.

        Args:
            df_ball: Draw history.
            nos: Iterable of draw numbers to validate; ``None`` validates nothing.

        Returns:
            Dict mapping each winning draw number to its six balls.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            target = set(answer)

            # Lazy iteration: avoids materialising all combinations per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(target & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: score the filter on the validation range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step, kept as a string literal: builds the final candidate list
    # for the latest draw and writes one candidate per line.  The writer now
    # serialises each candidate (`ball`); it previously wrote `answer` on
    # every line.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled step 2 (re-check the win count), kept verbatim:
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
||||
234
valid_2.py
234
valid_2.py
@@ -1,234 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_2 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws (validation run).

    A draw "wins" when the filter returns an empty collection of failure
    reasons for the real winning numbers.  ``df_ball`` arguments are
    DataFrames whose first column is the draw number ``no`` followed by the
    six main balls and the bonus ball.
    """

    # Filter under test; assigned in ``__init__``.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from ``<resources_path>/<history_json>``.

        Args:
            resources_path: Directory holding the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: History file name inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on every real draw in ``[start_no, end_no]`` and report.

        Prints, per draw, the filters that rejected the winning numbers,
        followed by several frequency tables and the pass rate.

        Args:
            df_ball: Full draw history (may extend beyond the scored range).
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            Number of scored draws that no filter rejected.
        """
        win_count = 0
        no_filter_ball = {}       # draw no -> winning balls, for clean draws
        print_log = True
        hits_per_filter = {}      # filter name -> draws it rejected
        draws_by_hit_count = {}   # number of filters tripped -> list of reason lists
        single_hits = {}          # filter name -> draws rejected by it alone
        pair_hits = {}            # "a,b" -> draws rejected by exactly that pair

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            # Sorted so that log lines and two-filter keys do not depend on
            # set iteration order.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    pair_hits[key] = pair_hits.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)

            # Recorded for every evaluated draw (clean ones included) so the
            # report can list draws ordered by how many filters they tripped.
            draws_by_hit_count.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                hits_per_filter[f_t] = hits_per_filter.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(draws_by_hit_count):
            for reasons in draws_by_hit_count[filter_count]:
                print("\t\t>{} > {}".format(filter_count, reasons))

        # The three tables below are ordered by descending hit count.
        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(single_hits.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(pair_hits.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(hits_per_filter.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # Denominator is the width of the requested range.  Fixed-point
        # format: "{:.2}" would render 12.0 as "1.2e+01".
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter does not reject.

        Args:
            no: Draw number passed through to the filter.
            df_ball: Draw history passed through to the filter.
            filter_ball: Optional balls to exclude; any combination containing
                one of them is skipped without calling the filter.

        Returns:
            List of 6-tuples, in ``itertools.combinations`` order.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []

        # Stream the 8,145,060 combinations instead of building a list.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count real draws accepted by ``extract_final_candidates`` and print them.

        Rows are visited from the last one down to index 1; index 0 is not
        checked.

        Args:
            df_ball: Draw history.
            p_win_count: Win count from a previous step, printed for comparison.
            filter_ball: Optional balls to exclude; draws containing any of
                them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

            if not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

    def validate(self, df_ball, nos=None):
        """Check, for each draw in ``nos``, whether the real numbers survive.

        Combinations are fed to ``extract_final_candidates`` in order until the
        one equal to the winning numbers is accepted.

        Args:
            df_ball: Draw history.
            nos: Iterable of draw numbers to validate; ``None`` validates nothing.

        Returns:
            Dict mapping each winning draw number to its six balls.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            target = set(answer)

            # Lazy iteration: avoids materialising all combinations per draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(target & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: score the filter on the validation range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step, kept as a string literal: builds the final candidate list
    # for the latest draw and writes one candidate per line.  The writer now
    # serialises each candidate (`ball`); it previously wrote `answer` on
    # every line.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]

    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))

    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled step 2 (re-check the win count), kept verbatim:
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins
|
||||
234
valid_3.py
234
valid_3.py
@@ -1,234 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import pandas as pd
|
||||
import itertools
|
||||
from filter_model_3 import BallFilter
|
||||
import time
|
||||
import datetime
|
||||
|
||||
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws.

    The filter object is expected to expose ``filter(...)`` and
    ``extract_final_candidates(...)``; both return a collection of
    rejection reasons, and every method here treats an empty collection
    as "the combination passed".
    """

    # Filter under test; assigned per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from ``<resources_path>/<history_json>``.

        Args:
            resources_path: Directory that holds the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the history inside ``resources_path``.
        """
        # Validation should use the full history so previous-draw/window
        # features are available to the filter.
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _answer_for(df_ball, no):
        """Return columns 1..6 of the first row whose 'no' equals ``no``."""
        return df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

    @staticmethod
    def _print_counts(title, counts):
        """Print ``title`` followed by ``counts`` ordered by descending count."""
        print(title)
        # sorted() is stable, so ties keep their first-seen order.
        for key, count in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on each winning draw in ``[start_no, end_no]``.

        Prints, per draw, the rejection reasons, then summary tables
        (reasons grouped by how many fired, single-reason counts,
        two-reason counts, per-reason totals) and the draws that passed.

        Args:
            df_ball: Draw history with a 'no' column followed by the six
                winning numbers; may contain draws outside the range.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            Number of scored draws whose winning numbers passed the filter.
        """
        printLog = True
        win_count = 0
        no_filter_ball = {}   # draw no -> winning numbers that passed
        filter_dic = {}       # reason -> number of draws it rejected
        filter_dic_len = {}   # number of reasons -> list of reason lists
        filter_dic_1 = {}     # sole reason -> count
        filter_dic_2 = {}     # "reasonA,reasonB" -> count

        # Walk newest-to-oldest; only draws inside the requested range count.
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if not start_no <= no <= end_no:
                continue

            answer = self._answer_for(df_ball, no)

            # Sort the reasons: the filter returns an unordered collection,
            # and an unsorted join could count the same pair under both
            # "a,b" and "b,a".
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Grouped by reason count so near-misses can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)

            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for reasons in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, reasons))

        self._print_counts("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_counts("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_counts("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # NOTE: total is the width of the requested range, not the number of
        # draws actually present in df_ball.
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" gives two decimals; a bare "{:.2}" would render the float
        # with two significant digits, e.g. "3.3e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no, answer in no_filter_ball.items():
            print("\t\t>", no, answer)
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Collect every 6-of-45 combination the filter accepts for draw ``no``.

        Args:
            no: Draw number handed to the filter.
            df_ball: Draw history handed to the filter.
            filter_ball: Optional numbers to exclude; any combination that
                contains one of them is skipped without calling the filter.

        Returns:
            List of 6-tuples, in ``itertools.combinations`` order.
        """
        final_candidates = []
        excluded = set(filter_ball) if filter_ball is not None else set()

        # Iterate lazily: there are 8,145,060 combinations, so building the
        # full list first would only cost memory.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if not excluded.isdisjoint(ball):
                continue

            if self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited from the last one down to index 1; row 0 is not
        checked, and the printed total is ``len(df_ball) - 1`` accordingly.

        Args:
            df_ball: Draw history with a 'no' column followed by the six
                winning numbers.
            p_win_count: Previously computed win count, echoed in the summary.
            filter_ball: Optional numbers; draws containing any are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0

        for i in range(len(df_ball) - 1, 0, -1):
            no = int(df_ball['no'].iloc[i])
            answer = self._answer_for(df_ball, no)

            if not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))

        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        All 6-of-45 combinations are enumerated in order; the single one
        equal to the draw's winning numbers is passed to
        ``extract_final_candidates``. Only that combination can produce a
        win, so the filter is not called for the others and the scan stops
        once it has been evaluated.
        NOTE(review): this assumes ``extract_final_candidates`` has no side
        effects that later calls rely on -- confirm against the filter.

        Args:
            df_ball: Draw history with a 'no' column followed by the six
                winning numbers.
            nos: Iterable of draw numbers to validate. ``None`` is treated
                as "no draws" instead of raising ``TypeError``.

        Returns:
            Dict mapping each winning draw number to its winning numbers.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = self._answer_for(df_ball, no)
            # combinations() yields ascending tuples, so the sorted answer
            # is directly comparable with each candidate.
            target = tuple(sorted(answer))

            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                if ball != target:
                    continue

                filter_type = self.ballFilter.extract_final_candidates(
                    list(ball), no=no, until_end=True, df=df_ball
                )
                if len(filter_type) == 0:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                # Combinations are unique: nothing after this can match.
                break

        return win_history
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: score the filter on a range of draws and,
    # on request, dump every combination the filter accepts for the most
    # recent draw.

    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    # Opt-in: this step was previously disabled inside a string literal.
    # It enumerates every 6-of-45 combination, so it stays off by default.
    parser.add_argument(
        "--final-candidates",
        action="store_true",
        help="Also enumerate and save the combinations accepted for the last draw in the history (slow).",
    )
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    if args.final_candidates:
        print("\n\n")
        no = df_ball['no'].values[-1]
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

        print("STEP #0. 최종 후보 선정")
        start = time.time()
        final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
        process_time = datetime.timedelta(seconds=time.time() - start)
        print("process_time: ", process_time)

        print(" > size: {}".format(len(final_candidates)))
        file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
        with open(file_name, 'w') as outFp:
            for candidate in final_candidates:
                # Write the candidate itself; the disabled version wrote the
                # winning numbers on every line.
                outFp.write("{}\n".format(','.join(str(b) for b in candidate)))

        print('{}회, 정답: {}\n'.format(no, str(answer)))

    # Disabled follow-up step (STEP #2, re-check the win count):
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (fixed in the feature file): 22 wins
|
||||
Reference in New Issue
Block a user