This commit is contained in:
2026-04-12 10:55:46 +09:00
parent e31eefef09
commit a6b170fefa
26 changed files with 11 additions and 11479 deletions

View File

@@ -1,11 +1,12 @@
데이터는 다음과 같습니다. 데이터는 다음과 같습니다.
(학습 데이터) (학습 데이터)
- train.json, train.txt - lotto_history.txt에서 1회차부터 800회차
- 1회차부터 800회차
(검증 데이터) (검증 데이터)
- valid.json, valid.txt - lotto_history.txt에서 801회차부터 1000회차
- 801회차부터 1000회차
(테스트 데이터)
- lotto_history.txt에서 1001회차부터 이후 모두
파일 구조를 먼저 이해하세요. 파일 구조를 먼저 이해하세요.
@@ -110,3 +111,6 @@
그리고 요구사항에 대해서 시도 방법을 설계하세요. 그리고 요구사항에 대해서 시도 방법을 설계하세요.
그리고 반복적으로 실행해서 최적화된 방법을 찾아서 적용해주세요. 그리고 반복적으로 실행해서 최적화된 방법을 찾아서 적용해주세요.
(최적화는 언제든 학습 데이터로 최적화를 해야 합니다. 그리고 검증 데이터로 테스트만 수행하세요.) (최적화는 언제든 학습 데이터로 최적화를 해야 합니다. 그리고 검증 데이터로 테스트만 수행하세요.)
당첨번호에 대한 추천 개수가 100개 미만이어야 합니다.
1_FilterTest_25.py, BallFilter_25.py를 참고해서 최적의 final_filterTest.py, final_BallFilter.py를 작성해 주세요.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,94 +0,0 @@
"""
filter_model_3.py
OR-composed BallFilter:
- A candidate ball is ACCEPTED if it passes EITHER filter_model_1 OR filter_model_2.
- A candidate ball is REJECTED only if it fails BOTH.
This keeps the same public interface used across the project:
BallFilter(lottoHistoryFileName, ruleset_path=..., ruleset=...)
.filter(ball, no, until_end=False, df=None, filter_ball=None) -> set[str]
.extract_final_candidates(ball, no=None, until_end=False, df=None) -> set[str]
Notes:
- The underlying filters return a non-empty set of failure reasons when rejected.
- Callers treat "len(result) == 0" as PASS.
"""
from __future__ import annotations
from typing import Any, Dict, Optional
import filter_model_1 as fm1
import filter_model_2 as fm2
class BallFilter:
    """
    Combine filter_model_1.BallFilter and filter_model_2.BallFilter with OR semantics.

    A ball passes (an empty set is returned) as soon as one of the two
    wrapped models returns an empty reason set. When both reject it, the
    reasons of both models are returned, tagged "m1:" / "m2:".
    """

    def __init__(
        self,
        lottoHistoryFileName: Optional[str] = None,
        ruleset_path: Optional[str] = None,
        ruleset: Optional[Dict[str, Any]] = None,
        ruleset_path_1: Optional[str] = None,
        ruleset_path_2: Optional[str] = None,
        ruleset_1: Optional[Dict[str, Any]] = None,
        ruleset_2: Optional[Dict[str, Any]] = None,
    ):
        """
        Build both wrapped filters.

        ``ruleset_path`` / ``ruleset`` are shared settings handed to both
        models; the ``*_1`` / ``*_2`` arguments override them for one model
        whenever they are not None.
        """

        def prefer(specific, shared):
            # A per-model value wins only when it was actually supplied.
            return shared if specific is None else specific

        self.m1 = fm1.BallFilter(
            lottoHistoryFileName,
            ruleset_path=prefer(ruleset_path_1, ruleset_path),
            ruleset=prefer(ruleset_1, ruleset),
        )
        self.m2 = fm2.BallFilter(
            lottoHistoryFileName,
            ruleset_path=prefer(ruleset_path_2, ruleset_path),
            ruleset=prefer(ruleset_2, ruleset),
        )

    # ------------------------------------------------------------------
    # History helpers: answered by model 1.
    # ------------------------------------------------------------------
    def getBall(self, no):
        """Forward to ``m1.getBall``."""
        return self.m1.getBall(no)

    def getLastNo(self, YMD):
        """Forward to ``m1.getLastNo``."""
        return self.m1.getLastNo(YMD)

    def getNextNo(self, YMD):
        """Forward to ``m1.getNextNo``."""
        return self.m1.getNextNo(YMD)

    def getYMD(self, no):
        """Forward to ``m1.getYMD``."""
        return self.m1.getYMD(no)

    def _prefixed(self, prefix: str, reasons: set) -> set:
        """Return ``reasons`` with ``prefix`` put in front of every entry."""
        return {"{}{}".format(prefix, reason) for reason in reasons}

    def extract_final_candidates(self, ball, no=None, until_end: bool = False, df=None):
        """
        Evaluate ``ball`` against model 1, then model 2.

        Returns an empty set as soon as one model reports no failure
        reasons (model 2 is not consulted when model 1 already passes);
        otherwise returns the prefixed reasons of both models.
        """
        rejections = []
        for tag, model in (("m1:", self.m1), ("m2:", self.m2)):
            reasons = model.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)
            if len(reasons) == 0:
                return set()
            rejections.append((tag, reasons))

        # Both models rejected the ball: merge their tagged reasons.
        merged = set()
        for tag, reasons in rejections:
            merged |= self._prefixed(tag, set(reasons))
        return merged

    def filter(self, ball, no, until_end: bool = False, df=None, filter_ball=None):
        """
        Signature-compatible entry point used by existing callers.

        ``filter_ball`` is accepted but ignored; the call is answered by
        ``extract_final_candidates``.
        """
        return self.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)

File diff suppressed because it is too large Load Diff

View File

@@ -1,179 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_1 import BallFilter
class Practice:
    """
    Weekly lotto helper: downloads draw history, builds the list of
    recommended combinations and owns the Telegram bot used to publish them.
    """

    # Telegram client; replaced by a TelegramBot instance in __init__.
    bot = None
    # Placeholders that nothing in this class assigns or reads.
    preprocessor = None
    predictor = None
    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram bot. ``resources_path`` is accepted but not stored."""
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """
        Download draw results and store them next to ``lottoHistoryFile``.

        Args:
            lottoHistoryFile: Path prefix; ".json" and ".txt" are appended.
            drwNo: Draw number to fetch and append to the existing files.
                When None, both files are truncated and every draw starting
                at 1 is fetched until the service stops answering 'success'.

        Returns:
            ``[n1, n2, n3, n4, n5, n6, bonus]`` of the last draw written, or
            None when no draw was written.
        """
        base_url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
        ball_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # The with-block closes both files on every exit path, including the
        # "draw not available" case and request/JSON errors.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            idx = drwNo if single_draw else 1
            while True:
                res = requests.post(base_url + str(idx))
                result = res.json()
                if result['returnValue'] != 'success':
                    break
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                numbers = [result[key] for key in ball_keys]
                line = "%d,%d,%d,%d,%d,%d,%d,%d" % (idx, *numbers)
                textFp.write(line + "\n")
                print(line)
                ball = numbers
                if single_draw:
                    break
                idx += 1
                # Pause between requests while crawling the full history.
                time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed weekly pick to ``result_json``."""
        result_json.append([6, 7, 10, 11, 20, 45])

    def predict2(self, resources_path, ymd, result_json):
        """
        Append every 6-number combination that passes the ball filter.

        Args:
            resources_path: Directory holding lotto_history.json / .txt.
            ymd: Draw date string handed to ``BallFilter.getNextNo``.
            result_json: List that receives the surviving combinations.

        Returns:
            ``(p_no, p_ball)``: number of the draw preceding the predicted
            one and its six main numbers as stored in lotto_history.txt.
        """
        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Stream the 8,145,060 combinations instead of building them all in memory first.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            # A non-empty result lists the filters that rejected the ball.
            if len(ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                continue
            result_json.append(ball)

        previous_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return previous_row[0], previous_row[1:7]
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the coming Saturday; on Sunday, or on Saturday from 21:00 on,
    # move to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    roll_to_next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=((12 if roll_to_next_week else 5) - weekday))
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Prediction driver.
    practice = Practice(resources_path)

    # Read the stored history; last_json ends up holding the last non-blank record.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)
    # Crawling the next draw is currently disabled:
    # ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    # Fixed weekly pick first, then the filter-based picks.
    practice.predict1(result_json[ymd])
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])

    # Send the list through Telegram, one message per 100 entries.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for rank, ball in enumerate(result_json[ymd], start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    # Flush the last, partially filled message.
    if len(result_json[ymd]) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

View File

@@ -1,179 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_2 import BallFilter
class Practice:
    """
    Weekly lotto helper: downloads draw history, builds the list of
    recommended combinations and owns the Telegram bot used to publish them.
    """

    # Telegram client; replaced by a TelegramBot instance in __init__.
    bot = None
    # Placeholders that nothing in this class assigns or reads.
    preprocessor = None
    predictor = None
    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram bot. ``resources_path`` is accepted but not stored."""
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """
        Download draw results and store them next to ``lottoHistoryFile``.

        Args:
            lottoHistoryFile: Path prefix; ".json" and ".txt" are appended.
            drwNo: Draw number to fetch and append to the existing files.
                When None, both files are truncated and every draw starting
                at 1 is fetched until the service stops answering 'success'.

        Returns:
            ``[n1, n2, n3, n4, n5, n6, bonus]`` of the last draw written, or
            None when no draw was written.
        """
        base_url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
        ball_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # The with-block closes both files on every exit path, including the
        # "draw not available" case and request/JSON errors.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            idx = drwNo if single_draw else 1
            while True:
                res = requests.post(base_url + str(idx))
                result = res.json()
                if result['returnValue'] != 'success':
                    break
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                numbers = [result[key] for key in ball_keys]
                line = "%d,%d,%d,%d,%d,%d,%d,%d" % (idx, *numbers)
                textFp.write(line + "\n")
                print(line)
                ball = numbers
                if single_draw:
                    break
                idx += 1
                # Pause between requests while crawling the full history.
                time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed weekly pick to ``result_json``."""
        result_json.append([6, 7, 10, 11, 20, 45])

    def predict2(self, resources_path, ymd, result_json):
        """
        Append every 6-number combination that passes the ball filter.

        Args:
            resources_path: Directory holding lotto_history.json / .txt.
            ymd: Draw date string handed to ``BallFilter.getNextNo``.
            result_json: List that receives the surviving combinations.

        Returns:
            ``(p_no, p_ball)``: number of the draw preceding the predicted
            one and its six main numbers as stored in lotto_history.txt.
        """
        ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Stream the 8,145,060 combinations instead of building them all in memory first.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            # A non-empty result lists the filters that rejected the ball.
            if len(ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
                continue
            result_json.append(ball)

        previous_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return previous_row[0], previous_row[1:7]
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the coming Saturday; on Sunday, or on Saturday from 21:00 on,
    # move to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    roll_to_next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=((12 if roll_to_next_week else 5) - weekday))
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Prediction driver.
    practice = Practice(resources_path)

    # Read the stored history; last_json ends up holding the last non-blank record.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)
    # Crawling the next draw is currently disabled:
    # ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    # Fixed weekly pick first, then the filter-based picks.
    practice.predict1(result_json[ymd])
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])

    # Send the list through Telegram, one message per 100 entries.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for rank, ball in enumerate(result_json[ymd], start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    # Flush the last, partially filled message.
    if len(result_json[ymd]) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

View File

@@ -1,546 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import copy
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_3 import BallFilter
class Practice:
# Weekly lotto helper built on the OR-composed BallFilter imported from filter_model_3.
# Telegram client; set to a TelegramBot instance in __init__.
bot = None
# The next three attributes are not assigned or read by the methods visible in this class.
preprocessor = None
predictor = None
extract_count = None
# predict3 reads these as the lower / upper bound on how many combinations it should return.
TARGET_MIN_SURVIVORS = 30
TARGET_MAX_SURVIVORS = 150
# predict3 adds this many seconds to its start time to obtain the deadline for all its scans.
PREDICT_TIMEOUT_SECONDS = 180
def __init__(self, resources_path):
# Creates the Telegram bot and stores resources_path for the helper methods that read self.resources_path.
self.bot = TelegramBot()
self.resources_path = resources_path
return
# 로또 당첨 데이터를 수집해서 파일로 저장합니다.
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일
def craw(self, lottoHistoryFile, drwNo=None):
    """
    Download draw results and store them next to ``lottoHistoryFile``.

    Args:
        lottoHistoryFile: Path prefix; ".json" and ".txt" are appended.
        drwNo: Draw number to fetch and append to the existing files.
            When None, both files are truncated and every draw starting
            at 1 is fetched until the service stops answering 'success'.

    Returns:
        ``[n1, n2, n3, n4, n5, n6, bonus]`` of the last draw written, or
        None when no draw was written.
    """
    base_url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    ball_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')
    single_draw = drwNo is not None
    mode = 'a' if single_draw else 'w'
    ball = None
    # The with-block closes both files on every exit path, including the
    # "draw not available" case and request/JSON errors.
    with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
            open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
        idx = drwNo if single_draw else 1
        while True:
            res = requests.post(base_url + str(idx))
            result = res.json()
            if result['returnValue'] != 'success':
                break
            jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
            numbers = [result[key] for key in ball_keys]
            line = "%d,%d,%d,%d,%d,%d,%d,%d" % (idx, *numbers)
            textFp.write(line + "\n")
            print(line)
            ball = numbers
            if single_draw:
                break
            idx += 1
            # Pause between requests while crawling the full history.
            time.sleep(0.5)
    return ball
def predict1(self, result_json):
    """Append the eleven fixed weekly picks to ``result_json`` and log the stage."""
    fixed_picks = (
        (6, 7, 10, 11, 20, 45),
        (2, 7, 17, 28, 35, 39),
        (6, 10, 19, 25, 33, 35),
        (3, 17, 20, 24, 35, 45),
        (5, 15, 18, 29, 36, 41),
        (6, 15, 20, 23, 37, 43),
        (8, 15, 19, 23, 38, 41),
        (5, 11, 19, 24, 40, 45),
        (9, 16, 18, 23, 35, 43),
        (7, 13, 19, 28, 33, 44),
        (7, 11, 18, 29, 37, 42),
    )
    for pick in fixed_picks:
        # Each pick is appended as its own fresh list.
        result_json.append(list(pick))
    print("회차(predict1)")
def predict2(self, resources_path, ymd, result_json):
    """
    Append every 6-number combination that passes the ball filter.

    Args:
        resources_path: Directory holding lotto_history.json / .txt.
        ymd: Draw date string handed to ``BallFilter.getNextNo``.
        result_json: List that receives the surviving combinations.

    Returns:
        ``(p_no, p_ball)``: number of the draw preceding the predicted
        one and its six main numbers as stored in lotto_history.txt.
    """
    ballFilter = BallFilter(os.path.join(resources_path, 'lotto_history.json'))
    no = ballFilter.getNextNo(ymd)
    print("회차(predict2): {}".format(no))

    df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # Stream the 8,145,060 combinations instead of building them all in memory first.
    for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))
        ball = list(combo)
        # A non-empty result lists the filters that rejected the ball.
        if len(ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)) > 0:
            continue
        result_json.append(ball)

    previous_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
    return previous_row[0], previous_row[1:7]
def predict3(self, resources_path, ymd, result_json):
# Selects combinations for the next draw: scans with the base ruleset, then switches to
# tightened rulesets when more than TARGET_MAX_SURVIVORS pass, or to relaxed rulesets when
# fewer than TARGET_MIN_SURVIVORS pass. The chosen combinations are appended to result_json.
# Returns (p_no, p_ball): the number of the draw before `no` and its six main numbers, sorted.
# NOTE(review): this view has lost all indentation, so the nesting described in the comments
# below is inferred from the statements themselves — confirm against the original file.
candidates = [i for i in range(1, 46)]
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
no = BallFilter(lottoHistoryFileName).getNextNo(ymd)
print("회차(predict3): {}".format(no))
# One deadline, computed once, is handed to every _collect_candidates call made here.
predict_start_ts = time.time()
deadline_ts = predict_start_ts + self.PREDICT_TIMEOUT_SECONDS
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
df_ball = pd.read_csv(lottoHistoryFileName, header=None)
df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
# Row of draw `no - 1`: element 0 is the draw number, elements 1-6 the main numbers.
p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
p_no = p_ball[0]
p_ball = sorted(p_ball[1:7])
base_ruleset = self._get_base_ruleset()
# Variants of the base ruleset used when the base scan yields too many survivors;
# the second adds "sum" and "sum_prev_diff" allow-lists on top of the first.
tighten_rulesets = [
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": True,
"ban_triples_legacy": True,
"all_in_previous7": True,
"previous_neighbors": True,
},
allowed_overrides={
"ac_value": [8, 9],
"uniq_last_digit_count": [4, 5],
"even_count": [2, 3, 4],
},
),
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": True,
"ban_triples_legacy": True,
"all_in_previous7": True,
"previous_neighbors": True,
},
allowed_overrides={
"ac_value": [8, 9],
"uniq_last_digit_count": [4, 5],
"even_count": [2, 3, 4],
"sum": [112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148],
"sum_prev_diff": [13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40],
},
),
]
# Variants used when the base scan yields too few survivors; each one disables more filters than the previous.
relax_rulesets = [
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": False,
"ban_triples_legacy": False,
},
),
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": False,
"ban_triples_legacy": False,
"previous_neighbors": False,
"all_in_previous7": False,
},
),
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": False,
"ban_triples_legacy": False,
"previous_neighbors": False,
"all_in_previous7": False,
"weeks_8_count": False,
"weeks_12_count": False,
"weeks_16_count": False,
"weeks_20_count": False,
},
),
]
min_survivors = self.TARGET_MIN_SURVIVORS
max_survivors = self.TARGET_MAX_SURVIVORS
chosen = []
stage_name = "base"
# Base stage: stop_when_gt lets the scan return as soon as the survivor count exceeds max_survivors.
current_info = self._collect_candidates(
candidates=candidates,
no=no,
df_ball=df_ball,
ruleset=base_ruleset,
stop_when_gt=max_survivors,
stage_name="base",
predict_start_ts=predict_start_ts,
deadline_ts=deadline_ts,
)
current = current_info["candidates"]
# Base scan hit the deadline: trim or top up the partial result and return right away.
if current_info["timed_out"]:
chosen = self._finalize_on_timeout(current, p_ball, min_survivors, max_survivors)
stage_name = "base_timeout_fallback"
print("predict3 stage: {}, survivors: {}".format(stage_name, len(chosen)))
for ball in chosen:
result_json.append(ball)
return p_no, p_ball
# Base result already inside the target window.
if min_survivors <= len(current) <= max_survivors:
chosen = current
# Too many survivors: try the tightened rulesets in order.
elif len(current) > max_survivors:
chosen = current
stage_name = "base_overflow"
for idx, rs in enumerate(tighten_rulesets, start=1):
t_info = self._collect_candidates(
candidates=candidates,
no=no,
df_ball=df_ball,
ruleset=rs,
stop_when_gt=max_survivors,
stage_name="tighten_{}".format(idx),
predict_start_ts=predict_start_ts,
deadline_ts=deadline_ts,
)
t = t_info["candidates"]
if t_info["timed_out"]:
chosen = self._finalize_on_timeout(t, p_ball, min_survivors, max_survivors)
stage_name = "tighten_{}_timeout_fallback".format(idx)
break
if min_survivors <= len(t) <= max_survivors:
chosen = t
stage_name = "tighten_{}".format(idx)
break
# Presumably reached only when t is outside the window, in which case `<= max_survivors`
# means t holds fewer than min_survivors entries; it is kept as the current choice.
if len(t) <= max_survivors:
chosen = t
stage_name = "tighten_{}".format(idx)
# Choice still above max_survivors: scan with the last tightened ruleset without an
# early stop, then keep the max_survivors best-scored combinations.
if len(chosen) > max_survivors:
full_info = self._collect_candidates(
candidates=candidates,
no=no,
df_ball=df_ball,
ruleset=tighten_rulesets[-1],
stop_when_gt=None,
stage_name="tighten_full_rank",
predict_start_ts=predict_start_ts,
deadline_ts=deadline_ts,
)
full_for_ranking = full_info["candidates"]
if full_info["timed_out"]:
chosen = self._finalize_on_timeout(full_for_ranking, p_ball, min_survivors, max_survivors)
stage_name = "tighten_rank_timeout_fallback"
else:
chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
stage_name = "tighten_rank_trim"
# Too few survivors: try the relaxed rulesets in order; stop_when_gte ends each scan once min_survivors is reached.
else:
chosen = current
stage_name = "base_underflow"
for idx, rs in enumerate(relax_rulesets, start=1):
r_info = self._collect_candidates(
candidates=candidates,
no=no,
df_ball=df_ball,
ruleset=rs,
stop_when_gt=None,
stop_when_gte=min_survivors,
stage_name="relax_{}".format(idx),
predict_start_ts=predict_start_ts,
deadline_ts=deadline_ts,
)
r = r_info["candidates"]
chosen = r
stage_name = "relax_{}".format(idx)
if r_info["timed_out"]:
chosen = self._finalize_on_timeout(r, p_ball, min_survivors, max_survivors)
stage_name = "relax_{}_timeout_fallback".format(idx)
break
if len(r) >= min_survivors:
break
# Top-up step: with nothing chosen, derive min_survivors combinations from the previous draw;
# with fewer than min_survivors, add derived combinations that are not already chosen.
# NOTE(review): without indentation it cannot be told whether this step belongs only to the
# relax branch or runs after every branch — confirm against the original file.
if len(chosen) == 0:
stage_name = "relax_zero_fallback"
chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
elif len(chosen) < min_survivors:
stage_name = "{}_fill".format(stage_name)
fill = self._fallback_candidates_from_prev(
p_ball,
min_survivors - len(chosen),
exclude=set(tuple(x) for x in chosen),
)
chosen.extend(fill)
print("predict3 stage: {}, survivors: {}".format(stage_name, len(chosen)))
for ball in chosen:
result_json.append(ball)
return p_no, p_ball
def _get_base_ruleset(self):
    """Return a deep copy of the ruleset held by model 1 of a freshly built BallFilter."""
    history_path = os.path.join(self.resources_path, "lotto_history.json")
    return copy.deepcopy(BallFilter(history_path).m1.ruleset)
def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
ruleset = copy.deepcopy(base_ruleset)
ruleset.setdefault("filters", {})
enabled_overrides = enabled_overrides or {}
allowed_overrides = allowed_overrides or {}
for key, value in enabled_overrides.items():
ruleset["filters"].setdefault(key, {})
ruleset["filters"][key]["enabled"] = bool(value)
for key, values in allowed_overrides.items():
ruleset["filters"].setdefault(key, {})
ruleset["filters"][key]["enabled"] = True
ruleset["filters"][key]["allowed"] = list(values)
return ruleset
def _collect_candidates(
self,
candidates,
no,
df_ball,
ruleset,
stop_when_gt=None,
stop_when_gte=None,
stage_name="base",
predict_start_ts=None,
deadline_ts=None,
):
# Runs every 6-element combination of `candidates` through a BallFilter built with `ruleset`
# and collects the combinations for which filter() returns an empty result.
# stop_when_gt / stop_when_gte: optional thresholds on the number of collected combinations
#   that end the scan early (strictly greater than / greater than or equal).
# stage_name: label used in the progress and timeout log lines.
# predict_start_ts: only used to compute the elapsed seconds shown in log lines.
# deadline_ts: time.time() value at or after which the scan gives up.
# Returns a dict with "candidates" (list of lists), "timed_out" (bool) and "processed" (int).
lottoHistoryFileName = os.path.join(self.resources_path, "lotto_history.json")
ballFilter = BallFilter(lottoHistoryFileName, ruleset=ruleset)
result = []
last_idx = 0
# Combinations are consumed lazily, numbered from 1.
for idx, ball in enumerate(itertools.combinations(candidates, 6), start=1):
last_idx = idx
# The deadline is tested before the combination is filtered, so on timeout
# "processed" is the index at which the timeout was noticed.
if deadline_ts is not None and deadline_ts <= time.time():
elapsed = (time.time() - predict_start_ts) if predict_start_ts is not None else 0.0
print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed, len(result)))
return {"candidates": result, "timed_out": True, "processed": idx}
# Progress line every million combinations.
if idx % 1000000 == 0:
elapsed = (time.time() - predict_start_ts) if predict_start_ts is not None else 0.0
print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed, len(result)))
b = list(ball)
if len(ballFilter.filter(ball=b, no=no, until_end=False, df=df_ball)) == 0:
result.append(b)
# NOTE(review): with indentation lost it is unclear whether the two threshold checks run
# only after an append or on every iteration — confirm against the original file.
if stop_when_gt is not None and len(result) > stop_when_gt:
return {"candidates": result, "timed_out": False, "processed": idx}
if stop_when_gte is not None and len(result) >= stop_when_gte:
return {"candidates": result, "timed_out": False, "processed": idx}
# Every combination was examined without reaching a threshold or the deadline.
return {"candidates": result, "timed_out": False, "processed": last_idx}
def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
chosen = list(partial_candidates)
if len(chosen) > max_survivors:
chosen = self._rank_and_trim(chosen, prev_ball, max_survivors)
elif len(chosen) < min_survivors:
fill = self._fallback_candidates_from_prev(
prev_ball,
min_survivors - len(chosen),
exclude=set(tuple(x) for x in chosen),
)
chosen.extend(fill)
return chosen
def _rank_and_trim(self, candidates, prev_ball, limit):
scored = [(self._score_candidate(ball, prev_ball), ball) for ball in candidates]
scored.sort(key=lambda x: x[0])
return [ball for _, ball in scored[:limit]]
def _score_candidate(self, ball, prev_ball):
sum_diff = abs(sum(ball) - sum(prev_ball))
even_cnt = len([x for x in ball if x % 2 == 0])
uniq_last = len(set([x % 10 for x in ball]))
contiguous_penalty = 0
s = sorted(ball)
for i in range(1, len(s)):
if s[i] - s[i - 1] == 1:
contiguous_penalty += 1
score = 0
score += sum_diff
score += abs(even_cnt - 3) * 2
score += abs(uniq_last - 5) * 2
score += contiguous_penalty
return score
def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
exclude = exclude or set()
seed = sorted(prev_ball)
out = []
delta_patterns = [
(0, 0, 0, 0, 0, 0),
(-1, 0, 0, 0, 0, 1),
(0, -1, 0, 0, 1, 0),
(0, 0, -1, 1, 0, 0),
(-2, 0, 0, 0, 0, 2),
(0, -2, 0, 0, 2, 0),
(0, 0, -2, 2, 0, 0),
(-1, -1, 0, 0, 1, 1),
(1, 0, -1, 0, 0, 0),
(0, 1, 0, -1, 0, 0),
(1, -1, 1, -1, 1, -1),
(-1, 1, -1, 1, -1, 1),
]
shift = 0
while len(out) < need_count and shift <= 8:
for delta in delta_patterns:
cand = [seed[i] + delta[i] for i in range(6)]
cand = [min(45, max(1, v + shift)) for v in cand]
cand = sorted(cand)
if len(set(cand)) != 6:
continue
t = tuple(cand)
if t in exclude:
continue
exclude.add(t)
out.append(cand)
if len(out) >= need_count:
break
shift += 1
return out
def _merge_unique_balls(self, base_balls, extra_balls):
seen = set(tuple(sorted(x)) for x in base_balls)
for ball in extra_balls:
key = tuple(sorted(ball))
if key not in seen:
base_balls.append(list(ball))
seen.add(key)
return base_balls
def _sorted_unique_balls(self, balls):
"""
Normalize (sort within ball), de-duplicate, then sort lexicographically.
Returns List[List[int]].
"""
uniq = {}
for b in balls:
key = tuple(sorted(b))
uniq[key] = list(key)
return [list(t) for t in sorted(uniq.keys())]
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the coming Saturday; on Sunday, or on Saturday from 21:00 on,
    # move to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    roll_to_next_week = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=((12 if roll_to_next_week else 5) - weekday))
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Prediction driver.
    practice = Practice(resources_path)

    # Read the stored history; last_json ends up holding the last non-blank record.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)
    # Crawling the next draw is currently disabled:
    # ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    # Fixed weekly picks come first.
    practice.predict1(result_json[ymd])

    # Filter-based picks (existing and new): merge both, sort them, then
    # append them after the fixed picks without duplicates.
    predict2_json = []
    p_no, p_ball = practice.predict2(resources_path, ymd, predict2_json)
    predict3_json = []
    p_no3, p_ball3 = practice.predict3(resources_path, ymd, predict3_json)

    merged_predict = []
    for batch in (predict2_json, predict3_json):
        practice._merge_unique_balls(merged_predict, batch)
    merged_predict = practice._sorted_unique_balls(merged_predict)
    practice._merge_unique_balls(result_json[ymd], merged_predict)

    # Use predict3's previous-draw numbers when both refer to the same draw.
    if p_no3 == p_no:
        p_ball = p_ball3

    # Send the list through Telegram, one message per 100 entries.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for rank, ball in enumerate(result_json[ymd], start=1):
        p_str += " {}. {}\n".format(rank, str(ball))
        if rank % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    # Flush the last, partially filled message.
    if len(result_json[ymd]) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

View File

@@ -1,216 +0,0 @@
import os
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
ballFilter = None
def __init__(self, resources_path):
    """Build the BallFilter from ``lotto_history.json`` inside ``resources_path``."""
    history_path = os.path.join(resources_path, 'lotto_history.json')
    self.ballFilter = BallFilter(history_path)
def find_filter_method(self, df_ball, filter_ball=None):
win_count = 0
no_filter_ball = {}
printLog = True
filter_dic = {}
filter_dic_len = {}
filter_dic_1 = {}
filter_dic_2 = {}
for i in range(len(df_ball)-1, 19, -1):
no = df_ball['no'].iloc[i]
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = answer[1:7]
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
filter_type = list(filter_type)
size = len(filter_type)
if size == 0:
win_count += 1
no_filter_ball[no] = answer
print("\t", no)
elif size == 1:
key = filter_type[0]
if key not in filter_dic_1:
filter_dic_1[key] = 1
else:
filter_dic_1[key] += 1
if printLog:
print("\t", no, filter_type)
elif size == 2:
key = ','.join(filter_type)
if key not in filter_dic_2:
filter_dic_2[key] = 1
else:
filter_dic_2[key] += 1
if printLog:
print("\t", no, filter_type)
else:
if printLog:
print("\t", no, filter_type)
# 회차별 필터개수가 적은 것을 정렬하기 위함
if size not in filter_dic_len:
filter_dic_len[size] = []
filter_dic_len[size].append(filter_type)
for f_t in filter_type:
if f_t not in filter_dic:
filter_dic[f_t] = 1
else:
filter_dic[f_t] += 1
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
sorted_filter_dic_len = sorted(filter_dic_len.keys())
for filter_count in sorted_filter_dic_len:
for filter_type in filter_dic_len[filter_count]:
print("\t\t>{} > {}".format(filter_count, filter_type))
print("\n\t[걸러진 유일 필터]")
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_1)):
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
print("\n\t[2개 필터에 걸린 경우]")
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_2)):
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
print("\n\t[Filter 유형 별 걸린 개수]")
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic)):
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
for no in no_filter_ball:
print("\t\t>", no, no_filter_ball[no])
print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
return win_count
def find_final_candidates(self, no, df_ball, filter_ball=None):
final_candidates = []
generation_balls = list(range(1, 46))
nCr = list(itertools.combinations(generation_balls, 6))
for idx, ball in enumerate(nCr):
if idx % 1000000 == 0:
print(" - {} processed...".format(idx))
if filter_ball is not None and 0 < len(set(ball) & set(filter_ball)):
continue
filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
filter_size = len(filter_type)
if filter_size:
continue
final_candidates.append(ball)
return final_candidates
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
win_count = 0
for i in range(len(df_ball)-1, 0, -1):
no = df_ball['no'].iloc[i]
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = answer[1:7]
if filter_ball is not None and len(set(answer) & set(filter_ball)):
continue
filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
if len(filter_type) == 0:
win_count += 1
print("\t\t>{}. {}".format(no, answer))
print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
return
def validate(self, df_ball, nos=None):
win_history = {}
for no in nos:
print(no, "processing...")
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = answer[1:7]
generation_balls = list(range(1, 46))
nCr = list(itertools.combinations(generation_balls, 6))
for idx, ball in enumerate(nCr):
if idx % 1000000 == 0:
print(" - {} processed...".format(idx))
ball = list(ball)
filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
if 0 == len(filter_type) and len(set(answer)&set(ball))==6:
win_history[no] = answer
print("win.. no: {}, answer: {}".format(no, str(answer)))
break
return win_history
# Script entry point: load the draw history and run STEP #1 (find_filter_method)
# over it, timing the run.
if __name__ == '__main__':
resources_path = 'resources'
# lotto_history.txt is read as a header-less CSV: draw no, six numbers, bonus number.
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
df_ball = pd.read_csv(lottoHistoryFileName, header=None)
df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
filter_ball=[]
filterTest = FilterTest(resources_path)
print("STEP #1. 필터 방법 추출")
start = time.time()
win_count = filterTest.find_filter_method(df_ball, filter_ball)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
# The string literal below is a disabled "STEP #0" (export of the final candidates).
# NOTE(review): its write loop formats `answer` rather than the loop variable `ball`,
# so re-enabling it unchanged would write the winning numbers on every line.
"""
print("\n\n")
no = df_ball['no'].values[-1]
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = ball[1:7]
print("STEP #0. 최종 후보 선정")
start = time.time()
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print(" > size: {}".format(len(final_candidates)))
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
with open(file_name, 'w+') as outFp:
for ball in final_candidates:
ball_str = [str(b) for b in answer]
outFp.write("{}\n".format(','.join(ball_str)))
print('{}회, 정답: {}\n'.format(no, str(answer)))
"""
# Disabled STEP #2 (win-count check via check_filter_method):
#print("\n\n")
#print("STEP #2. 당첨 회수 확인")
#filterTest.check_filter_method(df_ball, win_count)
# Original version (pinned to the feature file): 22 wins

View File

@@ -1,490 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
import json
import os
import copy
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_3 import BallFilter
# Weekly prediction driver: collects draw history (craw), builds the list of
# recommended combinations (predict1 / predict2) and owns the Telegram client.
class Practice:
# Telegram client used to send the results; assigned in __init__.
bot = None
# Declared here but not assigned or read in the visible part of this class.
preprocessor = None
predictor = None
extract_count = None
# Lower / upper bound on the number of filter-based combinations predict2 aims for.
TARGET_MIN_SURVIVORS = 30
TARGET_MAX_SURVIVORS = 150
# Wall-clock budget in seconds for one predict2 run (used to compute its deadline).
PREDICT_TIMEOUT_SECONDS = 180
def __init__(self, resources_path):
# resources_path: directory the helper methods read lotto_history.json from.
self.bot = TelegramBot()
self.resources_path = resources_path
return
# 로또 당첨 데이터를 수집해서 파일로 저장합니다.
# lottoHistoryFile: 로또 당첨 데이터를 저장할 파일
def craw(self, lottoHistoryFile, drwNo=None):
    """Download winning numbers from dhlottery.co.kr and store them.

    Every successful response is written as one JSON line to
    ``<lottoHistoryFile>.json`` and as one CSV line
    (``no,b1,...,b6,bonus``) to ``<lottoHistoryFile>.txt``.

    Args:
        lottoHistoryFile: Path prefix (without extension) of the two files.
        drwNo: Draw number to fetch and append. When ``None`` both files
            are truncated and draws are fetched from 1 upwards until the
            service stops answering ``success``.

    Returns:
        ``[b1, ..., b6, bonus]`` of the last draw stored, or ``None`` when
        nothing was stored.
    """
    url_prefix = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    number_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')
    single_draw = drwNo is not None
    mode = 'a' if single_draw else 'w'
    idx = drwNo if single_draw else 1
    ball = None
    # The context manager closes both files on every exit path. The original
    # leaked them on the early "not success" return and on any exception
    # raised by the request or the JSON decoding.
    with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
            open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
        while True:
            res = requests.post(url_prefix + str(idx))
            result = res.json()
            if result['returnValue'] != 'success':
                break
            jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
            ball = [result[key] for key in number_keys]
            csv_line = "%d,%d,%d,%d,%d,%d,%d,%d" % (idx, *ball)
            textFp.write(csv_line + "\n")
            print(csv_line)
            if single_draw:
                break
            idx += 1
            # Pause between requests while crawling the whole history.
            time.sleep(0.5)
    return ball
def predict1(self, result_json):
    """Append the fixed weekly combinations to ``result_json`` in place.

    Each combination is appended as a new list of six numbers. Returns
    ``None``.
    """
    fixed_picks = (
        (6, 7, 10, 11, 20, 45),
        (2, 7, 17, 28, 35, 39),
        (6, 10, 19, 25, 33, 35),
        (3, 17, 20, 24, 35, 45),
        (5, 15, 18, 29, 36, 41),
        (6, 15, 20, 23, 37, 43),
        (8, 15, 19, 23, 38, 41),
        (5, 11, 19, 24, 40, 45),
        (9, 16, 18, 23, 35, 43),
        (7, 13, 19, 28, 33, 44),
        (7, 11, 18, 29, 37, 42),
    )
    for pick in fixed_picks:
        result_json.append(list(pick))
# Filter-based prediction with staged rulesets.
# Collects combinations of 1..45 that pass the base ruleset; if there are too many it
# retries with the tighten rulesets, if too few with the relax rulesets, and it falls
# back to _finalize_on_timeout / _fallback_candidates_from_prev on timeout or shortfall.
# The chosen combinations are appended to result_json.
# Returns (p_no, p_ball): the number and the sorted six main numbers of the draw
# that precedes the draw being predicted.
# NOTE(review): indentation is not visible in this view, so the exact nesting of the
# branches below (in particular whether the final zero/min guard applies to every
# branch or only to the relax branch) must be confirmed against the real file.
def predict2(self, resources_path, ymd, result_json):
candidates = [i for i in range(1, 46)]
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
# Draw number to predict, as reported by the filter for date ymd.
no = BallFilter(lottoHistoryFileName).getNextNo(ymd)
print("회차: {}".format(no))
# One shared deadline covers every stage of this call.
predict_start_ts = time.time()
deadline_ts = predict_start_ts + self.PREDICT_TIMEOUT_SECONDS
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
df_ball = pd.read_csv(lottoHistoryFileName, header=None)
df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
# Previous draw (no - 1); raises IndexError if that draw is missing from the history.
p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
p_no = p_ball[0]
p_ball = sorted(p_ball[1:7])
# Rulesets for the base / tighten / relax stages
base_ruleset = self._get_base_ruleset()
# Tighten stages: enable extra filters and restrict allowed values.
tighten_rulesets = [
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": True,
"ban_triples_legacy": True,
"all_in_previous7": True,
"previous_neighbors": True,
},
allowed_overrides={
"ac_value": [8, 9],
"uniq_last_digit_count": [4, 5],
"even_count": [2, 3, 4],
},
),
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": True,
"ban_triples_legacy": True,
"all_in_previous7": True,
"previous_neighbors": True,
},
allowed_overrides={
"ac_value": [8, 9],
"uniq_last_digit_count": [4, 5],
"even_count": [2, 3, 4],
"sum": [112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148],
"sum_prev_diff": [13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40],
},
),
]
# Relax stages: progressively disable more filters.
relax_rulesets = [
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": False,
"ban_triples_legacy": False,
},
),
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": False,
"ban_triples_legacy": False,
"previous_neighbors": False,
"all_in_previous7": False,
},
),
self._build_ruleset(
base_ruleset=base_ruleset,
enabled_overrides={
"paper_patterns": False,
"ban_triples_legacy": False,
"previous_neighbors": False,
"all_in_previous7": False,
"weeks_8_count": False,
"weeks_12_count": False,
"weeks_16_count": False,
"weeks_20_count": False,
},
),
]
min_survivors = self.TARGET_MIN_SURVIVORS
max_survivors = self.TARGET_MAX_SURVIVORS
chosen = []
stage_name = "base"
# Base stage: collection stops as soon as the survivor count exceeds max_survivors.
current_info = self._collect_candidates(
candidates=candidates,
no=no,
df_ball=df_ball,
ruleset=base_ruleset,
stop_when_gt=max_survivors,
stage_name="base",
predict_start_ts=predict_start_ts,
deadline_ts=deadline_ts,
)
current = current_info["candidates"]
# Timeout during the base stage: finalize from the partial result and return early.
if current_info["timed_out"]:
chosen = self._finalize_on_timeout(current, p_ball, min_survivors, max_survivors)
stage_name = "base_timeout_fallback"
print("candidate_stage: {}, survivors: {}".format(stage_name, len(chosen)))
for ball in chosen:
result_json.append(ball)
return p_no, p_ball
if min_survivors <= len(current) <= max_survivors:
chosen = current
elif len(current) > max_survivors:
# Too many survivors: try the tighten rulesets in order.
chosen = current
stage_name = "base_overflow"
for idx, rs in enumerate(tighten_rulesets, start=1):
t_info = self._collect_candidates(
candidates=candidates,
no=no,
df_ball=df_ball,
ruleset=rs,
stop_when_gt=max_survivors,
stage_name="tighten_{}".format(idx),
predict_start_ts=predict_start_ts,
deadline_ts=deadline_ts,
)
t = t_info["candidates"]
if t_info["timed_out"]:
chosen = self._finalize_on_timeout(t, p_ball, min_survivors, max_survivors)
stage_name = "tighten_{}_timeout_fallback".format(idx)
break
if min_survivors <= len(t) <= max_survivors:
chosen = t
stage_name = "tighten_{}".format(idx)
break
if len(t) <= max_survivors:
chosen = t
stage_name = "tighten_{}".format(idx)
if len(chosen) > max_survivors:
# Enforce the upper bound: keep only the N best-scoring candidates
# (full, untruncated collection with the last tighten ruleset, then rank and trim).
full_info = self._collect_candidates(
candidates=candidates,
no=no,
df_ball=df_ball,
ruleset=tighten_rulesets[-1],
stop_when_gt=None,
stage_name="tighten_full_rank",
predict_start_ts=predict_start_ts,
deadline_ts=deadline_ts,
)
full_for_ranking = full_info["candidates"]
if full_info["timed_out"]:
chosen = self._finalize_on_timeout(full_for_ranking, p_ball, min_survivors, max_survivors)
stage_name = "tighten_rank_timeout_fallback"
else:
chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
stage_name = "tighten_rank_trim"
else:
# Too few survivors: try the relax rulesets in order.
chosen = current
stage_name = "base_underflow"
for idx, rs in enumerate(relax_rulesets, start=1):
# Relax stages only need to reach the lower bound (min_survivors), so stop early
r_info = self._collect_candidates(
candidates=candidates,
no=no,
df_ball=df_ball,
ruleset=rs,
stop_when_gt=None,
stop_when_gte=min_survivors,
stage_name="relax_{}".format(idx),
predict_start_ts=predict_start_ts,
deadline_ts=deadline_ts,
)
r = r_info["candidates"]
chosen = r
stage_name = "relax_{}".format(idx)
if r_info["timed_out"]:
chosen = self._finalize_on_timeout(r, p_ball, min_survivors, max_survivors)
stage_name = "relax_{}_timeout_fallback".format(idx)
break
if len(r) >= min_survivors:
break
if len(chosen) == 0:
# Avoid ending with zero survivors: if nothing was selected, build the minimum
# count from combinations similar to the previous draw
stage_name = "relax_zero_fallback"
chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
elif len(chosen) < min_survivors:
# Lower-bound guard: top up the shortfall with fallback candidates not already chosen
stage_name = "{}_fill".format(stage_name)
fill = self._fallback_candidates_from_prev(p_ball, min_survivors - len(chosen), exclude=set(tuple(x) for x in chosen))
chosen.extend(fill)
print("candidate_stage: {}, survivors: {}".format(stage_name, len(chosen)))
for ball in chosen:
result_json.append(ball)
return p_no, p_ball
def _get_base_ruleset(self):
    """Return a deep copy of the ``m1.ruleset`` of a freshly built BallFilter.

    The filter is constructed from ``<resources_path>/lotto_history.json``;
    the copy lets callers modify the ruleset without touching the filter.
    """
    history_path = os.path.join(self.resources_path, "lotto_history.json")
    return copy.deepcopy(BallFilter(history_path).m1.ruleset)
def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
ruleset = copy.deepcopy(base_ruleset)
ruleset.setdefault("filters", {})
enabled_overrides = enabled_overrides or {}
allowed_overrides = allowed_overrides or {}
for key, value in enabled_overrides.items():
ruleset["filters"].setdefault(key, {})
ruleset["filters"][key]["enabled"] = bool(value)
for key, values in allowed_overrides.items():
ruleset["filters"].setdefault(key, {})
ruleset["filters"][key]["enabled"] = True
ruleset["filters"][key]["allowed"] = list(values)
return ruleset
def _collect_candidates(
    self,
    candidates,
    no,
    df_ball,
    ruleset,
    stop_when_gt=None,
    stop_when_gte=None,
    stage_name="base",
    predict_start_ts=None,
    deadline_ts=None,
):
    """Collect the 6-number combinations of ``candidates`` that pass ``ruleset``.

    Args:
        candidates: Number pool to build combinations from.
        no: Draw number handed to the filter.
        df_ball: History DataFrame handed to the filter.
        ruleset: Ruleset the BallFilter is constructed with.
        stop_when_gt: Stop once the survivor count exceeds this value.
        stop_when_gte: Stop once the survivor count reaches this value.
        stage_name: Label used in the progress output.
        predict_start_ts: Start timestamp used for the elapsed-time output.
        deadline_ts: ``time.time()`` value after which collection aborts.

    Returns:
        Dict with ``candidates`` (list of survivor lists), ``timed_out``
        (bool) and ``processed`` (number of combinations looked at).
    """
    def elapsed_seconds():
        if predict_start_ts is None:
            return 0.0
        return time.time() - predict_start_ts

    def outcome(processed_count, timed_out=False):
        return {
            "candidates": survivors,
            "timed_out": timed_out,
            "processed": processed_count,
        }

    history_path = os.path.join(self.resources_path, "lotto_history.json")
    rule_filter = BallFilter(history_path, ruleset=ruleset)
    survivors = []
    processed = 0
    for processed, combo in enumerate(itertools.combinations(candidates, 6), start=1):
        # The deadline is checked before each combination is evaluated.
        if deadline_ts is not None and deadline_ts <= time.time():
            print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, elapsed_seconds(), len(survivors)))
            return outcome(processed, timed_out=True)
        if processed % 1000000 == 0:
            print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, processed, elapsed_seconds(), len(survivors)))
        numbers = list(combo)
        if len(rule_filter.filter(ball=numbers, no=no, until_end=False, df=df_ball)) != 0:
            continue
        survivors.append(numbers)
        if stop_when_gt is not None and len(survivors) > stop_when_gt:
            return outcome(processed)
        if stop_when_gte is not None and len(survivors) >= stop_when_gte:
            return outcome(processed)
    return outcome(processed)
def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
chosen = list(partial_candidates)
if len(chosen) > max_survivors:
chosen = self._rank_and_trim(chosen, prev_ball, max_survivors)
elif len(chosen) < min_survivors:
fill = self._fallback_candidates_from_prev(
prev_ball,
min_survivors - len(chosen),
exclude=set(tuple(x) for x in chosen),
)
chosen.extend(fill)
return chosen
def _rank_and_trim(self, candidates, prev_ball, limit):
scored = [(self._score_candidate(ball, prev_ball), ball) for ball in candidates]
scored.sort(key=lambda x: x[0])
return [ball for _, ball in scored[:limit]]
def _score_candidate(self, ball, prev_ball):
sum_diff = abs(sum(ball) - sum(prev_ball))
even_cnt = len([x for x in ball if x % 2 == 0])
uniq_last = len(set([x % 10 for x in ball]))
contiguous_penalty = 0
s = sorted(ball)
for i in range(1, len(s)):
if s[i] - s[i - 1] == 1:
contiguous_penalty += 1
score = 0
score += sum_diff
score += abs(even_cnt - 3) * 2
score += abs(uniq_last - 5) * 2
score += contiguous_penalty
return score
def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
exclude = exclude or set()
seed = sorted(prev_ball)
out = []
delta_patterns = [
(0, 0, 0, 0, 0, 0),
(-1, 0, 0, 0, 0, 1),
(0, -1, 0, 0, 1, 0),
(0, 0, -1, 1, 0, 0),
(-2, 0, 0, 0, 0, 2),
(0, -2, 0, 0, 2, 0),
(0, 0, -2, 2, 0, 0),
(-1, -1, 0, 0, 1, 1),
(1, 0, -1, 0, 0, 0),
(0, 1, 0, -1, 0, 0),
(1, -1, 1, -1, 1, -1),
(-1, 1, -1, 1, -1, 1),
]
shift = 0
while len(out) < need_count and shift <= 8:
for delta in delta_patterns:
cand = [seed[i] + delta[i] for i in range(6)]
cand = [min(45, max(1, v + shift)) for v in cand]
cand = sorted(cand)
if len(set(cand)) != 6:
continue
t = tuple(cand)
if t in exclude:
continue
exclude.add(t)
out.append(cand)
if len(out) >= need_count:
break
shift += 1
return out
# Script entry point: work out the target Saturday, build the recommendation list
# (fixed picks + filter-based picks) and send it through the Telegram bot.
if __name__ == '__main__':
PROJECT_HOME = '.'
resources_path = os.path.join(PROJECT_HOME, 'resources')
today = datetime.today()
# datetime.weekday(): Monday is 0, so 5 is Saturday and 6 is Sunday.
# On Saturday from 21:00 (hour > 20) and on Sunday the target is the following
# Saturday; otherwise it is the Saturday of the current week.
if today.weekday() == 5:
if today.hour > 20:
this_weekend = today + timedelta(days=(12 - today.weekday()))
else:
this_weekend = today + timedelta(days=(5 - today.weekday()))
elif today.weekday() == 6:
this_weekend = today + timedelta(days=(12 - today.weekday()))
else:
this_weekend = today + timedelta(days=(5 - today.weekday()))
last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
ymd = this_weekend.strftime('%Y%m%d')
print("ymd: {}".format(ymd))
# Lotto prediction
practice = Practice(resources_path)
# Data collection
lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
lottoHistoryFileName = lottoHistoryFile + '.json'
# Scan the JSON-lines history; last_json ends up holding the last non-blank record.
# It is only referenced by the disabled craw call below.
with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
for line in f:
if line != '\n':
last_json = json.loads(line)
#ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)
result_json = {ymd: []}
# Fixed picks, the same every week
practice.predict1(result_json[ymd])
# Filter-based prediction
p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])
p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
# Send the list in messages of 100 entries each; the header goes out with the first one.
for i, ball in enumerate(result_json[ymd]):
p_str += " {}. {}\n".format((i+1), str(ball))
if (i+1) % 100 == 0:
practice.bot.sendMsg("{}".format(p_str))
p_str = ''
# Flush the remaining partial batch (skipped when the count is a multiple of 100).
if len(result_json[ymd]) % 100 != 0:
practice.bot.sendMsg("{}".format(p_str))
size = len(result_json[ymd])
print("size: {}".format(size))
# https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
# https://www.youtube.com/watch?v=YwiHaa1KNwA
print("done...")

View File

@@ -1,189 +0,0 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_3 import BallFilter
class Practice:
    """Weekly prediction driver: history crawler plus two prediction steps."""

    # Telegram client used to send the results; assigned in __init__.
    bot = None
    # Declared here but not assigned or read in this class.
    preprocessor = None
    predictor = None
    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram client. ``resources_path`` is accepted but unused."""
        self.bot = TelegramBot()

    def craw(self, lottoHistoryFile, drwNo=None):
        """Download winning numbers from dhlottery.co.kr and store them.

        Every successful response is written as one JSON line to
        ``<lottoHistoryFile>.json`` and as one CSV line
        (``no,b1,...,b6,bonus``) to ``<lottoHistoryFile>.txt``.

        Args:
            lottoHistoryFile: Path prefix (without extension) of the two files.
            drwNo: Draw number to fetch and append. When ``None`` both files
                are truncated and draws are fetched from 1 upwards until the
                service stops answering ``success``.

        Returns:
            ``[b1, ..., b6, bonus]`` of the last draw stored, or ``None``
            when nothing was stored.
        """
        url_prefix = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
        number_keys = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        idx = drwNo if single_draw else 1
        ball = None
        # The context manager closes both files on every exit path. The
        # original leaked them on the early "not success" return and on any
        # exception raised by the request or the JSON decoding.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            while True:
                res = requests.post(url_prefix + str(idx))
                result = res.json()
                if result['returnValue'] != 'success':
                    break
                jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
                ball = [result[key] for key in number_keys]
                csv_line = "%d,%d,%d,%d,%d,%d,%d,%d" % (idx, *ball)
                textFp.write(csv_line + "\n")
                print(csv_line)
                if single_draw:
                    break
                idx += 1
                # Pause between requests while crawling the whole history.
                time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combinations to ``result_json`` in place."""
        result_json.append([6, 7, 10, 11, 20, 45])
        result_json.append([2, 7, 17, 28, 35, 39])
        result_json.append([6, 10, 19, 25, 33, 35])
        result_json.append([3, 17, 20, 24, 35, 45])
        result_json.append([5, 15, 18, 29, 36, 41])
        result_json.append([6, 15, 20, 23, 37, 43])
        result_json.append([8, 15, 19, 23, 38, 41])
        result_json.append([5, 11, 19, 24, 40, 45])
        result_json.append([9, 16, 18, 23, 35, 43])
        result_json.append([7, 13, 19, 28, 33, 44])
        result_json.append([7, 11, 18, 29, 37, 42])
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every combination of 1..45 that passes the filter.

        Args:
            resources_path: Directory holding ``lotto_history.json`` and
                ``lotto_history.txt``.
            ymd: Target draw date (``YYYYMMDD``) used to look up the draw number.
            result_json: List the surviving combinations are appended to.

        Returns:
            ``(p_no, p_ball)``: number and six main numbers of the draw
            preceding the one being predicted.
        """
        candidates = list(range(1, 46))
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        #filter_ball=[1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44]
        # Stream the combinations: materialising all C(45, 6) = 8,145,060
        # tuples in a list first only costs memory.
        for idx, ball in enumerate(itertools.combinations(candidates, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(ball)
            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if 0 < len(filter_type):
                continue
            result_json.append(ball)
        p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = p_ball[0]
        p_ball = p_ball[1:7]
        return p_no, p_ball
# Script entry point: work out the target Saturday, build the recommendation list
# (fixed picks + filter-based picks) and send it through the Telegram bot.
if __name__ == '__main__':
PROJECT_HOME = '.'
resources_path = os.path.join(PROJECT_HOME, 'resources')
today = datetime.today()
# datetime.weekday(): Monday is 0, so 5 is Saturday and 6 is Sunday.
# On Saturday from 21:00 (hour > 20) and on Sunday the target is the following
# Saturday; otherwise it is the Saturday of the current week.
if today.weekday() == 5:
if today.hour > 20:
this_weekend = today + timedelta(days=(12 - today.weekday()))
else:
this_weekend = today + timedelta(days=(5 - today.weekday()))
elif today.weekday() == 6:
this_weekend = today + timedelta(days=(12 - today.weekday()))
else:
this_weekend = today + timedelta(days=(5 - today.weekday()))
last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
ymd = this_weekend.strftime('%Y%m%d')
print("ymd: {}".format(ymd))
# Lotto prediction
practice = Practice(resources_path)
# Data collection
lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
lottoHistoryFileName = lottoHistoryFile + '.json'
# Scan the JSON-lines history; last_json ends up holding the last non-blank record.
# It is only referenced by the disabled craw call below.
with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
for line in f:
if line != '\n':
last_json = json.loads(line)
#ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)
result_json = {ymd: []}
# Fixed picks, the same every week
practice.predict1(result_json[ymd])
# Filter-based prediction
p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])
p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
# Send the list in messages of 100 entries each; the header goes out with the first one.
for i, ball in enumerate(result_json[ymd]):
p_str += " {}. {}\n".format((i+1), str(ball))
if (i+1) % 100 == 0:
practice.bot.sendMsg("{}".format(p_str))
p_str = ''
# Flush the remaining partial batch (skipped when the count is a multiple of 100).
if len(result_json[ymd]) % 100 != 0:
practice.bot.sendMsg("{}".format(p_str))
size = len(result_json[ymd])
print("size: {}".format(size))
# https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
# https://www.youtube.com/watch?v=YwiHaa1KNwA
print("done...")

View File

@@ -1216,3 +1216,4 @@
{"returnValue": "success", "drwNoDate": "2026-03-21", "drwNo": 1216, "drwtNo1": 3, "drwtNo2": 10, "drwtNo3": 14, "drwtNo4": 15, "drwtNo5": 23, "drwtNo6": 24, "bnusNo": 25} {"returnValue": "success", "drwNoDate": "2026-03-21", "drwNo": 1216, "drwtNo1": 3, "drwtNo2": 10, "drwtNo3": 14, "drwtNo4": 15, "drwtNo5": 23, "drwtNo6": 24, "bnusNo": 25}
{"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41} {"returnValue": "success", "drwNoDate": "2026-03-28", "drwNo": 1217, "drwtNo1": 8, "drwtNo2": 10, "drwtNo3": 15, "drwtNo4": 20, "drwtNo5": 29, "drwtNo6": 31, "bnusNo": 41}
{"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25} {"returnValue": "success", "drwNoDate": "2026-04-04", "drwNo": 1218, "drwtNo1": 3, "drwtNo2": 28, "drwtNo3": 31, "drwtNo4": 32, "drwtNo5": 42, "drwtNo6": 45, "bnusNo": 25}
{"returnValue": "success", "drwNoDate": "2026-04-11", "drwNo": 1219, "drwtNo1": 1, "drwtNo2": 2, "drwtNo3": 15, "drwtNo4": 28, "drwtNo5": 39, "drwtNo6": 45, "bnusNo": 31}

View File

@@ -1216,3 +1216,4 @@
1216,3,10,14,15,23,24,25 1216,3,10,14,15,23,24,25
1217,8,10,15,20,29,31,41 1217,8,10,15,20,29,31,41
1218,3,28,31,32,42,45,25 1218,3,28,31,32,42,45,25
1219,1,2,15,28,39,45,31

View File

@@ -1,99 +0,0 @@
import os
import time
import datetime
import pandas as pd
import itertools
from filter_model_1 import BallFilter
class FilterTestReview:
    """Replays a ``BallFilter`` over selected draws and tallies prize ranks."""

    # Filter under test; replaced by a BallFilter instance in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Count, per draw, the surviving combinations and their prize ranks.

        For each draw number every 6-number combination of 1..45 is run
        through ``ballFilter.filter``; survivors are compared with the
        winning numbers and a per-rank summary is printed.

        Args:
            df_ball: DataFrame with columns ``no, b1..b6, bn``.
            nos: Iterable of draw numbers to check. ``None`` is treated as
                "no draws" (the original raised ``TypeError`` here).

        Returns:
            ``(win_history, win_history_size)``: draws whose winning
            combination survived (draw number -> six numbers), and the
            survivor count of every checked draw.
        """
        win_history = {}
        win_history_size = {}
        if nos is None:
            return win_history, win_history_size
        generation_balls = list(range(1, 46))
        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7].copy()
            answer_set = set(answer)
            bonus = balls[7]
            # Only the number of survivors is reported, so count them
            # instead of keeping every surviving combination in a list.
            survivor_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}
            # Stream the combinations instead of building an 8,145,060-item
            # list for every draw.
            for idx, ball in enumerate(itertools.combinations(generation_balls, 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                survivor_count += 1
                match = len(answer_set.intersection(ball))
                if match == 6:
                    if no not in win_history:  # keep the first hit only
                        win_history[no] = answer.copy()
                    if ball not in win_dic[1]:  # avoid duplicate combinations
                        win_dic[1].append(ball.copy())
                elif match == 5:
                    # 2nd prize: five matches plus the bonus number.
                    if bonus in ball:
                        win_dic[2].append(ball)
                    else:
                        win_dic[3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)
            win_history_size[no] = survivor_count
            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history, win_history_size
# Script entry point: run FilterTestReview.validate over a fixed list of draw numbers
# and print which of them would have been won, with the survivor count per draw.
if __name__ == '__main__':
PROJECT_HOME = '.'
resources_path = os.path.join(PROJECT_HOME, 'resources')
# lotto_history.txt is read as a header-less CSV: draw no, six numbers, bonus number.
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
df_ball = pd.read_csv(lottoHistoryFileName, header=None)
df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
filterTestReview = FilterTestReview(resources_path)
start = time.time()
#win_history = filterTest.validate(df_ball, nos =[1046,1022,1004,900,869,816,797,696,574,524,523,461,356,324,303,289,147,71], filter_ball = [1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44])
win_history, win_history_size = filterTestReview.validate(
df_ball,
#nos=range(1126, 21, -1),
nos=[1057,1046,1022,900,841,816,696,593,574,426,356,324,303,245,147,139,71])
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print("{} 회 당첨".format(len(win_history)))
# Report the winning draws in ascending draw-number order.
sorted_win_history = sorted(win_history.keys())
for i in range(len(sorted_win_history)):
print("\t>{} > {} ({})".format(sorted_win_history[i], win_history[sorted_win_history[i]], win_history_size[sorted_win_history[i]]))

View File

@@ -1,99 +0,0 @@
import os
import time
import datetime
import pandas as pd
import itertools
from filter_model_2 import BallFilter
class FilterTestReview:
    """Replays a ``BallFilter`` over selected draws and tallies prize ranks."""

    # Filter under test; replaced by a BallFilter instance in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Count, per draw, the surviving combinations and their prize ranks.

        For each draw number every 6-number combination of 1..45 is run
        through ``ballFilter.filter``; survivors are compared with the
        winning numbers and a per-rank summary is printed.

        Args:
            df_ball: DataFrame with columns ``no, b1..b6, bn``.
            nos: Iterable of draw numbers to check. ``None`` is treated as
                "no draws" (the original raised ``TypeError`` here).

        Returns:
            ``(win_history, win_history_size)``: draws whose winning
            combination survived (draw number -> six numbers), and the
            survivor count of every checked draw.
        """
        win_history = {}
        win_history_size = {}
        if nos is None:
            return win_history, win_history_size
        generation_balls = list(range(1, 46))
        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7].copy()
            answer_set = set(answer)
            bonus = balls[7]
            # Only the number of survivors is reported, so count them
            # instead of keeping every surviving combination in a list.
            survivor_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}
            # Stream the combinations instead of building an 8,145,060-item
            # list for every draw.
            for idx, ball in enumerate(itertools.combinations(generation_balls, 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(ball)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                survivor_count += 1
                match = len(answer_set.intersection(ball))
                if match == 6:
                    if no not in win_history:  # keep the first hit only
                        win_history[no] = answer.copy()
                    if ball not in win_dic[1]:  # avoid duplicate combinations
                        win_dic[1].append(ball.copy())
                elif match == 5:
                    # 2nd prize: five matches plus the bonus number.
                    if bonus in ball:
                        win_dic[2].append(ball)
                    else:
                        win_dic[3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)
            win_history_size[no] = survivor_count
            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history, win_history_size
# Script entry point: run FilterTestReview.validate over a fixed list of draw numbers
# and print which of them would have been won, with the survivor count per draw.
if __name__ == '__main__':
PROJECT_HOME = '.'
resources_path = os.path.join(PROJECT_HOME, 'resources')
# lotto_history.txt is read as a header-less CSV: draw no, six numbers, bonus number.
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
df_ball = pd.read_csv(lottoHistoryFileName, header=None)
df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
filterTestReview = FilterTestReview(resources_path)
start = time.time()
#win_history = filterTest.validate(df_ball, nos =[1046,1022,1004,900,869,816,797,696,574,524,523,461,356,324,303,289,147,71], filter_ball = [1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44])
win_history, win_history_size = filterTestReview.validate(
df_ball,
#nos=range(1126, 21, -1),
nos=[1057,1046,1022,900,841,816,696,593,574,426,356,324,303,245,147,139,71])
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print("{} 회 당첨".format(len(win_history)))
# Report the winning draws in ascending draw-number order.
sorted_win_history = sorted(win_history.keys())
for i in range(len(sorted_win_history)):
print("\t>{} > {} ({})".format(sorted_win_history[i], win_history[sorted_win_history[i]], win_history_size[sorted_win_history[i]]))

View File

@@ -1,99 +0,0 @@
import os
import time
import datetime
import pandas as pd
import itertools
from filter_model_3 import BallFilter
class FilterTestReview:
    """Brute-force review of a ``BallFilter`` against selected historical draws.

    For every requested draw, each 6-number combination of 1..45 is passed
    through the filter. Combinations for which the filter returns an empty
    result are kept as candidates and graded against the winning numbers.
    """

    # Filter under test; replaced per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Create the filter from ``lotto_history.json`` inside *resources_path*."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Grade the surviving candidates of each draw in *nos*.

        Args:
            df_ball: DataFrame with a ``no`` column followed by the six
                winning balls and the bonus ball (positions 1..6 and 7 of a row).
            nos: iterable of draw numbers to review. ``None`` is treated as
                "no draws" instead of raising ``TypeError``.

        Returns:
            ``(win_history, win_history_size)`` where ``win_history`` maps a
            draw number to its winning numbers when the exact winning
            combination survived the filter, and ``win_history_size`` maps
            every reviewed draw number to its candidate count.

        Raises:
            IndexError: if a draw number in *nos* is not present in *df_ball*.
        """
        win_history = {}
        win_history_size = {}
        if nos is None:
            nos = ()
        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7].copy()  # independent copy of the winning numbers
            answer_set = set(answer)  # built once per draw, not once per combination
            bonus = balls[7]
            final_candidates = []
            # Prize rank -> surviving combinations that would have won that rank.
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}
            # Stream the 8,145,060 combinations lazily instead of materialising
            # them into a list for every draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if len(filter_type) > 0:
                    # At least one filter rejected this combination.
                    continue
                final_candidates.append(ball)
                match = len(answer_set.intersection(ball))
                if match == 6:
                    if no not in win_history:  # record each draw only once
                        win_history[no] = answer.copy()
                    if ball not in win_dic[1]:  # avoid storing the same combination twice
                        win_dic[1].append(ball.copy())
                elif match == 5:
                    # 2nd prize: five matches plus the bonus number; otherwise 3rd.
                    if bonus in ball:
                        win_dic[2].append(ball)
                    else:
                        win_dic[3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)
            win_history_size[no] = len(final_candidates)
            print("no: {}, answer: {}, size: {}".format(no, answer, len(final_candidates)))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history, win_history_size
if __name__ == '__main__':
    # Resources are resolved relative to the current working directory.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    # The history file has no header row, so column names are assigned here.
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filterTestReview = FilterTestReview(resources_path)

    # Draw numbers to review.
    #win_history = filterTest.validate(df_ball, nos =[1046,1022,1004,900,869,816,797,696,574,524,523,461,356,324,303,289,147,71], filter_ball = [1,2,4,6,10,11,11,17,18,20,21,22,23,24,26,27,28,30,31,32,33,34,37,38,39,40,42,44])
    #nos=range(1126, 21, -1),
    review_nos = [1057, 1046, 1022, 900, 841, 816, 696, 593, 574, 426, 356, 324, 303, 245, 147, 139, 71]

    start = time.time()
    win_history, win_history_size = filterTestReview.validate(df_ball, nos=review_nos)
    elapsed_seconds = time.time() - start
    process_time = datetime.timedelta(seconds=elapsed_seconds)
    print("process_time: ", process_time)

    # Summary: draws whose exact winning combination survived, in ascending order.
    print("{} 회 당첨".format(len(win_history)))
    for draw_no in sorted(win_history.keys()):
        print("\t>{} > {} ({})".format(draw_no, win_history[draw_no], win_history_size[draw_no]))

236
test_1.py
View File

@@ -1,236 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_1 import BallFilter
import time
import datetime
class FilterTest:
    """Harness that measures how a ``BallFilter`` treats historical winning draws."""

    # Filter under test; replaced per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from *history_json* located in *resources_path*.

        *ruleset_path* is forwarded to ``BallFilter`` unchanged.
        """
        # The test relies on previous-draw / recent-N-week window features,
        # so the full history (json) has to be used.
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Run every winning draw in ``[start_no, end_no]`` through the filter and print a report.

        Args:
            df_ball: DataFrame with a ``no`` column followed by the six balls.
            start_no: first draw number to score (inclusive).
            end_no: last draw number to score (inclusive).
            filter_ball: accepted for interface compatibility; not used here.

        Returns:
            The number of scored draws whose winning numbers triggered no filter.

        The percentage in the report is relative to the size of the requested
        range (``end_no - start_no + 1``), not to the number of draws that are
        actually present in *df_ball*.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers that passed every filter
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # number of triggered filters -> list of triggered-filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair
        # df_ball may hold the full history; only [start_no, end_no] is scored.
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group draws by how many filters they triggered (fewest first in the report).
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)
        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)
        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)
        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" prints two decimals; the former "{:.2}" meant two significant
        # digits and rendered e.g. 33.3 as "3.3e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no, answer in no_filter_ball.items():
            print("\t\t>", no, answer)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-number combination of 1..45 that passes the filter for draw *no*.

        Combinations sharing any number with *filter_ball* (when given) are
        skipped before the filter is consulted. Candidates are returned as tuples.
        """
        final_candidates = []
        excluded = None if filter_ball is None else set(filter_ball)
        # Iterate lazily; the full list of combinations is never needed at once.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded is not None and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        *p_win_count* is only echoed in the summary line. Draws whose winning
        numbers share a number with *filter_ball* (when given) are skipped.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)
        # The range stops before index 0, so the first row is never checked.
        # NOTE(review): presumably because it has no previous draw — confirm.
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if excluded is not None and not excluded.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
        return

    def validate(self, df_ball, nos=None):
        """Return ``{draw_no: winning numbers}`` for draws whose winning combination passes.

        All combinations of 1..45 are scanned in order for each draw in *nos*
        and the scan stops at the first passing combination that equals the
        winning numbers. ``None`` for *nos* is treated as "no draws".
        """
        win_history = {}
        if nos is None:
            nos = ()
        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set.intersection(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset, scored draw range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources
    # The full history txt is loaded so previous/window features are computed
    # normally; scoring itself is limited to the requested test range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(
        df_ball,
        start_no=args.start_no,
        end_no=args.end_no,
        filter_ball=filter_ball,
    )
    elapsed_seconds = time.time() - start
    process_time = datetime.timedelta(seconds=elapsed_seconds)
    print("process_time: ", process_time)
    # Disabled "STEP #0" block, kept verbatim as an unused string literal.
    """
print("\n\n")
no = df_ball['no'].values[-1]
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = ball[1:7]
print("STEP #0. 최종 후보 선정")
start = time.time()
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print(" > size: {}".format(len(final_candidates)))
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
with open(file_name, 'w+') as outFp:
for ball in final_candidates:
ball_str = [str(b) for b in answer]
outFp.write("{}\n".format(','.join(ball_str)))
print('{}회, 정답: {}\n'.format(no, str(answer)))
"""
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins

236
test_2.py
View File

@@ -1,236 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_2 import BallFilter
import time
import datetime
class FilterTest:
    """Harness that measures how a ``BallFilter`` treats historical winning draws."""

    # Filter under test; replaced per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from *history_json* located in *resources_path*.

        *ruleset_path* is forwarded to ``BallFilter`` unchanged.
        """
        # The test relies on previous-draw / recent-N-week window features,
        # so the full history (json) has to be used.
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Run every winning draw in ``[start_no, end_no]`` through the filter and print a report.

        Args:
            df_ball: DataFrame with a ``no`` column followed by the six balls.
            start_no: first draw number to score (inclusive).
            end_no: last draw number to score (inclusive).
            filter_ball: accepted for interface compatibility; not used here.

        Returns:
            The number of scored draws whose winning numbers triggered no filter.

        The percentage in the report is relative to the size of the requested
        range (``end_no - start_no + 1``), not to the number of draws that are
        actually present in *df_ball*.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers that passed every filter
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # number of triggered filters -> list of triggered-filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair
        # df_ball may hold the full history; only [start_no, end_no] is scored.
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group draws by how many filters they triggered (fewest first in the report).
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)
        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)
        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)
        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" prints two decimals; the former "{:.2}" meant two significant
        # digits and rendered e.g. 33.3 as "3.3e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no, answer in no_filter_ball.items():
            print("\t\t>", no, answer)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-number combination of 1..45 that passes the filter for draw *no*.

        Combinations sharing any number with *filter_ball* (when given) are
        skipped before the filter is consulted. Candidates are returned as tuples.
        """
        final_candidates = []
        excluded = None if filter_ball is None else set(filter_ball)
        # Iterate lazily; the full list of combinations is never needed at once.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded is not None and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        *p_win_count* is only echoed in the summary line. Draws whose winning
        numbers share a number with *filter_ball* (when given) are skipped.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)
        # The range stops before index 0, so the first row is never checked.
        # NOTE(review): presumably because it has no previous draw — confirm.
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if excluded is not None and not excluded.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
        return

    def validate(self, df_ball, nos=None):
        """Return ``{draw_no: winning numbers}`` for draws whose winning combination passes.

        All combinations of 1..45 are scanned in order for each draw in *nos*
        and the scan stops at the first passing combination that equals the
        winning numbers. ``None`` for *nos* is treated as "no draws".
        """
        win_history = {}
        if nos is None:
            nos = ()
        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set.intersection(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset, scored draw range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    # NOTE(review): the help text names filter_model_1.py although this script
    # imports BallFilter from filter_model_2 — confirm which default applies.
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources
    # The full history txt is loaded so previous/window features are computed
    # normally; scoring itself is limited to the requested test range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(
        df_ball,
        start_no=args.start_no,
        end_no=args.end_no,
        filter_ball=filter_ball,
    )
    elapsed_seconds = time.time() - start
    process_time = datetime.timedelta(seconds=elapsed_seconds)
    print("process_time: ", process_time)
    # Disabled "STEP #0" block, kept verbatim as an unused string literal.
    """
print("\n\n")
no = df_ball['no'].values[-1]
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = ball[1:7]
print("STEP #0. 최종 후보 선정")
start = time.time()
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print(" > size: {}".format(len(final_candidates)))
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
with open(file_name, 'w+') as outFp:
for ball in final_candidates:
ball_str = [str(b) for b in answer]
outFp.write("{}\n".format(','.join(ball_str)))
print('{}회, 정답: {}\n'.format(no, str(answer)))
"""
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins

236
test_3.py
View File

@@ -1,236 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
    """Harness that measures how a ``BallFilter`` treats historical winning draws."""

    # Filter under test; replaced per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter from *history_json* located in *resources_path*.

        *ruleset_path* is forwarded to ``BallFilter`` unchanged.
        """
        # The test relies on previous-draw / recent-N-week window features,
        # so the full history (json) has to be used.
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Run every winning draw in ``[start_no, end_no]`` through the filter and print a report.

        Args:
            df_ball: DataFrame with a ``no`` column followed by the six balls.
            start_no: first draw number to score (inclusive).
            end_no: last draw number to score (inclusive).
            filter_ball: accepted for interface compatibility; not used here.

        Returns:
            The number of scored draws whose winning numbers triggered no filter.

        The percentage in the report is relative to the size of the requested
        range (``end_no - start_no + 1``), not to the number of draws that are
        actually present in *df_ball*.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers that passed every filter
        printLog = True
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # number of triggered filters -> list of triggered-filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair
        # df_ball may hold the full history; only [start_no, end_no] is scored.
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Group draws by how many filters they triggered (fewest first in the report).
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        print("\n\t[걸러진 유일 필터]")
        for name, count in sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)
        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)
        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in sorted(filter_dic.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", name, "->", count)
        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" prints two decimals; the former "{:.2}" meant two significant
        # digits and rendered e.g. 33.3 as "3.3e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no, answer in no_filter_ball.items():
            print("\t\t>", no, answer)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-number combination of 1..45 that passes the filter for draw *no*.

        Combinations sharing any number with *filter_ball* (when given) are
        skipped before the filter is consulted. Candidates are returned as tuples.
        """
        final_candidates = []
        excluded = None if filter_ball is None else set(filter_ball)
        # Iterate lazily; the full list of combinations is never needed at once.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if excluded is not None and not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        *p_win_count* is only echoed in the summary line. Draws whose winning
        numbers share a number with *filter_ball* (when given) are skipped.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)
        # The range stops before index 0, so the first row is never checked.
        # NOTE(review): presumably because it has no previous draw — confirm.
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if excluded is not None and not excluded.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
        return

    def validate(self, df_ball, nos=None):
        """Return ``{draw_no: winning numbers}`` for draws whose winning combination passes.

        All combinations of 1..45 are scanned in order for each draw in *nos*
        and the scan stops at the first passing combination that equals the
        winning numbers. ``None`` for *nos* is treated as "no draws".
        """
        win_history = {}
        if nos is None:
            nos = ()
        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set.intersection(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset, scored draw range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    # NOTE(review): the help text names filter_model_1.py although this script
    # imports BallFilter from filter_model_3 — confirm which default applies.
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources
    # The full history txt is loaded so previous/window features are computed
    # normally; scoring itself is limited to the requested test range.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(
        df_ball,
        start_no=args.start_no,
        end_no=args.end_no,
        filter_ball=filter_ball,
    )
    elapsed_seconds = time.time() - start
    process_time = datetime.timedelta(seconds=elapsed_seconds)
    print("process_time: ", process_time)
    # Disabled "STEP #0" block, kept verbatim as an unused string literal.
    """
print("\n\n")
no = df_ball['no'].values[-1]
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = ball[1:7]
print("STEP #0. 최종 후보 선정")
start = time.time()
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print(" > size: {}".format(len(final_candidates)))
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
with open(file_name, 'w+') as outFp:
for ball in final_candidates:
ball_str = [str(b) for b in answer]
outFp.write("{}\n".format(','.join(ball_str)))
print('{}회, 정답: {}\n'.format(no, str(answer)))
"""
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins

View File

@@ -1,405 +0,0 @@
#!/usr/bin/env python3
"""
학습 구간(1~800회) 당첨번호로 final_BallFilter.extract_final_candidates 에 쓸 허용 집합을 계산합니다.
표준 라이브러리 + pandas(df 호환)만 사용합니다.
"""
from __future__ import annotations
import csv
import re
from collections import defaultdict
from pathlib import Path
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Draw history CSV read by load_draws().
HISTORY = ROOT / "resources" / "lotto_history.txt"
# Source file scanned for pair/triple rules by parse_pair_triple_rules().
BALLFILTER_SRC = ROOT / "BallFilter_25.py"
# Path of the generated parameter module.
OUT = ROOT / "final_filter_params.py"
# Inclusive range of draw numbers used as training data.
TRAIN_LO = 1
TRAIN_HI = 800
# To avoid an overly broad union over the training distribution, a percentile
# band over the unique values is applied first; then any feature value of a
# training draw that falls outside the band is included again (100% coverage
# of the training winners).
# The narrower the band, the stronger the filter. Balance between training and
# validation is tuned with these values and the final_filterTest.py results.
PCT_LO = 8
PCT_HI = 92
# Prime numbers up to 45.
PRIME = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43}
# Composite numbers up to 45 (1 belongs to neither set).
COMPOSITE = {4, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45}
def load_draws():
    """Read the history CSV and return ``{draw_no: sorted six balls}``.

    Empty CSV rows are skipped. Column 0 is the draw number and columns 1..6
    are the balls. The mapping is filled in ascending draw-number order.
    """
    parsed = []
    with open(HISTORY, newline="", encoding="utf-8") as fp:
        for record in csv.reader(fp):
            if record:
                draw_no = int(record[0])
                parsed.append((draw_no, sorted(int(v) for v in record[1:7])))
    parsed.sort(key=lambda item: item[0])
    return dict(parsed)
def get_ac(ball):
    """Return the number of distinct pairwise differences among the six balls, minus 5."""
    diffs = {ball[hi] - ball[lo] for hi in range(1, 6) for lo in range(hi)}
    return len(diffs) - 5
def interval_sum(ball):
    """Return the sum of the five gaps between neighbouring balls."""
    total = 0
    for k in range(5):
        total += ball[k + 1] - ball[k]
    return total
def first_letter_sum(ball):
    """Return the sum of the leading digits of the two-character numbers in *ball*."""
    total = 0
    for b in ball:
        text = str(b)
        if len(text) == 2:
            total += int(text[0])
    return total
def last_letter_sum(ball):
    """Return the sum of the final digits of the one- and two-character numbers in *ball*."""
    total = 0
    for b in ball:
        text = str(b)
        if len(text) == 2:
            total += int(text[1])
        elif len(text) == 1:
            total += int(text)
    return total
def uniq_end_digits(ball):
    """Return how many different last digits (``b % 10``) occur in *ball*."""
    endings = set()
    for b in ball:
        endings.add(b % 10)
    return len(endings)
def high_low(ball):
    """Return ``(low, high)``: counts of balls below 23 and above 23.

    A ball equal to 23 is counted in neither bucket.
    """
    low = len([b for b in ball if b < 23])
    high = len([b for b in ball if b > 23])
    return low, high
def section10_count(ball):
    """Return how many different tens-sections (``int(b / 10)``) the balls fall into."""
    return len({int(b / 10) for b in ball})
def count_mult(ball, m):
    """Return how many balls are divisible by *m*."""
    hits = 0
    for b in ball:
        if b % m == 0:
            hits += 1
    return hits
def continus_max(ball):
    """Return the length of the longest run of consecutive numbers among the six balls.

    A neighbour counts as consecutive when it is exactly one greater than the
    ball before it; the result is at least 1.
    """
    longest = 1
    current = 1
    for k in range(5):
        # Extend the run on a +1 step, otherwise start a new run.
        current = current + 1 if ball[k + 1] == ball[k] + 1 else 1
        if current > longest:
            longest = current
    return longest
def weeks_freq(draws_map, answer, no, week):
    """Count the balls of *answer* that appeared in the *week* draws before draw *no*.

    Draw numbers ``no - 1`` down to ``no - week`` are looked up in
    *draws_map*; numbers missing from the map are ignored.
    """
    recent = set()
    for prev_no in range(no - 1, no - week - 1, -1):
        recent.update(draws_map.get(prev_no, ()))
    return len([b for b in answer if b in recent])
def pct_band_unique(values, lo=PCT_LO, hi=PCT_HI):
    """Keep only the unique values that fall inside a percentile band.

    The unique values are sorted and the band limits are taken at the
    *lo* / *hi* percent positions of that sorted list. With six or fewer
    unique values everything is kept; an empty input gives an empty set.
    """
    if not values:
        return set()
    ordered = sorted(set(values))
    count = len(ordered)
    if count <= 6:
        return set(ordered)
    lower = ordered[int((lo / 100.0) * (count - 1))]
    upper = ordered[int((hi / 100.0) * (count - 1))]
    return {v for v in ordered if lower <= v <= upper}
def parse_pair_triple_rules():
    """Extract the pair and triple rules from the ``BallFilter_25`` source text.

    The source is scanned for ``len(set_ball & {...}) == 2`` and ``== 3``
    expressions. Each match whose brace list holds exactly two (or three)
    integers becomes a ``frozenset``. Returns ``(pairs, triples)``.
    """
    source_text = BALLFILTER_SRC.read_text(encoding="utf-8")

    def _collect(pattern, size):
        # Gather the number groups of every match that has exactly `size` members.
        found = []
        for hit in re.finditer(pattern, source_text):
            members = [int(tok.strip()) for tok in hit.group(1).split(",")]
            if len(members) == size:
                found.append(frozenset(members))
        return found

    pairs = _collect(r"len\(set_ball & \{([^}]+)\}\) == 2", 2)
    triples = _collect(r"len\(set_ball & \{([^}]+)\}\) == 3", 3)
    return pairs, triples
def main():
draws = load_draws()
pair_rules, triple_rules = parse_pair_triple_rules()
train_draws = {n: draws[n] for n in range(TRAIN_LO, TRAIN_HI + 1) if n in draws}
# 블랙리스트: 학습 당첨 6개에 함께 등장한 쌍/삼은 제외(당첨을 막지 않음)
train_pairs_seen = set()
train_triples_seen = set()
for b in train_draws.values():
for i in range(6):
for j in range(i + 1, 6):
train_pairs_seen.add(frozenset((b[i], b[j])))
for i in range(6):
for j in range(i + 1, 6):
for k in range(j + 1, 6):
train_triples_seen.add(frozenset((b[i], b[j], b[k])))
pair_block = [p for p in pair_rules if p not in train_pairs_seen]
triple_block = [t for t in triple_rules if t not in train_triples_seen]
sets = defaultdict(set)
flags_prev = {"need_relax_previous": False, "need_relax_prev7": False}
for no in range(2, TRAIN_HI + 1):
if no not in draws or (no - 1) not in draws:
continue
ball = draws[no]
p_ball = draws[no - 1]
s = sum(ball)
sets["sum6"].add(s)
sets["sum6_diff"].add(abs(s - sum(p_ball)))
avg = s // 6
pavg = sum(p_ball) // 6
sets["avg6"].add(avg)
sets["avg6_diff"].add(abs(avg - pavg))
s3f = ball[0] + ball[1] + ball[2]
ps3f = p_ball[0] + p_ball[1] + p_ball[2]
sets["sum3f"].add(s3f)
sets["sum3f_diff"].add(abs(s3f - ps3f))
s3b = ball[3] + ball[4] + ball[5]
ps3b = p_ball[3] + p_ball[4] + p_ball[5]
sets["sum3b"].add(s3b)
sets["sum3b_diff"].add(abs(s3b - ps3b))
l, h = high_low(ball)
sets["hl_allowed"].add((l, h))
gh = ball[0] + ball[5]
pgh = p_ball[0] + p_ball[5]
sets["go_sum"].add(gh)
sets["go_sum_diff"].add(abs(gh - pgh))
iv = interval_sum(ball)
piv = interval_sum(p_ball)
sets["interval"].add(iv)
sets["interval_diff"].add(abs(iv - piv))
fl = first_letter_sum(ball)
pfl = first_letter_sum(p_ball)
sets["first_letter"].add(fl)
sets["first_letter_diff"].add(abs(fl - pfl))
ll = last_letter_sum(ball)
pll = last_letter_sum(p_ball)
sets["last_letter"].add(ll)
sets["last_letter_diff"].add(abs(ll - pll))
sets["b0"].add(ball[0])
sets["b0_diff"].add(abs(ball[0] - p_ball[0]))
sets["b5"].add(ball[5])
sets["b5_diff"].add(abs(ball[5] - p_ball[5]))
sets["uniq_end"].add(uniq_end_digits(ball))
sets["uniq_end_diff"].add(abs(uniq_end_digits(ball) - uniq_end_digits(p_ball)))
ac = get_ac(ball)
pac = get_ac(p_ball)
sets["ac"].add(ac)
sets["ac_diff"].add(abs(ac - pac))
for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
sets[f"mul{m}"].add(count_mult(ball, m))
sets[f"mul{m}_diff"].add(abs(count_mult(ball, m) - count_mult(p_ball, m)))
pn = len(set(ball) & PRIME)
sets["prime_n"].add(pn)
cn = len(set(ball) & COMPOSITE)
sets["composite_n"].add(cn)
sets["composite_diff"].add(abs(cn - len(set(p_ball) & COMPOSITE)))
ev = sum(1 for b in ball if b % 2 == 0)
pev = sum(1 for b in p_ball if b % 2 == 0)
sets["even_n"].add(ev)
sets["even_diff"].add(abs(ev - pev))
sc = section10_count(ball)
psc = section10_count(p_ball)
sets["sec10"].add(sc)
sets["sec10_diff"].add(abs(sc - psc))
for wk in (8, 12, 16, 20):
ex = weeks_freq(draws, ball, no, wk)
pex = weeks_freq(draws, p_ball, no, wk)
sets[f"w{wk}"].add(ex)
sets[f"w{wk}_diff"].add(abs(ex - pex))
sets["continus_max"].add(continus_max(ball))
# filterPreviousNumber (원본과 동일)
pb_set = set(p_ball)
bad_prev = True
for i in range(6):
bi = ball[i]
if bi in pb_set or bi - 1 in pb_set or bi + 1 in pb_set:
bad_prev = False
break
if bad_prev:
flags_prev["need_relax_previous"] = True
# filterAllPreivous7
pb7 = set()
for i in range(no - 1, no - 8, -1):
if i in draws:
for x in draws[i]:
pb7.add(x)
if len(set(ball) & pb7) == 6:
flags_prev["need_relax_prev7"] = True
# 백분위로 타이트닝 후 학습 각 회차 특성 보강
keys_numeric = [
"sum6",
"sum6_diff",
"avg6",
"avg6_diff",
"sum3f",
"sum3f_diff",
"sum3b",
"sum3b_diff",
"go_sum",
"go_sum_diff",
"interval",
"interval_diff",
"first_letter",
"first_letter_diff",
"last_letter",
"last_letter_diff",
"b0",
"b0_diff",
"b5",
"b5_diff",
"uniq_end",
"uniq_end_diff",
"ac",
"ac_diff",
"prime_n",
"composite_n",
"composite_diff",
"even_n",
"even_diff",
"sec10",
"sec10_diff",
]
for m in (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23):
keys_numeric.extend([f"mul{m}", f"mul{m}_diff"])
for wk in (8, 12, 16, 20):
keys_numeric.extend([f"w{wk}", f"w{wk}_diff"])
keys_numeric.append("continus_max")
for k in keys_numeric:
sets[k] = pct_band_unique(sets[k])
# 고저: (0,1)/(1,0) 만 제외하는 기존 로직 유지 + 학습에 나온 (l,h) 항상 허용
hl_skip = {(l, h) for l in (0, 1) for h in (0, 1)}
def emit():
    """Render the generated parameter module as a single source string.

    Reads the enclosing scope's ``sets``, ``flags_prev``, ``hl_skip``,
    ``pair_block`` and ``triple_block`` plus ``TRAIN_LO`` / ``TRAIN_HI``.

    The output has a header, one ``ALLOW_<KEY> = [...]`` line per
    feature key (sorted values), the high/low tables, the pair/triple
    blocklists, and finally one ``_F_<KEY> = frozenset(ALLOW_<KEY>)`` line
    per ALLOW_ constant.

    Returns:
        str: the module text, terminated by a newline.
    """
    mul_divisors = (3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 17, 19, 23)
    week_windows = (8, 12, 16, 20)

    def with_diff(stems):
        # Expand each stem into the pair (stem, stem + "_diff"), in order.
        expanded = []
        for stem in stems:
            expanded.append(stem)
            expanded.append(stem + "_diff")
        return expanded

    def allow_line(key):
        # The constant name is always "ALLOW_" + upper-cased feature key.
        return "{} = {}".format("ALLOW_" + key.upper(), repr(sorted(sets[key])))

    # Feature keys written before the HL_* tables ...
    head_keys = with_diff(["sum6", "avg6", "sum3f", "sum3b"])
    # ... and those written after them, in emission order.
    tail_keys = with_diff(
        [
            "go_sum",
            "interval",
            "first_letter",
            "last_letter",
            "b0",
            "b5",
            "uniq_end",
            "ac",
        ]
    )
    tail_keys += with_diff(["mul{}".format(m) for m in mul_divisors])
    tail_keys += [
        "prime_n",
        "composite_n",
        "composite_diff",
        "even_n",
        "even_diff",
        "sec10",
        "sec10_diff",
    ]
    tail_keys += with_diff(["w{}".format(wk) for wk in week_windows])
    tail_keys.append("continus_max")

    out = [
        "# -*- coding: utf-8 -*-",
        '"""학습 구간 {}~{}회 기준 자동 생성 — tools/compute_final_filter_params.py"""'.format(
            TRAIN_LO, TRAIN_HI
        ),
        "",
        "TRAIN_RANGE = ({}, {})".format(TRAIN_LO, TRAIN_HI),
        "DISABLE_FILTER_PREVIOUS_NUMBER = {}".format(
            str(flags_prev["need_relax_previous"])
        ),
        "DISABLE_FILTER_ALL_PREVIOUS_7 = {}".format(str(flags_prev["need_relax_prev7"])),
        "",
    ]

    for key in head_keys:
        out.append(allow_line(key))
    out.append("HL_SKIP = {}".format(repr(sorted(hl_skip))))
    out.append("HL_SEEN = {}".format(repr(sorted(sets["hl_allowed"]))))
    for key in tail_keys:
        out.append(allow_line(key))

    out.append("PAIR_BLOCKLIST = {}".format(repr([sorted(x) for x in pair_block])))
    out.append("TRIPLE_BLOCKLIST = {}".format(repr([sorted(x) for x in triple_block])))

    out += ["", "# frozenset 캐시", ""]
    # One frozenset alias per ALLOW_ constant, in the order they were emitted.
    for key in head_keys + tail_keys:
        short = key.upper()
        out.append("_F_{} = frozenset({})".format(short, "ALLOW_" + short))
    out.append("_F_HL_SEEN = frozenset(HL_SEEN)")
    out.append("")
    return "\n".join(out) + "\n"
OUT.write_text(emit(), encoding="utf-8")
print("Wrote", OUT)
print("pair rules:", len(pair_rules), "-> block", len(pair_block))
print("triple rules:", len(triple_rules), "-> block", len(triple_block))
print("DISABLE_FILTER_PREVIOUS_NUMBER", flags_prev["need_relax_previous"])
print("DISABLE_FILTER_ALL_PREVIOUS_7", flags_prev["need_relax_prev7"])
# Call main() only when this module is executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -1,231 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_1 import BallFilter
import time
import datetime
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws.

    The filter returns a collection of reason labels for a rejected
    combination; an empty result means the combination passed.
    """

    # Filter under test; replaced per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Create the filter from ``lotto_history.json`` under *resources_path*.

        Args:
            resources_path: Directory that holds the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_counts(title, counts):
        """Print *title*, then ``label -> count`` rows with the highest count first."""
        print(title)
        for label, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", label, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on each real winning draw and report what rejected it.

        Rows are visited from the last one backwards; the rows at positions
        0-19 are never scored. Only rounds with
        ``start_no <= no <= end_no`` are evaluated.

        Args:
            df_ball: History frame; column ``no`` is the round and the next
                six columns are the winning numbers.
            start_no: First round (inclusive) to score.
            end_no: Last round (inclusive) to score.

        Returns:
            int: number of scored rounds whose winning numbers passed every
            filter.
        """
        win_count = 0
        evaluated = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}      # reason label -> rounds it rejected
        filter_dic_len = {}  # number of reasons -> list of reason lists
        filter_dic_1 = {}    # reason that rejected a round on its own
        filter_dic_2 = {}    # "a,b" pair that rejected a round together

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue
            evaluated += 1

            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            # Sorted so that a two-reason key is always spelled the same way;
            # iterating the raw set could yield "a,b" for one round and "b,a"
            # for another and split the count.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group rounds by how many filters rejected them, so the rounds
            # closest to passing can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_counts("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_counts("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_counts("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The rate is taken over the rounds actually scored, not over every
        # row of the history frame; guarded so an empty range prints 0.00.
        rate = (100 * len(no_filter_ball) / evaluated) if evaluated else 0.0
        # "{:.2f}" = two decimals. The previous "{:.2}" meant two significant
        # digits and printed e.g. 40.0 as "4e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(
            len(no_filter_ball), evaluated, rate
        )
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter accepts for round *no*.

        Args:
            no: Round number passed to the filter.
            df_ball: History frame passed to the filter.
            filter_ball: Optional iterable of numbers; any combination that
                contains one of them is skipped without calling the filter.

        Returns:
            list[tuple[int, ...]]: accepted combinations in lexicographic order.
        """
        final_candidates = []
        excluded = None if filter_ball is None else set(filter_ball)

        # Stream the 8,145,060 combinations instead of materialising them.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if excluded is not None and not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the rounds whose winning numbers pass ``extract_final_candidates``.

        Every row except the first is checked, newest first. Rounds whose
        winning numbers contain any value from *filter_ball* are skipped.

        Args:
            df_ball: History frame (``no`` plus six winning-number columns).
            p_win_count: Reference win count echoed in the summary line.
            filter_ball: Optional iterable of numbers to exclude.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            if excluded is not None and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Brute-force check, per round, that the winning combination survives.

        For each round in *nos* every 6-of-45 combination is passed through
        ``extract_final_candidates`` until one both passes and equals the
        winning numbers.

        Args:
            df_ball: History frame (``no`` plus six winning-number columns).
            nos: Iterable of round numbers; ``None`` means nothing to check.

        Returns:
            dict: round number -> winning numbers, for rounds that survived.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            answer_set = set(answer)

            # NOTE(review): only the combination equal to the answer can ever
            # match, so the other filter calls look redundant if the filter is
            # stateless; kept as-is because that cannot be confirmed from here.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(answer_set.intersection(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file, and the
    # inclusive round range to score (defaults to rounds 1-800).
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # --- Disabled: STEP #0, final candidate selection for the latest round ---
    # Kept for reference as comments (it used to be a bare string literal that
    # was evaluated on every run). The draft wrote `answer` on every output
    # line; the candidate `ball` is what has to be written, as shown here.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # --- Disabled: STEP #2, confirm the number of wins ---
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned in the feature file): 22 wins

View File

@@ -1,231 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_2 import BallFilter
import time
import datetime
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws.

    The filter returns a collection of reason labels for a rejected
    combination; an empty result means the combination passed.
    """

    # Filter under test; replaced per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Create the filter from ``lotto_history.json`` under *resources_path*.

        Args:
            resources_path: Directory that holds the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_counts(title, counts):
        """Print *title*, then ``label -> count`` rows with the highest count first."""
        print(title)
        for label, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", label, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on each real winning draw and report what rejected it.

        Rows are visited from the last one backwards; the rows at positions
        0-19 are never scored. Only rounds with
        ``start_no <= no <= end_no`` are evaluated.

        Args:
            df_ball: History frame; column ``no`` is the round and the next
                six columns are the winning numbers.
            start_no: First round (inclusive) to score.
            end_no: Last round (inclusive) to score.

        Returns:
            int: number of scored rounds whose winning numbers passed every
            filter.
        """
        win_count = 0
        evaluated = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}      # reason label -> rounds it rejected
        filter_dic_len = {}  # number of reasons -> list of reason lists
        filter_dic_1 = {}    # reason that rejected a round on its own
        filter_dic_2 = {}    # "a,b" pair that rejected a round together

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue
            evaluated += 1

            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            # Sorted so that a two-reason key is always spelled the same way;
            # iterating the raw set could yield "a,b" for one round and "b,a"
            # for another and split the count.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group rounds by how many filters rejected them, so the rounds
            # closest to passing can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_counts("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_counts("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_counts("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The rate is taken over the rounds actually scored, not over every
        # row of the history frame; guarded so an empty range prints 0.00.
        rate = (100 * len(no_filter_ball) / evaluated) if evaluated else 0.0
        # "{:.2f}" = two decimals. The previous "{:.2}" meant two significant
        # digits and printed e.g. 40.0 as "4e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(
            len(no_filter_ball), evaluated, rate
        )
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter accepts for round *no*.

        Args:
            no: Round number passed to the filter.
            df_ball: History frame passed to the filter.
            filter_ball: Optional iterable of numbers; any combination that
                contains one of them is skipped without calling the filter.

        Returns:
            list[tuple[int, ...]]: accepted combinations in lexicographic order.
        """
        final_candidates = []
        excluded = None if filter_ball is None else set(filter_ball)

        # Stream the 8,145,060 combinations instead of materialising them.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if excluded is not None and not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the rounds whose winning numbers pass ``extract_final_candidates``.

        Every row except the first is checked, newest first. Rounds whose
        winning numbers contain any value from *filter_ball* are skipped.

        Args:
            df_ball: History frame (``no`` plus six winning-number columns).
            p_win_count: Reference win count echoed in the summary line.
            filter_ball: Optional iterable of numbers to exclude.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            if excluded is not None and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Brute-force check, per round, that the winning combination survives.

        For each round in *nos* every 6-of-45 combination is passed through
        ``extract_final_candidates`` until one both passes and equals the
        winning numbers.

        Args:
            df_ball: History frame (``no`` plus six winning-number columns).
            nos: Iterable of round numbers; ``None`` means nothing to check.

        Returns:
            dict: round number -> winning numbers, for rounds that survived.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            answer_set = set(answer)

            # NOTE(review): only the combination equal to the answer can ever
            # match, so the other filter calls look redundant if the filter is
            # stateless; kept as-is because that cannot be confirmed from here.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(answer_set.intersection(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file, and the
    # inclusive round range to score (defaults to rounds 1-800).
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # --- Disabled: STEP #0, final candidate selection for the latest round ---
    # Kept for reference as comments (it used to be a bare string literal that
    # was evaluated on every run). The draft wrote `answer` on every output
    # line; the candidate `ball` is what has to be written, as shown here.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # --- Disabled: STEP #2, confirm the number of wins ---
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned in the feature file): 22 wins

View File

@@ -1,231 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
    """Score a ``BallFilter`` against historical lotto draws.

    The filter returns a collection of reason labels for a rejected
    combination; an empty result means the combination passed.
    """

    # Filter under test; replaced per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Create the filter from ``lotto_history.json`` under *resources_path*.

        Args:
            resources_path: Directory that holds the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
        """
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_counts(title, counts):
        """Print *title*, then ``label -> count`` rows with the highest count first."""
        print(title)
        for label, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", label, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on each real winning draw and report what rejected it.

        Rows are visited from the last one backwards; the rows at positions
        0-19 are never scored. Only rounds with
        ``start_no <= no <= end_no`` are evaluated.

        Args:
            df_ball: History frame; column ``no`` is the round and the next
                six columns are the winning numbers.
            start_no: First round (inclusive) to score.
            end_no: Last round (inclusive) to score.

        Returns:
            int: number of scored rounds whose winning numbers passed every
            filter.
        """
        win_count = 0
        evaluated = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}      # reason label -> rounds it rejected
        filter_dic_len = {}  # number of reasons -> list of reason lists
        filter_dic_1 = {}    # reason that rejected a round on its own
        filter_dic_2 = {}    # "a,b" pair that rejected a round together

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            if no < start_no or end_no < no:
                continue
            evaluated += 1

            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            # Sorted so that a two-reason key is always spelled the same way;
            # iterating the raw set could yield "a,b" for one round and "b,a"
            # for another and split the count.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group rounds by how many filters rejected them, so the rounds
            # closest to passing can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_counts("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_counts("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_counts("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The rate is taken over the rounds actually scored, not over every
        # row of the history frame; guarded so an empty range prints 0.00.
        rate = (100 * len(no_filter_ball) / evaluated) if evaluated else 0.0
        # "{:.2f}" = two decimals. The previous "{:.2}" meant two significant
        # digits and printed e.g. 40.0 as "4e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(
            len(no_filter_ball), evaluated, rate
        )
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter accepts for round *no*.

        Args:
            no: Round number passed to the filter.
            df_ball: History frame passed to the filter.
            filter_ball: Optional iterable of numbers; any combination that
                contains one of them is skipped without calling the filter.

        Returns:
            list[tuple[int, ...]]: accepted combinations in lexicographic order.
        """
        final_candidates = []
        excluded = None if filter_ball is None else set(filter_ball)

        # Stream the 8,145,060 combinations instead of materialising them.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if excluded is not None and not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the rounds whose winning numbers pass ``extract_final_candidates``.

        Every row except the first is checked, newest first. Rounds whose
        winning numbers contain any value from *filter_ball* are skipped.

        Args:
            df_ball: History frame (``no`` plus six winning-number columns).
            p_win_count: Reference win count echoed in the summary line.
            filter_ball: Optional iterable of numbers to exclude.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            if excluded is not None and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Brute-force check, per round, that the winning combination survives.

        For each round in *nos* every 6-of-45 combination is passed through
        ``extract_final_candidates`` until one both passes and equals the
        winning numbers.

        Args:
            df_ball: History frame (``no`` plus six winning-number columns).
            nos: Iterable of round numbers; ``None`` means nothing to check.

        Returns:
            dict: round number -> winning numbers, for rounds that survived.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            answer_set = set(answer)

            # NOTE(review): only the combination equal to the answer can ever
            # match, so the other filter calls look redundant if the filter is
            # stateless; kept as-is because that cannot be confirmed from here.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(answer_set.intersection(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file, and the
    # inclusive round range to score (defaults to rounds 1-800).
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # --- Disabled: STEP #0, final candidate selection for the latest round ---
    # Kept for reference as comments (it used to be a bare string literal that
    # was evaluated on every run). The draft wrote `answer` on every output
    # line; the candidate `ball` is what has to be written, as shown here.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # --- Disabled: STEP #2, confirm the number of wins ---
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned in the feature file): 22 wins

View File

@@ -1,234 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_1 import BallFilter
import time
import datetime
class FilterTest:
    """Score a ``BallFilter`` on a held-out range of historical lotto draws.

    The filter returns a collection of reason labels for a rejected
    combination; an empty result means the combination passed.
    """

    # Filter under test; replaced per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the filter from *history_json* under *resources_path*.

        Args:
            resources_path: Directory that holds the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the history file inside
                *resources_path*.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_counts(title, counts):
        """Print *title*, then ``label -> count`` rows with the highest count first."""
        print(title)
        for label, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", label, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on each real winning draw and report what rejected it.

        Every row of *df_ball* is visited, newest first, but only rounds with
        ``start_no <= no <= end_no`` are scored, so the frame may contain the
        full history.

        Args:
            df_ball: History frame; column ``no`` is the round and the next
                six columns are the winning numbers.
            start_no: First round (inclusive) to score.
            end_no: Last round (inclusive) to score.

        Returns:
            int: number of scored rounds whose winning numbers passed every
            filter.
        """
        win_count = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}      # reason label -> rounds it rejected
        filter_dic_len = {}  # number of reasons -> list of reason lists
        filter_dic_1 = {}    # reason that rejected a round on its own
        filter_dic_2 = {}    # "a,b" pair that rejected a round together

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue

            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            # Sorted so that a two-reason key is always spelled the same way;
            # iterating the raw set could yield "a,b" for one round and "b,a"
            # for another and split the count.
            filter_type = sorted(
                self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
            )
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group rounds by how many filters rejected them, so the rounds
            # closest to passing can be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_counts("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_counts("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_counts("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The denominator is the size of the requested range.
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" = two decimals. The previous "{:.2}" meant two significant
        # digits and printed e.g. 40.0 as "4e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)

        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination that the filter accepts for round *no*.

        Args:
            no: Round number passed to the filter.
            df_ball: History frame passed to the filter.
            filter_ball: Optional iterable of numbers; any combination that
                contains one of them is skipped without calling the filter.

        Returns:
            list[tuple[int, ...]]: accepted combinations in lexicographic order.
        """
        final_candidates = []
        excluded = None if filter_ball is None else set(filter_ball)

        # Stream the 8,145,060 combinations instead of materialising them.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))

            if excluded is not None and not excluded.isdisjoint(ball):
                continue

            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue

            final_candidates.append(ball)

        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the rounds whose winning numbers pass ``extract_final_candidates``.

        Every row except the first is checked, newest first. Rounds whose
        winning numbers contain any value from *filter_ball* are skipped.

        Args:
            df_ball: History frame (``no`` plus six winning-number columns).
            p_win_count: Reference win count echoed in the summary line.
            filter_ball: Optional iterable of numbers to exclude.
        """
        win_count = 0
        excluded = None if filter_ball is None else set(filter_ball)

        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]

            if excluded is not None and not excluded.isdisjoint(answer):
                continue

            filter_type = self.ballFilter.extract_final_candidates(
                answer, no=no, until_end=True, df=df_ball
            )
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))

        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Brute-force check, per round, that the winning combination survives.

        For each round in *nos* every 6-of-45 combination is passed through
        ``extract_final_candidates`` until one both passes and equals the
        winning numbers.

        Args:
            df_ball: History frame (``no`` plus six winning-number columns).
            nos: Iterable of round numbers; ``None`` means nothing to check.

        Returns:
            dict: round number -> winning numbers, for rounds that survived.
        """
        win_history = {}
        if nos is None:
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            answer_set = set(answer)

            # NOTE(review): only the combination equal to the answer can ever
            # match, so the other filter calls look redundant if the filter is
            # stateless; kept as-is because that cannot be confirmed from here.
            for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))

                ball = list(ball)
                filter_type = self.ballFilter.extract_final_candidates(
                    ball, no=no, until_end=True, df=df_ball
                )
                if 0 == len(filter_type) and len(answer_set.intersection(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break

        return win_history
if __name__ == '__main__':
    # Command-line options: resource directory, optional ruleset file, and the
    # inclusive round range to score (defaults to rounds 801-1000).
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # --- Disabled: STEP #0, final candidate selection for the latest round ---
    # Kept for reference as comments (it used to be a bare string literal that
    # was evaluated on every run). The draft wrote `answer` on every output
    # line; the candidate `ball` is what has to be written, as shown here.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # --- Disabled: STEP #2, confirm the number of wins ---
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned in the feature file): 22 wins

View File

@@ -1,234 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_2 import BallFilter
import time
import datetime
class FilterTest:
ballFilter = None
def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
# validation should use full history for previous-draw/window features
lottoHistoryFileName = os.path.join(resources_path, history_json)
self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)
return
def find_filter_method(self, df_ball, start_no, end_no):
win_count = 0
no_filter_ball = {}
printLog = True
filter_dic = {}
filter_dic_len = {}
filter_dic_1 = {}
filter_dic_2 = {}
# evaluate only requested range, but allow df_ball to contain full history
for i in range(len(df_ball) - 1, -1, -1):
no = int(df_ball['no'].iloc[i])
if no < start_no or end_no < no:
continue
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = answer[1:7]
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
filter_type = list(filter_type)
size = len(filter_type)
if size == 0:
win_count += 1
no_filter_ball[no] = answer
print("\t", no)
elif size == 1:
key = filter_type[0]
if key not in filter_dic_1:
filter_dic_1[key] = 1
else:
filter_dic_1[key] += 1
if printLog:
print("\t", no, filter_type)
elif size == 2:
key = ','.join(filter_type)
if key not in filter_dic_2:
filter_dic_2[key] = 1
else:
filter_dic_2[key] += 1
if printLog:
print("\t", no, filter_type)
else:
if printLog:
print("\t", no, filter_type)
# 회차별 필터개수가 적은 것을 정렬하기 위함
if size not in filter_dic_len:
filter_dic_len[size] = []
filter_dic_len[size].append(filter_type)
for f_t in filter_type:
if f_t not in filter_dic:
filter_dic[f_t] = 1
else:
filter_dic[f_t] += 1
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
sorted_filter_dic_len = sorted(filter_dic_len.keys())
for filter_count in sorted_filter_dic_len:
for filter_type in filter_dic_len[filter_count]:
print("\t\t>{} > {}".format(filter_count, filter_type))
print("\n\t[걸러진 유일 필터]")
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_1)):
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
print("\n\t[2개 필터에 걸린 경우]")
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_2)):
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
print("\n\t[Filter 유형 별 걸린 개수]")
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic)):
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
total = max(0, end_no - start_no + 1)
rate = (100 * len(no_filter_ball) / total) if total else 0.0
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), total, rate))
for no in no_filter_ball:
print("\t\t>", no, no_filter_ball[no])
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), total, rate))
return win_count
def find_final_candidates(self, no, df_ball, filter_ball=None):
    """Return every 6-number combination of 1..45 that the ball filter accepts.

    A combination is kept when ``self.ballFilter.filter`` returns an empty
    collection for it.

    Args:
        no: Draw number forwarded to the filter.
        df_ball: Draw-history frame forwarded to the filter as ``df``.
        filter_ball: Optional iterable of ball numbers to exclude. Any
            combination containing one of them is dropped without calling
            the filter. ``None`` or an empty iterable excludes nothing.

    Returns:
        list[tuple]: Accepted combinations, in the order produced by
        ``itertools.combinations``.
    """
    excluded = set(filter_ball) if filter_ball is not None else set()
    # Removing excluded balls from the pool yields exactly the combinations
    # that would have survived the per-combination exclusion test, in the
    # same relative order, without generating the rejected ones at all.
    pool = [b for b in range(1, 46) if b not in excluded]

    final_candidates = []
    # Iterate lazily: C(45, 6) = 8,145,060 tuples do not need to live in
    # memory at once. The progress counter counts generated combinations.
    for idx, ball in enumerate(itertools.combinations(pool, 6)):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))
        filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if len(filter_type):
            continue
        final_candidates.append(ball)
    return final_candidates
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
    """Print the draws whose winning numbers pass ``extract_final_candidates``.

    Rows are visited from the last row down to index 1; row 0 is never
    checked. A draw counts as a win when the filter returns an empty
    collection for its six winning numbers.

    Args:
        df_ball: Draw-history frame; column ``no`` holds the draw number and
            positions 1..6 of each row hold the winning numbers.
        p_win_count: Reference win count, only echoed in the summary line.
        filter_ball: Optional iterable of ball numbers; draws whose winning
            numbers contain any of them are skipped.

    Returns:
        None. Results are written to stdout.
    """
    passed = 0
    last_row = len(df_ball) - 1
    for row in reversed(range(1, last_row + 1)):
        no = df_ball['no'].iloc[row]
        matched = df_ball[df_ball['no'] == no].values.tolist()[0]
        answer = matched[1:7]
        if filter_ball is not None and set(answer) & set(filter_ball):
            continue
        rejected_by = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
        if len(rejected_by) != 0:
            continue
        passed += 1
        print("\t\t>{}. {}".format(no, answer))
    print("\n\t> {} / {} p_win_count, {} total".format(passed, p_win_count, last_row))
    return
def validate(self, df_ball, nos=None):
    """Check, per draw, whether the winning combination survives the filter.

    For each draw number in *nos*, every 6-number combination of 1..45 is
    passed to ``self.ballFilter.extract_final_candidates`` in order until one
    is both accepted (empty result) and equal to the draw's winning numbers.

    Args:
        df_ball: Draw-history frame; column ``no`` holds the draw number and
            positions 1..6 of each row hold the winning numbers.
        nos: Iterable of draw numbers to validate. ``None`` (the default) is
            treated as "no draws" instead of raising ``TypeError``.

    Returns:
        dict: ``{draw number: winning numbers}`` for each draw whose winning
        combination was accepted.

    Raises:
        IndexError: If a draw number in *nos* is absent from *df_ball*.
    """
    win_history = {}
    for no in (nos if nos is not None else ()):
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
        answer_set = set(answer)
        # Iterate lazily rather than rebuilding an 8,145,060-tuple list for
        # every draw.
        # NOTE(review): only the winning combination itself can satisfy the
        # win test below; if extract_final_candidates is stateless, a single
        # direct call would give the same result — confirm before changing.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
            # ball holds 6 distinct numbers, so "all of them are winning
            # numbers" is the same as a 6-element intersection.
            if len(filter_type) == 0 and answer_set.issuperset(ball):
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
                break
    return win_history
if __name__ == '__main__':
    # Command-line entry point: score the filter on draws [start-no, end-no]
    # and, optionally, dump the surviving combinations for the latest draw.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    parser.add_argument(
        "--final-candidates",
        action="store_true",
        help="After STEP #1, also enumerate the combinations that survive the filter for the latest draw and write them to final_candidates.biz_a1.txt under --resources (slow; off by default).",
    )
    args = parser.parse_args()
    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Formerly disabled by wrapping it in a string literal; now opt-in.
    if args.final_candidates:
        print("\n\n")
        no = df_ball['no'].values[-1]
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

        print("STEP #0. 최종 후보 선정")
        start = time.time()
        final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
        process_time = datetime.timedelta(seconds=time.time() - start)
        print("process_time: ", process_time)
        print(" > size: {}".format(len(final_candidates)))

        file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
        with open(file_name, 'w') as outFp:
            for ball in final_candidates:
                # Write the candidate itself; the disabled version wrote the
                # winning numbers on every line.
                outFp.write("{}\n".format(','.join(str(b) for b in ball)))
        print('{}회, 정답: {}\n'.format(no, str(answer)))

    # STEP #2 (win-count check) is currently disabled:
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins

View File

@@ -1,234 +0,0 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
    """Evaluate a ``BallFilter`` against historical lotto draws.

    All methods take the draw history as a frame whose ``no`` column is the
    draw number and whose positions 1..6 in each row are the winning numbers.
    The filter is expected to return an empty collection for a combination it
    accepts and a non-empty collection of reasons for one it rejects.
    """

    # Filter under test; set per instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the ``BallFilter`` from ``<resources_path>/<history_json>``.

        Args:
            resources_path: Directory containing the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: History file name inside *resources_path*.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(header, counts):
        """Print *header*, then the entries of *counts* by descending count."""
        print(header)
        for key, count in sorted(counts.items(), key=lambda x: x[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Run the filter on the winning numbers of each draw in a range.

        Prints, per draw, the reasons the filter returned, followed by
        several breakdowns of those reasons and the list of draws the filter
        accepted.

        Args:
            df_ball: Draw-history frame; it may contain draws outside the
                range, which are skipped.
            start_no: First draw number to evaluate (inclusive).
            end_no: Last draw number to evaluate (inclusive).

        Returns:
            int: Number of evaluated draws for which the filter returned no
            reasons.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers the filter accepted
        printLog = True
        filter_dic = {}       # reason -> number of draws it appeared on
        filter_dic_len = {}   # number of reasons -> list of reason lists
        filter_dic_1 = {}     # reason -> draws rejected by that reason alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many reasons fired so they can be listed
            # from fewest to most below.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # total is the size of the requested range, not the number of rows
        # actually found in df_ball.
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # "{:.2f}" prints two decimals; the previous "{:.2}" meant two
        # significant digits and rendered 50.0 as "5e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-number combination of 1..45 that the filter accepts.

        Args:
            no: Draw number forwarded to the filter.
            df_ball: Draw-history frame forwarded to the filter as ``df``.
            filter_ball: Optional iterable of ball numbers to exclude. Any
                combination containing one of them is dropped without
                calling the filter.

        Returns:
            list[tuple]: Accepted combinations, in the order produced by
            ``itertools.combinations``.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        # Removing excluded balls from the pool yields exactly the
        # combinations that would survive a per-combination exclusion test,
        # in the same relative order, without generating the rejected ones.
        pool = [b for b in range(1, 46) if b not in excluded]

        final_candidates = []
        # Iterate lazily: C(45, 6) = 8,145,060 tuples need not be held in
        # memory at once. The progress counter counts generated combinations.
        for idx, ball in enumerate(itertools.combinations(pool, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited from the last row down to index 1; row 0 is never
        checked.

        Args:
            df_ball: Draw-history frame.
            p_win_count: Reference win count, only echoed in the summary.
            filter_ball: Optional iterable of ball numbers; draws whose
                winning numbers contain any of them are skipped.

        Returns:
            None. Results are written to stdout.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether the winning combination survives the filter.

        For each draw number in *nos*, every 6-number combination of 1..45
        is passed to ``extract_final_candidates`` in order until one is both
        accepted and equal to the draw's winning numbers.

        Args:
            df_ball: Draw-history frame.
            nos: Iterable of draw numbers to validate. ``None`` (the default)
                is treated as "no draws" instead of raising ``TypeError``.

        Returns:
            dict: ``{draw number: winning numbers}`` for each draw whose
            winning combination was accepted.

        Raises:
            IndexError: If a draw number in *nos* is absent from *df_ball*.
        """
        win_history = {}
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            answer_set = set(answer)
            # Iterate lazily rather than rebuilding an 8,145,060-tuple list
            # for every draw.
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                # ball holds 6 distinct numbers, so "all of them are winning
                # numbers" is the same as a 6-element intersection.
                if len(filter_type) == 0 and answer_set.issuperset(ball):
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Command-line entry point: score the filter on draws [start-no, end-no]
    # and, optionally, dump the surviving combinations for the latest draw.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    parser.add_argument(
        "--final-candidates",
        action="store_true",
        help="After STEP #1, also enumerate the combinations that survive the filter for the latest draw and write them to final_candidates.biz_a1.txt under --resources (slow; off by default).",
    )
    args = parser.parse_args()
    resources_path = args.resources

    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Formerly disabled by wrapping it in a string literal; now opt-in.
    if args.final_candidates:
        print("\n\n")
        no = df_ball['no'].values[-1]
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

        print("STEP #0. 최종 후보 선정")
        start = time.time()
        final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
        process_time = datetime.timedelta(seconds=time.time() - start)
        print("process_time: ", process_time)
        print(" > size: {}".format(len(final_candidates)))

        file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
        with open(file_name, 'w') as outFp:
            for ball in final_candidates:
                # Write the candidate itself; the disabled version wrote the
                # winning numbers on every line.
                outFp.write("{}\n".format(','.join(str(b) for b in ball)))
        print('{}회, 정답: {}\n'.format(no, str(answer)))

    # STEP #2 (win-count check) is currently disabled:
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins