Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dsyoon
2026-02-25 18:32:11 +09:00
commit c611b400ae
40 changed files with 24532 additions and 0 deletions

101
.gitignore vendored Normal file
View File

@@ -0,0 +1,101 @@
# ---> Python
.idea
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
# macOS metadata
.DS_Store

216
1_FilterTest_22.py Normal file
View File

@@ -0,0 +1,216 @@
import os
import pandas as pd
import itertools
from BallFilter_22 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test harness that replays lotto draws through a ``BallFilter``.

    Every method hands either historical winning numbers or generated
    six-ball combinations to ``self.ballFilter`` and reports which of them
    are rejected by no filter at all.
    """

    # Class-level default kept for backward compatibility; ``__init__``
    # always replaces it with a real filter instance.
    ballFilter = None

    def __init__(self, resources_path):
        """Create the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    @staticmethod
    def _winning_balls(df_ball, no):
        """Return columns 1..6 of the first row of ``df_ball`` whose ``no`` matches."""
        return df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

    @staticmethod
    def _combinations(max_ball):
        """Lazily yield every 6-ball combination of 1..max_ball, logging progress."""
        # Iterating the itertools generator directly avoids holding the
        # 8,145,060 tuples of the default 45-ball pool in memory at once.
        pool = range(1, max_ball + 1)
        for idx, ball in enumerate(itertools.combinations(pool, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            yield ball

    def find_filter_method(self, df_ball, filter_ball=None):
        """Report which filters reject each historical winning combination.

        Rows are visited from the last row down to positional index 20, so
        the first 20 rows are never evaluated.

        Args:
            df_ball: DataFrame with a ``no`` column followed by the six ball
                columns and the bonus column.
            filter_ball: Unused; accepted only for signature compatibility.

        Returns:
            int: number of evaluated draws that no filter rejected.
        """
        printLog = True
        win_count = 0
        no_filter_ball = {}
        filter_dic = {}      # filter name -> number of draws it rejected
        filter_dic_len = {}  # number of rejecting filters -> list of filter lists
        filter_dic_1 = {}    # draws rejected by exactly one filter
        filter_dic_2 = {}    # draws rejected by exactly two filters

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            answer = self._winning_balls(df_ball, no)
            # NOTE(review): the result is presumably the names of the filters
            # that reject the ball -- confirm against BallFilter.filter.
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many filters rejected them so the report can
            # list the least-filtered draws first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        reports = (
            ("\n\t[걸러진 유일 필터]", filter_dic_1),
            ("\n\t[2개 필터에 걸린 경우]", filter_dic_2),
            ("\n\t[Filter 유형 별 걸린 개수]", filter_dic),
        )
        for title, counts in reports:
            print(title)
            # Most frequent first; ties keep first-seen order.
            for name, hits in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
                print("\t\t>", name, "->", hits)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45):
        """Return every 6-ball combination that passes all filters for draw ``no``.

        Args:
            no: Draw number passed through to the filter.
            df_ball: Draw history DataFrame passed through to the filter.
            filter_ball: Optional balls to exclude; any combination that
                contains one of them is skipped without calling the filter.
            max_ball: Highest ball number of the pool (default 45).

        Returns:
            list[tuple]: surviving combinations, in generation order.
        """
        # Built once instead of once per combination.
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []
        for ball in self._combinations(max_ball):
            if not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited from the last row down to positional index 1.

        Args:
            df_ball: Draw history DataFrame.
            p_win_count: Previously computed win count, printed for comparison.
            filter_ball: Optional balls; draws containing any of them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = self._winning_balls(df_ball, no)
            if not excluded.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None, max_ball=45):
        """Find the draws whose winning combination survives the filters.

        Args:
            df_ball: Draw history DataFrame.
            nos: Iterable of draw numbers to check; ``None`` checks nothing.
            max_ball: Highest ball number of the pool (default 45).

        Returns:
            dict: draw number -> winning balls, for each draw that survived.
        """
        win_history = {}
        if nos is None:
            # The original iterated ``None`` and raised TypeError.
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = self._winning_balls(df_ball, no)
            answer_set = set(answer)
            for combo in self._combinations(max_ball):
                ball = list(combo)
                # NOTE(review): the filter is deliberately still called for
                # every combination to keep the original call sequence; if it
                # is side-effect free, testing the answer alone would suffice.
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    resources_path = 'resources'
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    # The history file has no header row, so column names are assigned here.
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball = []
    filterTest = FilterTest(resources_path)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled STEP #0 (final candidate selection for the latest draw), kept
    # for reference. The original snippet wrote `answer` on every output line
    # instead of the candidate itself; that is corrected below.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # Disabled STEP #2 (win count check):
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins

216
1_FilterTest_25.py Normal file
View File

@@ -0,0 +1,216 @@
import os
import pandas as pd
import itertools
from BallFilter_25 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test harness that replays lotto draws through a ``BallFilter``.

    Every method hands either historical winning numbers or generated
    six-ball combinations to ``self.ballFilter`` and reports which of them
    are rejected by no filter at all.
    """

    # Class-level default kept for backward compatibility; ``__init__``
    # always replaces it with a real filter instance.
    ballFilter = None

    def __init__(self, resources_path):
        """Create the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    @staticmethod
    def _winning_balls(df_ball, no):
        """Return columns 1..6 of the first row of ``df_ball`` whose ``no`` matches."""
        return df_ball[df_ball['no'] == no].values.tolist()[0][1:7]

    @staticmethod
    def _combinations(max_ball):
        """Lazily yield every 6-ball combination of 1..max_ball, logging progress."""
        # Iterating the itertools generator directly avoids holding the
        # 8,145,060 tuples of the default 45-ball pool in memory at once.
        pool = range(1, max_ball + 1)
        for idx, ball in enumerate(itertools.combinations(pool, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            yield ball

    def find_filter_method(self, df_ball, filter_ball=None):
        """Report which filters reject each historical winning combination.

        Rows are visited from the last row down to positional index 20, so
        the first 20 rows are never evaluated.

        Args:
            df_ball: DataFrame with a ``no`` column followed by the six ball
                columns and the bonus column.
            filter_ball: Unused; accepted only for signature compatibility.

        Returns:
            int: number of evaluated draws that no filter rejected.
        """
        printLog = True
        win_count = 0
        no_filter_ball = {}
        filter_dic = {}      # filter name -> number of draws it rejected
        filter_dic_len = {}  # number of rejecting filters -> list of filter lists
        filter_dic_1 = {}    # draws rejected by exactly one filter
        filter_dic_2 = {}    # draws rejected by exactly two filters

        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            answer = self._winning_balls(df_ball, no)
            # NOTE(review): the result is presumably the names of the filters
            # that reject the ball -- confirm against BallFilter.filter.
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)

            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)

            # Group draws by how many filters rejected them so the report can
            # list the least-filtered draws first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))

        reports = (
            ("\n\t[걸러진 유일 필터]", filter_dic_1),
            ("\n\t[2개 필터에 걸린 경우]", filter_dic_2),
            ("\n\t[Filter 유형 별 걸린 개수]", filter_dic),
        )
        for title, counts in reports:
            print(title)
            # Most frequent first; ties keep first-seen order.
            for name, hits in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
                print("\t\t>", name, "->", hits)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45):
        """Return every 6-ball combination that passes all filters for draw ``no``.

        Args:
            no: Draw number passed through to the filter.
            df_ball: Draw history DataFrame passed through to the filter.
            filter_ball: Optional balls to exclude; any combination that
                contains one of them is skipped without calling the filter.
            max_ball: Highest ball number of the pool (default 45).

        Returns:
            list[tuple]: surviving combinations, in generation order.
        """
        # Built once instead of once per combination.
        excluded = set(filter_ball) if filter_ball is not None else set()
        final_candidates = []
        for ball in self._combinations(max_ball):
            if not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Rows are visited from the last row down to positional index 1.

        Args:
            df_ball: Draw history DataFrame.
            p_win_count: Previously computed win count, printed for comparison.
            filter_ball: Optional balls; draws containing any of them are skipped.
        """
        excluded = set(filter_ball) if filter_ball is not None else set()
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = self._winning_balls(df_ball, no)
            if not excluded.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None, max_ball=45):
        """Find the draws whose winning combination survives the filters.

        Args:
            df_ball: Draw history DataFrame.
            nos: Iterable of draw numbers to check; ``None`` checks nothing.
            max_ball: Highest ball number of the pool (default 45).

        Returns:
            dict: draw number -> winning balls, for each draw that survived.
        """
        win_history = {}
        if nos is None:
            # The original iterated ``None`` and raised TypeError.
            return win_history

        for no in nos:
            print(no, "processing...")
            answer = self._winning_balls(df_ball, no)
            answer_set = set(answer)
            for combo in self._combinations(max_ball):
                ball = list(combo)
                # NOTE(review): the filter is deliberately still called for
                # every combination to keep the original call sequence; if it
                # is side-effect free, testing the answer alone would suffice.
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(answer_set & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    resources_path = 'resources'
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    # The history file has no header row, so column names are assigned here.
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball = []
    filterTest = FilterTest(resources_path)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled STEP #0 (final candidate selection for the latest draw), kept
    # for reference. The original snippet wrote `answer` on every output line
    # instead of the candidate itself; that is corrected below.
    #
    # print("\n\n")
    # no = df_ball['no'].values[-1]
    # ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    # answer = ball[1:7]
    # print("STEP #0. 최종 후보 선정")
    # start = time.time()
    # final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    # process_time = datetime.timedelta(seconds=time.time() - start)
    # print("process_time: ", process_time)
    # print(" > size: {}".format(len(final_candidates)))
    # file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    # with open(file_name, 'w+') as outFp:
    #     for ball in final_candidates:
    #         ball_str = [str(b) for b in ball]
    #         outFp.write("{}\n".format(','.join(ball_str)))
    # print('{}회, 정답: {}\n'.format(no, str(answer)))

    # Disabled STEP #2 (win count check):
    # print("\n\n")
    # print("STEP #2. 당첨 회수 확인")
    # filterTest.check_filter_method(df_ball, win_count)

    # Original version (pinned to the feature file): 22 wins

94
2_FilterTestReview_22.py Normal file
View File

@@ -0,0 +1,94 @@
import os
import time
import datetime
import pandas as pd
import itertools
from BallFilter_22 import BallFilter
class FilterTestReview:
    """Replays past draws and tallies the prizes the filtered candidates would win."""

    # Class-level default kept for backward compatibility; ``__init__``
    # always replaces it with a real filter instance.
    ballFilter = None

    def __init__(self, resources_path):
        """Create the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None, max_ball=45):
        """Tally prize ranks of all filter-surviving combinations for each draw.

        For every draw in ``nos`` all 6-ball combinations of 1..max_ball are
        passed through ``self.ballFilter.filter``; the survivors are compared
        with the winning numbers and counted per prize rank
        (1st: 6 matches, 2nd: 5 matches + bonus, 3rd: 5, 4th: 4, 5th: 3).

        Args:
            df_ball: DataFrame whose rows are
                ``[no, b1, b2, b3, b4, b5, b6, bonus]``.
            nos: Iterable of draw numbers to review; ``None`` reviews nothing.
            max_ball: Highest ball number of the pool (default 45).

        Returns:
            dict: draw number -> the six winning balls, for each draw in
            which a surviving combination matched all six.
        """
        win_history = {}
        if nos is None:
            # The original iterated ``None`` and raised TypeError.
            return win_history

        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            bonus = balls[7]
            answer_set = set(answer)
            # Only the number of survivors is reported, so count instead of
            # keeping every surviving combination in memory.
            candidate_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}

            # Iterate lazily rather than materializing all combinations.
            combos = itertools.combinations(range(1, max_ball + 1), 6)
            for idx, combo in enumerate(combos):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                candidate_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    win_history[no] = answer
                    win_dic[1].append(ball)
                elif match == 5:
                    # The bonus ball is tested directly. The original appended
                    # it to ``answer``, which permanently widened the winning
                    # set and corrupted every later comparison.
                    rank = 2 if bonus in ball else 3
                    win_dic[rank].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            print("no: {}, answer: {}, size: {}".format(no, str(answer), candidate_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history
if __name__ == '__main__':
    # Load the header-less draw history from ./resources and name its columns.
    resources_path = os.path.join('.', 'resources')
    df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filterTestReview = FilterTestReview(resources_path)

    # Review draws 1165 down to 1146, newest first.
    # (A wider sweep, nos=range(1126, 21, -1), was used previously.)
    target_nos = list(range(1165, 1145, -1))

    started_at = time.time()
    win_history = filterTestReview.validate(df_ball, nos=target_nos)
    elapsed = datetime.timedelta(seconds=time.time() - started_at)
    print("process_time: ", elapsed)

    # Summarize the winning draws in ascending draw-number order.
    print("{} 회 당첨".format(len(win_history)))
    for won_no in sorted(win_history):
        print("\t>{} > {}".format(won_no, str(win_history[won_no])))

94
2_FilterTestReview_25.py Normal file
View File

@@ -0,0 +1,94 @@
import os
import time
import datetime
import pandas as pd
import itertools
from BallFilter_25 import BallFilter
class FilterTestReview:
    """Replays past draws and tallies the prizes the filtered candidates would win."""

    # Class-level default kept for backward compatibility; ``__init__``
    # always replaces it with a real filter instance.
    ballFilter = None

    def __init__(self, resources_path):
        """Create the filter from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None, max_ball=45):
        """Tally prize ranks of all filter-surviving combinations for each draw.

        For every draw in ``nos`` all 6-ball combinations of 1..max_ball are
        passed through ``self.ballFilter.filter``; the survivors are compared
        with the winning numbers and counted per prize rank
        (1st: 6 matches, 2nd: 5 matches + bonus, 3rd: 5, 4th: 4, 5th: 3).

        Args:
            df_ball: DataFrame whose rows are
                ``[no, b1, b2, b3, b4, b5, b6, bonus]``.
            nos: Iterable of draw numbers to review; ``None`` reviews nothing.
            max_ball: Highest ball number of the pool (default 45).

        Returns:
            dict: draw number -> the six winning balls, for each draw in
            which a surviving combination matched all six.
        """
        win_history = {}
        if nos is None:
            # The original iterated ``None`` and raised TypeError.
            return win_history

        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            bonus = balls[7]
            answer_set = set(answer)
            # Only the number of survivors is reported, so count instead of
            # keeping every surviving combination in memory.
            candidate_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}

            # Iterate lazily rather than materializing all combinations.
            combos = itertools.combinations(range(1, max_ball + 1), 6)
            for idx, combo in enumerate(combos):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                candidate_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    win_history[no] = answer
                    win_dic[1].append(ball)
                elif match == 5:
                    # The bonus ball is tested directly. The original appended
                    # it to ``answer``, which permanently widened the winning
                    # set and corrupted every later comparison.
                    rank = 2 if bonus in ball else 3
                    win_dic[rank].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            print("no: {}, answer: {}, size: {}".format(no, str(answer), candidate_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history
if __name__ == '__main__':
    # Load the header-less draw history from ./resources and name its columns.
    resources_path = os.path.join('.', 'resources')
    df_ball = pd.read_csv(os.path.join(resources_path, 'lotto_history.txt'), header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filterTestReview = FilterTestReview(resources_path)

    # Review draws 1165 down to 1146, newest first.
    # (A wider sweep, nos=range(1126, 21, -1), was used previously.)
    target_nos = list(range(1165, 1145, -1))

    started_at = time.time()
    win_history = filterTestReview.validate(df_ball, nos=target_nos)
    elapsed = datetime.timedelta(seconds=time.time() - started_at)
    print("process_time: ", elapsed)

    # Summarize the winning draws in ascending draw-number order.
    print("{} 회 당첨".format(len(win_history)))
    for won_no in sorted(win_history):
        print("\t>{} > {}".format(won_no, str(win_history[won_no])))

192
3_Practice_22.py Normal file
View File

@@ -0,0 +1,192 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from DataCrawler import DataCrawler
from BallFilter_22 import BallFilter
class Practice:
    """Crawls draw results and builds the weekly list of recommended balls."""

    # Class-level defaults kept for backward compatibility; only ``bot`` is
    # assigned by ``__init__``.
    bot = None
    preprocessor = None
    predictor = None
    extract_count = None

    # Endpoint returning one draw as JSON; the draw number is appended.
    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    # Response keys of the six winning balls followed by the bonus ball.
    _BALL_KEYS = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')

    def __init__(self, resources_path):
        """Create the Telegram bot; ``resources_path`` is currently unused."""
        self.bot = TelegramBot()

    def _fetch_draw(self, drw_no):
        """Request one draw; return the decoded JSON, or None unless it reports success."""
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # indefinitely -- consider adding one.
        res = requests.post(self._DRAW_URL + str(drw_no))
        result = res.json()
        if result['returnValue'] != 'success':
            return None
        return result

    def _store_draw(self, jsonFp, textFp, drw_no, result):
        """Append one draw to both history files, echo it, and return its 7 balls."""
        ball = [result[key] for key in self._BALL_KEYS]
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([drw_no] + ball)
        textFp.write(row + "\n")
        print(row)
        return ball

    def craw(self, lottoHistoryFile, drwNo=None):
        """Collect winning numbers and store them in the history files.

        With ``drwNo`` the single draw is appended to
        ``<lottoHistoryFile>.json`` / ``.txt``. Without it both files are
        rewritten from draw 1 onwards until the service stops reporting
        success, pausing 0.5 s between requests.

        Args:
            lottoHistoryFile: Path prefix (without extension) of the history files.
            drwNo: Draw number to append, or ``None`` for a full re-crawl.

        Returns:
            list | None: the six balls plus bonus of the last stored draw, or
            ``None`` when nothing was stored.
        """
        ball = None
        mode = 'a' if drwNo is not None else 'w'
        # ``with`` guarantees both files are closed on the early return and
        # on errors; the original leaked the handles in those cases.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if drwNo is not None:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                ball = self._store_draw(jsonFp, textFp, drwNo, result)
            else:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result is None:
                        break
                    ball = self._store_draw(jsonFp, textFp, idx, result)
                    idx += 1
                    time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combination to ``result_json`` in place."""
        result_json.append([6, 7, 10, 11, 20, 45])
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every filter-surviving combination for the draw of ``ymd``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Draw date string handed to ``BallFilter.getNo``.
            result_json: List extended in place with the surviving combinations.

        Returns:
            tuple: ``(p_no, p_ball)``, the number and six balls of the draw
            stored immediately before the target draw.
        """
        candidates = [i for i in range(1, 46)]
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNo(ymd)
        print("회차: {}".format(no))

        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Iterate lazily instead of materializing all 8,145,060 combinations.
        for idx, combo in enumerate(itertools.combinations(candidates, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if 0 < len(filter_type):
                continue
            result_json.append(ball)

        p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = p_ball[0]
        p_ball = p_ball[1:7]
        return p_no, p_ball
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Refresh the stored history before predicting.
    dataCrawler = DataCrawler()
    dataCrawler.excute(resources_path)

    # Target the coming Saturday (weekday 5). On Saturday once hour > 20, and
    # on Sunday, roll over to the following week's Saturday.
    today = datetime.today()
    days_ahead = 5 - today.weekday()
    if today.weekday() == 6 or (today.weekday() == 5 and today.hour > 20):
        days_ahead += 7
    this_weekend = today + timedelta(days=days_ahead)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    practice = Practice(resources_path)

    # Find the last stored draw and try to append the one after it.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    last_json = None
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            # Skip blank lines so a trailing newline does not break parsing.
            if line.strip():
                last_json = json.loads(line)
    if last_json is None:
        raise ValueError("no draw records found in {}".format(lottoHistoryFileName))
    practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    # Load earlier recommendations (if any) and reset this week's entry.
    recommend_result_file = os.path.join(resources_path, "recommend_ball.biz_22.json")
    if os.path.isfile(recommend_result_file):
        # Read with the same encoding the file is written with, and close it.
        with open(recommend_result_file, "r", encoding='utf-8') as result_fp:
            result_json = json.load(result_fp)
        result_json[ymd] = []
    else:
        result_json = {ymd: []}

    # Fixed weekly pick.
    practice.predict1(result_json[ymd])
    # Filter-based prediction.
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])

    with open(recommend_result_file, 'w', encoding='utf-8') as outFp:
        json.dump(result_json, outFp, ensure_ascii=False)

    # Send the recommendations in chunks of 100 lines per message.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#22]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for i, ball in enumerate(result_json[ymd]):
        p_str += " {}. {}\n".format((i+1), str(ball))
        if (i+1) % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    # Flush the final partial chunk.
    if len(result_json[ymd]) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

192
3_Practice_25.py Normal file
View File

@@ -0,0 +1,192 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from BallFilter_25 import BallFilter
class Practice:
    """Crawls draw results and builds the weekly list of recommended balls."""

    # Class-level defaults kept for backward compatibility; only ``bot`` is
    # assigned by ``__init__``.
    bot = None
    preprocessor = None
    predictor = None
    extract_count = None

    # Endpoint returning one draw as JSON; the draw number is appended.
    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    # Response keys of the six winning balls followed by the bonus ball.
    _BALL_KEYS = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')

    def __init__(self, resources_path):
        """Create the Telegram bot; ``resources_path`` is currently unused."""
        self.bot = TelegramBot()

    def _fetch_draw(self, drw_no):
        """Request one draw; return the decoded JSON, or None unless it reports success."""
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # indefinitely -- consider adding one.
        res = requests.post(self._DRAW_URL + str(drw_no))
        result = res.json()
        if result['returnValue'] != 'success':
            return None
        return result

    def _store_draw(self, jsonFp, textFp, drw_no, result):
        """Append one draw to both history files, echo it, and return its 7 balls."""
        ball = [result[key] for key in self._BALL_KEYS]
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([drw_no] + ball)
        textFp.write(row + "\n")
        print(row)
        return ball

    def craw(self, lottoHistoryFile, drwNo=None):
        """Collect winning numbers and store them in the history files.

        With ``drwNo`` the single draw is appended to
        ``<lottoHistoryFile>.json`` / ``.txt``. Without it both files are
        rewritten from draw 1 onwards until the service stops reporting
        success, pausing 0.5 s between requests.

        Args:
            lottoHistoryFile: Path prefix (without extension) of the history files.
            drwNo: Draw number to append, or ``None`` for a full re-crawl.

        Returns:
            list | None: the six balls plus bonus of the last stored draw, or
            ``None`` when nothing was stored.
        """
        ball = None
        mode = 'a' if drwNo is not None else 'w'
        # ``with`` guarantees both files are closed on the early return and
        # on errors; the original leaked the handles in those cases.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if drwNo is not None:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                ball = self._store_draw(jsonFp, textFp, drwNo, result)
            else:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result is None:
                        break
                    ball = self._store_draw(jsonFp, textFp, idx, result)
                    idx += 1
                    time.sleep(0.5)
        return ball

    def predict1(self, result_json):
        """Append the fixed weekly combination to ``result_json`` in place."""
        result_json.append([6, 7, 10, 11, 20, 45])
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every filter-surviving combination for the draw of ``ymd``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Draw date string handed to ``BallFilter.getNextNo``.
            result_json: List extended in place with the surviving combinations.

        Returns:
            tuple: ``(p_no, p_ball)``, the number and six balls of the draw
            stored immediately before the target draw.
        """
        candidates = [i for i in range(1, 46)]
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Iterate lazily instead of materializing all 8,145,060 combinations.
        for idx, combo in enumerate(itertools.combinations(candidates, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if 0 < len(filter_type):
                continue
            result_json.append(ball)

        p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = p_ball[0]
        p_ball = p_ball[1:7]
        return p_no, p_ball
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Refresh the stored history before predicting.
    dataCrawler = DataCrawler()
    dataCrawler.excute(resources_path)

    # Target the coming Saturday (weekday 5). On Saturday once hour > 20, and
    # on Sunday, roll over to the following week's Saturday.
    today = datetime.today()
    days_ahead = 5 - today.weekday()
    if today.weekday() == 6 or (today.weekday() == 5 and today.hour > 20):
        days_ahead += 7
    this_weekend = today + timedelta(days=days_ahead)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction.
    practice = Practice(resources_path)

    # Find the last stored draw and try to append the one after it.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    last_json = None
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            # Skip blank lines so a trailing newline does not break parsing.
            if line.strip():
                last_json = json.loads(line)
    if last_json is None:
        raise ValueError("no draw records found in {}".format(lottoHistoryFileName))
    practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    # Load earlier recommendations (if any) and reset this week's entry.
    recommend_result_file = os.path.join(resources_path, "recommend_ball.biz_25.json")
    if os.path.isfile(recommend_result_file):
        # Read with the same encoding the file is written with, and close it.
        with open(recommend_result_file, "r", encoding='utf-8') as result_fp:
            result_json = json.load(result_fp)
        result_json[ymd] = []
    else:
        result_json = {ymd: []}

    # Fixed weekly pick.
    practice.predict1(result_json[ymd])
    # Filter-based prediction.
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])

    with open(recommend_result_file, 'w', encoding='utf-8') as outFp:
        json.dump(result_json, outFp, ensure_ascii=False)

    # Send the recommendations in chunks of 100 lines per message.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for i, ball in enumerate(result_json[ymd]):
        p_str += " {}. {}\n".format((i+1), str(ball))
        if (i+1) % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    # Flush the final partial chunk.
    if len(result_json[ymd]) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))

    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

4330
BallFilter_22.py Normal file

File diff suppressed because it is too large Load Diff

4453
BallFilter_25.py Normal file

File diff suppressed because it is too large Load Diff

115
DataCrawler.py Normal file
View File

@@ -0,0 +1,115 @@
# 웹 호출 라이브러리를 호출합니다.
import os
import pandas as pd
import time
from datetime import datetime, timedelta
import requests
# JSON 포맷을 다루기 위한 라이브러리를 호출합니다.
import json
from TelegramBot import TelegramBot
# 로또 데이터를 수집하기 위한 파이썬 클래스를 선언합니다.
# Collects lotto draw results and stores them in the history files.
class DataCrawler:
    """Downloads draw results and appends them to ``lotto_history.json`` / ``.txt``."""

    # Class-level default kept for backward compatibility; ``__init__``
    # always replaces it with a real bot instance.
    bot = None

    # Endpoint returning one draw as JSON; the draw number is appended.
    _DRAW_URL = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo='
    # Response keys of the six winning balls followed by the bonus ball.
    _BALL_KEYS = ('drwtNo1', 'drwtNo2', 'drwtNo3', 'drwtNo4', 'drwtNo5', 'drwtNo6', 'bnusNo')

    def __init__(self):
        """Create the Telegram bot used for status messages."""
        self.bot = TelegramBot()

    def _fetch_draw(self, drw_no):
        """Request one draw; return the decoded JSON, or None unless it reports success."""
        res = requests.post(self._DRAW_URL + str(drw_no))
        result = res.json()
        # ``.get`` also treats a response without ``returnValue`` as a failure.
        if result.get('returnValue') != 'success':
            return None
        return result

    def _store_draw(self, jsonFp, textFp, drw_no, result):
        """Append one successful draw to both history files."""
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        balls = [result[key] for key in self._BALL_KEYS]
        textFp.write("%d,%d,%d,%d,%d,%d,%d,%d\n" % tuple([drw_no] + balls))

    def craw(self, lottoHistoryFile, drwNo=None):
        """Collect winning numbers and store them in the history files.

        With ``drwNo`` the single draw is appended; without it both files are
        rewritten from draw 1 onwards until the service stops reporting
        success, pausing 0.5 s between requests.

        Only successful responses are stored. The original single-draw path
        wrote a failure response into the JSON history and then raised
        ``KeyError`` with both files still open.

        Args:
            lottoHistoryFile: Path prefix (without extension) of the history files.
            drwNo: Draw number to append, or ``None`` for a full re-crawl.

        Returns:
            bool: True when at least one draw was stored (the original always
            returned ``None``).
        """
        stored = False
        mode = 'a' if drwNo is not None else 'w'
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if drwNo is not None:
                result = self._fetch_draw(drwNo)
                if result is not None:
                    self._store_draw(jsonFp, textFp, drwNo, result)
                    stored = True
            else:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result is None:
                        break
                    self._store_draw(jsonFp, textFp, idx, result)
                    stored = True
                    idx += 1
                    time.sleep(0.5)
        return stored

    def excute(self, resource_path):
        """Append the newest draw to the history unless it is already stored.

        The last non-blank line of ``<resource_path>/lotto_history.json`` is
        compared with the date of the most recent Saturday draw; when they
        differ, the draw following the last stored one is crawled.

        Args:
            resource_path: Directory holding the ``lotto_history`` files.

        Returns:
            bool: False when the history has no usable last record or the new
            draw could not be fetched, True otherwise.
        """
        lottoHistoryFile = os.path.join(resource_path, 'lotto_history')

        # Coming Saturday (weekday 5); on Saturday once hour > 20, and on
        # Sunday, roll over to the following week. One week earlier is the
        # date of the latest draw that should already exist.
        today = datetime.today()
        days_ahead = 5 - today.weekday()
        if today.weekday() == 6 or (today.weekday() == 5 and today.hour > 20):
            days_ahead += 7
        this_weekend = today + timedelta(days=days_ahead)
        last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y-%m-%d')

        lottoHistoryFileName = lottoHistoryFile + '.json'
        last_json = None
        with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
            for line in f:
                if line.strip():
                    last_json = json.loads(line)

        # An empty history previously raised NameError; treat it like a
        # history whose last record is a failure.
        if last_json is None:
            return False
        if 'returnValue' not in last_json or last_json['returnValue'] == 'fail':
            return False

        if last_json['drwNoDate'] == last_weekend:
            self.bot.sendMsg("[Lottery Crawler] {} already existed..".format(last_weekend))
        else:
            next_no = last_json['drwNo'] + 1
            if not self.craw(lottoHistoryFile, drwNo=next_no):
                # Nothing was stored, so do not announce a crawl.
                return False
            self.bot.sendMsg("[Lottery Crawler] {} ({}) crawled..".format(last_weekend, next_no))
        return True
if __name__ == "__main__":
    # Script entry point: bring ./resources/lotto_history.* up to date.
    DataCrawler().excute(os.path.join('.', 'resources'))

112
PROMPT.txt Normal file
View File

@@ -0,0 +1,112 @@
데이터는 다음과 같습니다.
(학습 데이터)
- train.json, train.txt
- 1회차부터 800회차
(검증 데이터)
- valid.json, valid.txt
- 801회차부터 1000회차
파일 구조를 먼저 이해하세요.
- train.py, valid.py는 각 데이터에 대한 당첨 여부 확인입니다. filter_model.py를 이용하여 그 동안 몇회 당첨이 되었는지와 어떤 규칙으로 탈락이 되었는지를 분석해 줍니다.
로또 추출 로직은 다음 파일입니다.
- filter_model.py
(filter_model의 필터 설명)
- filterPairBall는 함께 나오지 않을 만한 2개 숫자에 대한 조합입니다.
- filterTriplePairBall는 함께 나오지 않을 만한 3개 숫자에 대한 조합입니다.
- filterPatternInPaper1 ~ filterPatternInPaper6까지 함수는 용지 영역에서 함께 나오지 않을 법한 숫자 조합입니다.
- 그 외 숫자 합, 숫자 평균, AC 값 등 로직 포함
(요구사항)
- 100회차에 최소 1회 당첨되는 필터 로직을 만드는 것입니다.
- filter_model.py를 최적화 해야 합니다.
-- 만약 불필요한 로직이 있다면 제거해도 좋습니다.
-- filterPairBall과 filterTriplePairBall, filterPatternInPaper1 ~ filterPatternInPaper6의 필터도 학습 데이터를 분석해서 최적화 해주세요.
-- 다음의 다른 모든 필터 로직도 재설계 및 최적화 해주세요...
--- 당첨번호 6개 합
--- 당첨번호 6개 합에 대한 전주와 차이
--- 당첨번호 6개 평균
--- 당첨번호 6개 평균에 대한 전주와 차이
--- 앞 3개 볼의 합
--- 앞 3개 볼의 합에 대한 전주와 차이
--- 뒤 3개 볼의 합
--- 뒤 3개 볼의 합에 대한 전주와 차이
--- 23기준 작은 숫자 개수와 큰 숫자 개수
--- 고저합 (가장 큰 수와 가장 작은 숫자의 합)
--- 고저합 (가장 큰 수와 가장 작은 숫자의 합)에 대한 전주와 차이
--- ball 간격의 합
--- ball 간격의 합에 대한 전주와 차이
--- 두 자리 중 첫자리 숫자의 합 (예, 8은 0, 15는 1, 28은 2)
--- 두 자리 중 첫자리 숫자의 합 (예, 8은 0, 15는 1, 28은 2)에 대한 전주와 차이
--- 두 자리 중 두번째 자리 숫자의 합 (예, 8은 8, 15는 5, 27은 7)
--- 두 자리 중 두번째 자리 숫자의 합 (예, 8은 8, 15는 5, 27은 7)에 대한 전주와 차이
--- 첫번째 숫자
--- 첫번째 숫자에 대한 전주와 차이
--- 마지막 숫자
--- 마지막 숫자에 대한 전주와 차이
--- Uniq한 끝자리 숫자 개수 ([8, 18, 22, 31, 40, 44]는 8, 2, 1, 0, 4 이렇게 5개임)
--- Uniq한 끝자리 숫자 개수 ([8, 18, 22, 31, 40, 44]는 8, 2, 1, 0, 4 이렇게 5개임)에 대한 전주와 차이
--- AC 값
--- AC 값에 대한 전주와 차이
--- 3의 배수의 개수
--- 3의 배수의 개수에 대한 전주와 차이
--- 4의 배수의 개수
--- 4의 배수의 개수에 대한 전주와 차이
--- 5의 배수의 개수
--- 5의 배수의 개수에 대한 전주와 차이
--- 6의 배수의 개수
--- 6의 배수의 개수에 대한 전주와 차이
--- 7의 배수의 개수
--- 7의 배수의 개수에 대한 전주와 차이
--- 8의 배수의 개수
--- 8의 배수의 개수에 대한 전주와 차이
--- 9의 배수의 개수
--- 9의 배수의 개수에 대한 전주와 차이
--- 10의 배수의 개수
--- 10의 배수의 개수에 대한 전주와 차이
--- 11의 배수의 개수
--- 11의 배수의 개수에 대한 전주와 차이
--- 13의 배수의 개수
--- 13의 배수의 개수에 대한 전주와 차이
--- 17의 배수의 개수
--- 17의 배수의 개수에 대한 전주와 차이
--- 19의 배수의 개수
--- 19의 배수의 개수에 대한 전주와 차이
--- 23의 배수의 개수
--- 23의 배수의 개수에 대한 전주와 차이
--- 소수 포함 개수
--- 합성수 포함 개수
--- 합성수 포함 개수에 대한 전주와 차이
--- 홀짝 개수
--- 홀짝 개수에 대한 전주와 차이
--- 용지에 안나올 것 같은 마킹 위치 (filterPatternInPaper1~filterPatternInPaper6)
--- 전회차와 주어진 볼과 전후 볼
--- 공 6개가 1번대, 10번대, 20번대, 30번대, 40번대 중 공이 존재하는 구간의 개수
--- 공 6개가 1번대, 10번대, 20번대, 30번대, 40번대 중 공이 존재하는 구간의 개수에 대한 전주와 차이
--- 최근 8주간 모든 당첨번호에 포함되는 금주 번호 개수
--- 최근 8주간 모든 당첨번호에 포함되는 금주 번호 개수에 대한 전주와 차이
--- 최근 12주간 모든 당첨번호에 포함되는 금주 번호 개수
--- 최근 12주간 모든 당첨번호에 포함되는 금주 번호 개수에 대한 전주와 차이
--- 최근 16주간 모든 당첨번호에 포함되는 금주 번호 개수
--- 최근 16주간 모든 당첨번호에 포함되는 금주 번호 개수에 대한 전주와 차이
--- 최근 20주간 모든 당첨번호에 포함되는 금주 번호 개수
--- 최근 20주간 모든 당첨번호에 포함되는 금주 번호 개수에 대한 전주와 차이
--- 통계적으로 잘 나올 것 같지 않은 3개 공 조합 체크
--- 이전 7회차에서 안나온 값이 없는 경우
--- 하나의 당첨 번호에서 N개 연속된 숫자인지 체크
-- 최적화된 로직은 매우 엄격해야 합니다.
--- 매 회차에서 필터에 걸리지 않고 살아남는 조합의 개수는 300개 이내이면 좋겠습니다.
--- 301개를 넘는다고 해서 중단하라는 의미는 아닙니다.
--- 가급적 필터에 많이 걸러서 적은 비용으로 구매할 수 있도록 해달라는 의미입니다.
- train 데이터를 이용하여 filter_model의 필터 로직을 만들고 valid 데이터로 실험을 하세요.
-- valid 데이터는 200회차가 존재하므로 최소 3회 이상 당첨이 되어야 합니다.
먼저 진행해야할 일에 대해서 생각하고 정리하세요.
그리고 요구사항에 대해서 시도 방법을 설계하세요.
그리고 반복적으로 실행해서 최적화된 방법을 찾아서 적용해주세요.
(최적화는 언제든 학습 데이터로 최적화를 해야 합니다. 그리고 검증 데이터로 테스트만 수행하세요.)

326
README.md Normal file
View File

@@ -0,0 +1,326 @@
# 실행 순서
* FilterFeature.py를 실행한다.
* lotto_history.json을 읽어서 all_filter_[1-100].[cluster,csv,feature] 파일을 생성한다.
* FilterFeatureCluster.py를 실행한다.
* 첫수는 1~10까지만 허용한다.
* random_state 전체 내 각 cluster에 대해서 당첨 회수를 파악하여 ./resources/cluster_win_info.csv 파일을 생성한다.
* 생성 파일
* filtertest_1.csv: random_state 내 cluster 개수를 파악한다.
* filtertest_2.csv: random_state 내 cluster 개수 별 전체 당첨 회수를 파악한다.
* filtertest_3.csv: random_state 내 cluster 개수 별 최초 당첨 번호만 파악한다.
* 실행할 random_state와 cluster 번호 파악
* filtertest_2.csv과 answer_pattern_analsys.xlsx을 이용하여 선별한다.
* cluster_info.json 파일 업데이트
* 실행할 random_state와 cluster 번호를 json 형태로 등록한다.
* Util_filegen.py 실행
* m1, amd, intel 컴퓨터에서 실행할 sh, bat 파일을 생성한다.
* 파이썬 내에서 아래 두 부분만 수정하면 된다.
* m1_file_max, amd_file_max, intel_file_max = 8,12,7
* m1_proc_limit, amd_proc_limit, intel_proc_limit = 124,125,110
* 각 장비에서 sh와 bat 파일 실행
## Ruleset(임계값 설정) 기반으로 운영하기
`filter_model.BallFilter`의 주요 임계값(합/평균/앞3합/뒤3합/간격 등)을 **JSON ruleset**으로 외부화했습니다.
이제 “코드 수정 없이” ruleset 파일만 바꿔서 실험/튜닝을 자동화할 수 있습니다.
- **기본 ruleset 경로**: `resources/rulesets/default.json`
- **주의/한계**: 로또는 본질적으로 랜덤(독립/균등 가설)이며, ruleset은 “구매 조합 수를 줄이기 위한 필터”입니다. **당첨 보장/예측을 주장하지 않습니다.**
### valid 성능 확인 예시
```bash
python scripts/eval_filters.py \
--data valid \
--resources resources \
--ruleset resources/rulesets/default.json \
--start-no 801 --end-no 1000 \
--survivors-samples 0
```
### survivors(생존 조합 수) 근사 포함 예시
```bash
python scripts/eval_filters.py \
--data valid \
--resources resources \
--ruleset resources/rulesets/default.json \
--start-no 801 --end-no 1000 \
--survivors-samples 3000
```
## 자동 튜닝 → ruleset 생성 → 일괄 평가 파이프라인
### 1) train 기반 자동 튜닝(후보 ruleset 생성)
아래 스크립트는 **train 구간에서만** 임계값을 랜덤 탐색으로 튜닝한 뒤,
`resources/rulesets/`에 `Balanced.json`, `Coverage-First.json`을 저장합니다.
```bash
python scripts/tune_ruleset.py \
--resources resources \
--base-ruleset resources/rulesets/default.json \
--out-dir resources/rulesets \
--train-start 21 --train-end 800 \
--hit-rate-min 0.01 \
--iters 200 \
--mc-samples 40000
```
- **Coverage-First**: survivors(생존 조합 수) 최소화를 우선
- **Balanced**: survivors를 줄이되 hit-rate도 함께 고려
> 주의: survivors는 전수(8,145,060조합) 대신 **풀링 Monte Carlo**로 근사하므로 오차가 있습니다.
### 2) valid/train 구간에서 ruleset 일괄 평가
```bash
python scripts/eval_rulesets.py \
--resources resources \
--rulesets-dir resources/rulesets \
--data valid \
--start-no 801 --end-no 1000 \
--survivors-samples 0
```
# Query
```SQL
##### #####
with source_count as (
select source, count(*) as source_count
from cluster_info
where priority not in (99)
and source in (1,3)
group by 1
),
ball_count as (
# 1) random_state, cluster
select source, random_state, cluster, ball_cnt
from (
SELECT source, random_state, cluster, count(*)
as ball_cnt
from recommend_ball
where no=1136
and b1 > 0
group by 1,2,3
union all
SELECT source, random_state, cluster, 0 as ball_cnt
from recommend_ball
where no=1136
and b1 = 0
group by 1,2,3
) lj
),
source_rc_cluster_list as (
select ci.source, ci.random_state, ci.cluster, ci.cluster_count, ci.win_count, ci.priority, rc.source_count, bc.ball_cnt
from cluster_info ci
left join source_count rc on ci.source = rc.source
left join ball_count bc on ci.source = bc.source and ci.random_state = bc.random_state and ci.cluster = bc.cluster
where ci.priority not in (99)
and ci.source in (1,3)
),
source_process as (
select source, "done" as type, count(*) as cnt from source_rc_cluster_list
where ball_cnt is not NULL
group by 1,2
union all
select source, "yet" as type, count(*) as cnt from source_rc_cluster_list
where ball_cnt is NULL
group by 1,2
)
select source, type, cnt,
case when source=1 then concat(round(100.0 * cnt / (select source_count from source_count where source=1),2), '%')
when source=3 then concat(round(100.0 * cnt / (select source_count from source_count where source=3),2), '%')
end as rate from source_process order by 1,2
;
### ###
SELECT ci.source, ci.random_state, ci.cluster, lj.cnt
from cluster_info ci
left join (select source, random_state, cluster, count(*) as cnt from recommend_ball rb where no=1136 group by 1,2,3) lj on ci.source=lj.source and ci.random_state=lj.random_state and ci.cluster=lj.cluster
where priority not in (99)
and lj.cnt is null
order by 1,2,3
;
##### cluster #####
with raw_data as (
select rb.source, ci.priority, rb.random_state, rb.cluster, ci.cluster_count, ci.win_count, b1, count(*) as ball_cnt
from recommend_ball rb left join cluster_info ci on rb.source=ci.source and rb.random_state = ci.random_state and rb.cluster = ci.cluster
where no=1136
group by 1,2,3,4,5,6,7
),
all_cluster as (
select source, priority, random_state, cluster, ball_cnt
from raw_data
where (
(source = 1 and priority in (1,2)) or
(source = 3 and priority in (1,2))
)
group by 1,2,3,4
),
valid_total_cluster as (
select source, priority, random_state, cluster, ball_cnt
from raw_data
where (
(source = 1 and priority = 1 and
ball_cnt BETWEEN 50 and 80
) or
(source = 1 and priority = 2 and (
win_count = 12 and ball_cnt BETWEEN 50 and 80)
) or
(source = 3 and priority = 1 and
(ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100)
) or
(source = 3 and priority = 2 and (
win_count=13 and (ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100))
) or
(source = 1 and
((win_count between 5 and 10) and ball_cnt BETWEEN 1 and 20)
)
)
group by 1,2,3,4
),
valid_none_0_cluster as (
select source, priority, random_state, cluster, ball_cnt
from raw_data
where b1 <> 0 AND
(
(source = 1 and priority = 1 and
ball_cnt BETWEEN 50 and 80
) or
(source = 1 and priority = 2 and (
win_count = 12 and ball_cnt BETWEEN 50 and 80)
) or
(source = 3 and priority = 1 and
(ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100)
) or
(source = 3 and priority = 2 and (
win_count=13 and (ball_cnt BETWEEN 1 and 30 or ball_cnt BETWEEN 50 and 100))
) or
(source = 1 and
((win_count between 5 and 10) and ball_cnt BETWEEN 1 and 20)
)
)
group by 1,2,3,4
)
#
select 1 as col, count(*) from all_cluster
union all
#
select 2 as col, count(*) from valid_total_cluster
union all
# 0
select 3 as col, count(*) from valid_none_0_cluster
;
##### #####
select b1,b2,b3,b4,b5,b6,count(*) as ball_cnt
from recommend_ball
where no=1136
and b1>0
group by 1,2,3,4,5,6
order by 7 desc;
##### #####
with priority as (
select source, random_state, cluster, cluster_count, win_count, priority
from cluster_info
where priority not in (99)
),
recommend as (
select source, random_state, cluster, b1,b2,b3,b4,b5,b6
from recommend_ball
where b1 > 0
and no=1136
),
recommend_count as (
select source, random_state, cluster, count(*) as ball_cnt
from recommend_ball
where b1 > 0
and no=1136
group by 1,2,3
),
raw_data as (
select r.source, r.random_state, r.cluster, p.cluster_count, p.win_count, p.priority, r.b1,r.b2,r.b3,r.b4,r.b5,r.b6, rc.ball_cnt
from recommend r
left join priority p on r.source=p.source and r.random_state=p.random_state and r.cluster=p.cluster
left join recommend_count rc on r.source=rc.source and r.random_state=rc.random_state and r.cluster=rc.cluster
),
candidate as (
select source, random_state, cluster, cluster_count, win_count, priority, b1,b2,b3,b4,b5,b6, ball_cnt
from raw_data
where (
(source = 0 and b1=7)
or (source = 1 and priority=-1 and ball_cnt<=140 and (
b1 not in (13, 19, 28)
and b2 not in (13, 19, 28)
and b3 not in (13, 19, 28)
and b4 not in (13, 19, 28)
and b5 not in (13, 19, 28)
and b6 not in (13, 19, 28)
)
)
or (source = 3 and priority=-1 and ball_cnt<=150 and (
b1 not in (13, 19, 28)
and b2 not in (13, 19, 28)
and b3 not in (13, 19, 28)
and b4 not in (13, 19, 28)
and b5 not in (13, 19, 28)
and b6 not in (13, 19, 28)
)
)
)
)
#select source, random_state,cluster,b1,b2,b3,b4,b5,b6 from candidate order by 4,5,6,7,8,9;
, duplication as (
# 34
select source, random_state, cluster, cluster_count, win_count, priority, b1,b2,b3,b4,b5,b6, ball_cnt
from (
select source, random_state, cluster, cluster_count, win_count, priority, b1,b2,b3,b4,b5,b6, ball_cnt,
ROW_NUMBER() OVER(PARTITION BY b1,b2,b3,b4,b5,b6 ORDER BY b1,b2,b3,b4,b5,b6) AS rnk
from candidate
) a
where rnk=1
order by source,random_state,cluster,b1,b2,b3,b4,b5,b6
)
select count(*) as cnt from duplication;
#select source, priority, random_state, cluster, win_count, count(*) as cnt from duplication group by 1,2,3;
#select b1, count(*) as ball_cnt from duplication group by 1
#select b6, count(*) as ball_cnt from duplication group by 1
#select source,random_state,cluster,b1,b2,b3,b4,b5,b6 from duplication order by 4,5,6,7,8,9;
```

122
TelegramBot.py Normal file
View File

@@ -0,0 +1,122 @@
from datetime import datetime
import telegram
import asyncio
import platform
from multiprocessing import Pool
import os
class TelegramBot:
    """Small Telegram notifier used by the lottery scripts.

    Configuration is read from environment variables:

        export TELEGRAM_BOT_TOKEN="your_bot_token_here"
        export TELEGRAM_CHAT_ID="your_chat_id_here"

    or from a ``.env`` file with the same two keys. When they are not set, the
    built-in fallback values below are used and a warning is printed.
    """

    # SECURITY NOTE(review): hard-coded fallback credentials. They are kept only
    # so existing setups without environment variables keep working. The token
    # should be rotated and supplied through TELEGRAM_BOT_TOKEN instead.
    _DEFAULT_TOKEN = '6791293398:AAFi1zrQTs6UmuHycAuNdsBgHDHaHcOJcYA'
    _DEFAULT_CHAT_ID = '574661323'

    enable = None
    BOT_TOKEN = None
    CHANNEL_ID = None
    client = None

    def __init__(self, enable=True):
        """Read token/chat id from the environment and build the Telegram client.

        Args:
            enable: When false, ``alarm_live``, ``post`` and ``sendMsg`` do nothing.
        """
        self.token = os.getenv('TELEGRAM_BOT_TOKEN', self._DEFAULT_TOKEN)
        self.chat_id = os.getenv('TELEGRAM_CHAT_ID', self._DEFAULT_CHAT_ID)
        self.botname = "lottobot"
        self.username = "ncue_lotto_bot"
        self.client = telegram.Bot(token=self.token)
        self.enable = enable
        # The fallback token starts with this bot id.
        if self.token.startswith('6791293398'):
            print("⚠️ 경고: 기본 토큰을 사용 중입니다. 보안을 위해 환경변수 설정을 권장합니다.")
        return

    # https://velog.io/@gyunghoe/%ED%85%94%EB%A0%88%EA%B7%B8%EB%9E%A8-%EB%B4%87-%EC%84%B1%EB%8A%A5-%EC%B5%9C%EC%A0%81%ED%99%94%ED%95%98%EA%B8%B0
    @staticmethod
    def send(text):
        """Send ``text`` to the configured chat, running the coroutine to completion.

        Static so it can be handed to a worker process; it re-reads the
        environment and builds its own client instead of using instance state.
        """
        token = os.getenv('TELEGRAM_BOT_TOKEN', TelegramBot._DEFAULT_TOKEN)
        chat_id = os.getenv('TELEGRAM_CHAT_ID', TelegramBot._DEFAULT_CHAT_ID)
        client = telegram.Bot(token=token)
        if platform.system().lower() == 'windows':
            asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
        asyncio.run(client.send_message(chat_id=chat_id, text=text))
        return

    def _dispatch(self, text):
        """Send ``text`` from a short-lived worker process and echo it to stdout."""
        # One message needs one worker. The context manager tears the pool
        # down; the previous Pool(12) per call was never closed, so every
        # message left twelve worker processes behind.
        with Pool(1) as pool:
            pool.map(self.send, [text])
        print(" * " + text)

    def alarm_live(self, stock_code, stock_name):
        """Send an "[ALIVE]" heartbeat containing the current HH:MM, code and name."""
        if self.enable:
            this_time = datetime.now()
            text = "[ALIVE] {} {} ({})".format(this_time.strftime('%H:%M'), stock_code, stock_name)
            self._dispatch(text)
        return

    def post(self, stock_code, stock_name, type, price, count, rsi, balance=0):
        """Send a trade summary line.

        ``amount`` is reported as ``price * count``. The balance is included
        only when ``balance`` is greater than zero.
        """
        if self.enable:
            this_time = datetime.now()
            if 0 < balance:
                text = "{}, {}, code: {}, name: {}, amount: {}, price: {}, count: {}, (balance: {:.2f}), (rsi: {:.2f})".format(type, this_time.strftime('%H:%M'), stock_code, stock_name, price*count, price, count, balance, rsi)
            else:
                text = "{}, {}, code: {}, name: {}, amount: {}, price: {}, count: {}, (rsi: {:.2f})".format(type, this_time.strftime('%H:%M'), stock_code, stock_name, price*count, price, count, rsi)
            self._dispatch(text)
        return

    def sendMsg(self, msg):
        """Send ``msg`` prefixed with a ``[YYYY-mm-dd HH:MM:SS]`` timestamp line."""
        if self.enable:
            this_time = datetime.now()
            text = "[{}]: \n{}".format(this_time.strftime("%Y-%m-%d %H:%M:%S"), msg)
            self._dispatch(text)
        return
if __name__ == "__main__":
    # Manual smoke test: push a single message through the configured bot.
    bot = TelegramBot()
    # Sample of the weekly report layout, kept for reference and not executed:
    #   last_weekend = '20240727'
    #   p_no = 1130
    #   p_ball = [15, 19, 21, 25, 27, 27]
    #   ymd = '20240803'
    #   howto = 1
    #   random_state = 28
    #   cluster = 31
    #   recommend_size = 0
    #   p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델 #6_c{}_{}_{}] ({}개)\n".format(
    #       last_weekend, p_no, str(p_ball), ymd, (p_no + 1), howto, random_state, cluster, recommend_size
    #   )
    #   bot.sendMsg("{}".format(p_str))
    bot.sendMsg("🎯 DeepLottery 시스템 테스트 메시지")

BIN
analsys.xlsx Normal file

Binary file not shown.

4439
filter_model_1.py Normal file

File diff suppressed because it is too large Load Diff

1256
filter_model_2.py Normal file

File diff suppressed because it is too large Load Diff

94
filter_model_3.py Normal file
View File

@@ -0,0 +1,94 @@
"""
filter_model_3.py
OR-composed BallFilter:
- A candidate ball is ACCEPTED if it passes EITHER filter_model_1 OR filter_model_2.
- A candidate ball is REJECTED only if it fails BOTH.
This keeps the same public interface used across the project:
BallFilter(lottoHistoryFileName, ruleset_path=..., ruleset=...)
.filter(ball, no, until_end=False, df=None, filter_ball=None) -> set[str]
.extract_final_candidates(ball, no=None, until_end=False, df=None) -> set[str]
Notes:
- The underlying filters return a non-empty set of failure reasons when rejected.
- Callers treat "len(result) == 0" as PASS.
"""
from __future__ import annotations
from typing import Any, Dict, Optional
import filter_model_1 as fm1
import filter_model_2 as fm2
class BallFilter:
    """
    "Either model accepts" wrapper around filter_model_1 and filter_model_2.

    An empty result set means the ball is accepted. It is returned as soon as
    one wrapped model returns no rejection reasons. When both models reject,
    the result is every reason from both, tagged "m1:" / "m2:".
    """

    def __init__(
        self,
        lottoHistoryFileName: Optional[str] = None,
        # Shared settings, used for a model unless its own override is given
        ruleset_path: Optional[str] = None,
        ruleset: Optional[Dict[str, Any]] = None,
        # Per-model overrides
        ruleset_path_1: Optional[str] = None,
        ruleset_path_2: Optional[str] = None,
        ruleset_1: Optional[Dict[str, Any]] = None,
        ruleset_2: Optional[Dict[str, Any]] = None,
    ):
        def or_shared(specific, shared):
            # A model-specific value wins; None falls back to the shared one.
            return shared if specific is None else specific

        path_1 = or_shared(ruleset_path_1, ruleset_path)
        path_2 = or_shared(ruleset_path_2, ruleset_path)
        rules_1 = or_shared(ruleset_1, ruleset)
        rules_2 = or_shared(ruleset_2, ruleset)
        self.m1 = fm1.BallFilter(lottoHistoryFileName, ruleset_path=path_1, ruleset=rules_1)
        self.m2 = fm2.BallFilter(lottoHistoryFileName, ruleset_path=path_2, ruleset=rules_2)

    #
    # History helpers: all answered by the first model.
    #
    def getBall(self, no):
        return self.m1.getBall(no)

    def getLastNo(self, YMD):
        return self.m1.getLastNo(YMD)

    def getNextNo(self, YMD):
        return self.m1.getNextNo(YMD)

    def getYMD(self, no):
        return self.m1.getYMD(no)

    def _prefixed(self, prefix: str, reasons: set) -> set:
        # Tag each reason with the model it came from.
        return {"{}{}".format(prefix, reason) for reason in reasons}

    def extract_final_candidates(self, ball, no=None, until_end: bool = False, df=None):
        """
        Evaluate the ball with model 1, then with model 2 only if model 1 rejected.

        Returns an empty set when either model accepts, otherwise the union of
        both models' prefixed reasons.
        """
        call_args = dict(ball=ball, no=no, until_end=until_end, df=df)

        first = self.m1.extract_final_candidates(**call_args)
        if not len(first):
            return set()

        second = self.m2.extract_final_candidates(**call_args)
        if not len(second):
            return set()

        # Rejected by both models.
        return self._prefixed("m1:", set(first)).union(self._prefixed("m2:", set(second)))

    def filter(self, ball, no, until_end: bool = False, df=None, filter_ball=None):
        """
        Same result as extract_final_candidates; ``filter_ball`` is accepted
        for signature compatibility and not used.
        """
        return self.extract_final_candidates(ball=ball, no=no, until_end=until_end, df=df)

451
fixed10.py Normal file
View File

@@ -0,0 +1,451 @@
#!/usr/bin/env python3
"""
fixed10.py
요구사항
- "지금까지 당첨되지 않은(=과거 1등 조합으로 나온 적 없는)" 조합만 추천
- 앞으로 10개 조합을 꾸준히 구매할 수 있도록 10개만 출력
- filter_model_1/2/3와 무관한 새로운 최적화 방법
중요한 사실
- 로또는 통계적으로 독립/균등(무작위) 가정이 기본이라 미래 1등을 '예측'할 수는 없습니다.
- 대신 이 코드는 과거 1등 조합들의 전형적 분포(합/홀짝/구간/연속/끝수 등)에
"가까운" 조합을 찾고, 10개 조합 간 중복(겹침)을 줄이는 방향으로 최적화합니다.
동작 개요
1) 히스토리( resources/lotto_history.txt )로부터 과거 1등 조합 집합을 로드
2) 과거 1등들의 feature 분포를 구축(라플라스 스무딩)
3) 고정 seed로 랜덤 샘플 풀을 생성하고, 분포 적합도 + 제약(겹침/최근회차 유사도 등)으로 스코어링
4) 상위 후보에서 다양성 제약을 만족하도록 greedy하게 10개 선택
사용 예:
python fixed10.py
python fixed10.py --history resources/lotto_history.txt --count 10 --seed 42 --pool 300000
"""
from __future__ import annotations
import argparse
import csv
import math
import os
import random
from collections import Counter, defaultdict
from dataclasses import dataclass
from typing import Dict, Iterable, List, Sequence, Set, Tuple, Optional
Ball = Tuple[int, int, int, int, int, int]


def parse_history_txt(path: str) -> List[Ball]:
    """
    Read a history file whose rows look like: no,b1,b2,b3,b4,b5,b6,bn

    Columns 2-7 of every row with at least seven columns become one ascending
    6-tuple. Blank and shorter rows are skipped. File order is preserved.
    """
    draws: List[Ball] = []
    with open(path, "r", encoding="utf-8") as handle:
        for record in csv.reader(handle):
            if not record:
                continue
            # cells may carry surrounding whitespace
            cells = [cell.strip() for cell in record]
            if len(cells) < 7:
                continue
            six = [int(cell) for cell in cells[1:7]]
            six.sort()
            draws.append(tuple(six))  # type: ignore[arg-type]
    return draws
def max_consecutive_len(nums: Sequence[int]) -> int:
    """
    Return the length of the longest run in which each value is exactly one
    more than the value before it.

    ``nums`` is scanned in the given order. An empty sequence has no run and
    yields 0 (it previously yielded 1); any non-empty sequence yields at least 1.
    """
    if len(nums) == 0:
        return 0
    best = 1
    run = 1
    for i in range(1, len(nums)):
        if nums[i] == nums[i - 1] + 1:
            run += 1
            best = max(best, run)
        else:
            # The run is broken; start counting again from this value.
            run = 1
    return best
def decade_bucket(n: int) -> int:
    """
    Map a number to its bucket index:
    1-10 -> 0, 11-20 -> 1, 21-30 -> 2, 31-40 -> 3, everything else -> 4.
    """
    # 1..40 splits into four blocks of ten; 41..45 and any other value land in 4.
    if 1 <= n <= 40:
        return (n - 1) // 10
    return 4
@dataclass(frozen=True)
class Features:
    """Immutable summary statistics of one six-number combination."""

    # total of the six numbers
    sum6: int
    # how many numbers are odd
    odd: int
    # how many numbers are <= 22
    low: int
    # longest run of consecutive numbers
    max_run: int
    # number of distinct last digits
    uniq_last_digit: int
    # how many numbers fall into each of the five decade buckets
    decade_sig: Tuple[int, int, int, int, int]
def features_of(ball: Ball) -> Features:
    """Compute the Features record (sum, parity, low count, runs, last digits, buckets) of ``ball``."""
    per_bucket = [0] * 5
    for value in ball:
        per_bucket[decade_bucket(value)] += 1

    return Features(
        sum6=sum(ball),
        odd=len([v for v in ball if v % 2 == 1]),
        low=len([v for v in ball if v <= 22]),
        max_run=max_consecutive_len(ball),
        uniq_last_digit=len(set(v % 10 for v in ball)),
        decade_sig=tuple(per_bucket),  # type: ignore[arg-type]
    )
class SmoothedDist:
    """
    Discrete distribution over observed values with additive smoothing:
    P(v) = (count(v) + alpha) / (N + alpha * K)
    where N is the total count and K is the number of distinct observed
    values (taken as 1 when nothing was observed).
    """

    def __init__(self, counts: Counter, alpha: float = 1.0):
        self.counts = counts
        self.alpha = float(alpha)
        self.n = sum(counts.values())
        self.k = max(1, len(counts))

    def logp(self, v) -> float:
        """Return the natural log of the smoothed probability of ``v``; unseen values count as 0."""
        numerator = self.counts.get(v, 0) + self.alpha
        denominator = self.n + self.alpha * self.k
        return math.log(numerator / denominator)
def build_feature_dists(history: Sequence[Ball]) -> Dict[str, SmoothedDist]:
    """Build one smoothed distribution (alpha=1.0) per Features field from the given draws."""
    feats = [features_of(b) for b in history]
    field_names = ("sum6", "odd", "low", "max_run", "uniq_last_digit", "decade_sig")
    return {
        name: SmoothedDist(Counter(getattr(f, name) for f in feats), alpha=1.0)
        for name in field_names
    }
def overlap(a: Ball, b: Ball) -> int:
    """Return how many distinct numbers ``a`` and ``b`` have in common."""
    return len(set(a).intersection(b))
def recent_overlap_penalty(ball: Ball, recent: Sequence[Ball]) -> float:
    """
    Fixed-weight penalty for resembling a recent draw.

    Returns 6.0 when ``ball`` shares four or more numbers with any draw in
    ``recent``, 1.0 when the largest overlap is exactly three, else 0.0.
    This is only a diversification heuristic.
    """
    picked = set(ball)
    worst = 0
    for draw in recent:
        shared = len(picked & set(draw))
        if shared > worst:
            worst = shared
            if worst >= 4:
                # Strongest tier reached; no need to look at further draws.
                return 6.0
    return 1.0 if worst == 3 else 0.0
@dataclass(frozen=True)
class Tuning:
    """Immutable bundle of search, diversification and penalty settings."""

    # -- sampling / search --
    # number of random candidates to sample
    pool: int
    # number of best-scored candidates kept before diversification
    top_k: int

    # -- diversification --
    # how many of the latest draws count as "recent"
    recent_window: int
    # maximum shared numbers allowed between two chosen picks
    max_pair_overlap: int

    # -- penalty weights --
    # subtracted when the largest overlap with a recent draw is exactly 3
    recent_penalty_3: float
    # subtracted when the largest overlap with a recent draw is 4 or more
    recent_penalty_4plus: float
    # subtracted when the longest consecutive run is 4 or more
    max_run_penalty: float
    # subtracted when 5 or more numbers share one decade bucket
    decade_concentration_penalty: float
PRESETS: Dict[str, Tuning] = {
    # balanced: the default profile - distribution fit plus moderate diversity
    "balanced": Tuning(
        pool=250_000, top_k=5_000,
        recent_window=52, max_pair_overlap=2,
        recent_penalty_3=1.0, recent_penalty_4plus=6.0,
        max_run_penalty=1.5, decade_concentration_penalty=2.0,
    ),
    # aggressive: tighter distribution fit, stronger avoidance of recent draws,
    # stricter limit on overlap between picks
    "aggressive": Tuning(
        pool=500_000, top_k=7_500,
        recent_window=80, max_pair_overlap=1,
        recent_penalty_3=2.0, recent_penalty_4plus=10.0,
        max_run_penalty=2.5, decade_concentration_penalty=3.0,
    ),
    # conservative: gentler penalties and a relaxed diversity constraint
    "conservative": Tuning(
        pool=150_000, top_k=5_000,
        recent_window=26, max_pair_overlap=3,
        recent_penalty_3=0.3, recent_penalty_4plus=2.0,
        max_run_penalty=0.8, decade_concentration_penalty=1.0,
    ),
}
def max_recent_overlap(ball: Ball, recent: Sequence[Ball]) -> int:
    """Return the largest count of numbers ``ball`` shares with any draw in ``recent`` (0 if none)."""
    picked = set(ball)
    best = 0
    for draw in recent:
        shared = len(picked.intersection(draw))
        if shared > best:
            best = shared
        if best >= 6:
            # Scanning stops once an overlap of six is reached.
            break
    return best
def score_ball(
    ball: Ball,
    dists: Dict[str, SmoothedDist],
    history_set: Set[Ball],
    recent: Sequence[Ball],
    tuning: Tuning,
) -> float:
    """
    Score one candidate; higher is better.

    A combination already present in ``history_set`` scores -inf. Otherwise
    the score is the sum of the log-probabilities of its six features under
    ``dists``, minus the soft penalties configured in ``tuning``.
    """
    # Hard reject: this exact combination has already been drawn.
    if ball in history_set:
        return float("-inf")

    feat = features_of(ball)

    # Distribution fit, accumulated field by field in a fixed order.
    total = 0.0
    for key in ("sum6", "odd", "low", "max_run", "uniq_last_digit", "decade_sig"):
        total += dists[key].logp(getattr(feat, key))

    # Soft constraint: consecutive runs of four or more.
    if feat.max_run >= 4:
        total -= tuning.max_run_penalty

    # Soft constraint: five or more numbers in a single decade bucket.
    if max(feat.decade_sig) >= 5:
        total -= tuning.decade_concentration_penalty

    # Soft constraint: similarity to recent draws.
    closest = max_recent_overlap(ball, recent)
    if closest == 3:
        total -= tuning.recent_penalty_3
    elif closest >= 4:
        total -= tuning.recent_penalty_4plus

    return total
def select_diverse(
    candidates: Sequence[Ball],
    scores: Dict[Ball, float],
    count: int,
    max_pair_overlap: int,
) -> List[Ball]:
    """
    Greedy pick of up to ``count`` candidates, in the order given.

    A candidate is kept when it shares at most ``max_pair_overlap`` numbers
    with every candidate kept before it. ``scores`` is not consulted here;
    callers are expected to pass ``candidates`` already ordered by preference.
    """
    picked: List[Ball] = []
    for cand in candidates:
        if len(picked) >= count:
            break
        members = set(cand)
        if all(len(members & set(prev)) <= max_pair_overlap for prev in picked):
            picked.append(cand)
    return picked
def generate_fixed10(
    history: Sequence[Ball],
    count: int = 10,
    seed: int = 42,
    pool: int = 250_000,
    top_k: int = 5_000,
    recent_window: int = 52,
    max_pair_overlap: int = 2,
    recent_penalty_3: float = 1.0,
    recent_penalty_4plus: float = 6.0,
    max_run_penalty: float = 1.5,
    decade_concentration_penalty: float = 2.0,
) -> List[Ball]:
    """
    Produce up to ``count`` combinations that never appear in ``history``.

    Steps: sample ``pool`` random sorted 6-of-45 combinations with a seeded
    generator, score each with ``score_ball``, keep the ``top_k`` best, then
    greedily choose picks that overlap each other by at most
    ``max_pair_overlap`` numbers. If that yields fewer than ``count`` picks,
    the selection is retried with looser overlap limits (3, 4, 5).

    Args:
        history: Past winning combinations, oldest first.
        count: Number of picks wanted.
        seed: Seed of the random generator (same inputs give the same picks).
        pool: Number of random candidates to sample (duplicates are skipped).
        top_k: Number of best-scored candidates kept for selection.
        recent_window: How many of the latest draws count as "recent". A
            value of 0 or less means no draw is treated as recent.
        max_pair_overlap: Initial limit on shared numbers between two picks.
        recent_penalty_3, recent_penalty_4plus, max_run_penalty,
        decade_concentration_penalty: Penalty weights forwarded to scoring.

    Returns:
        The chosen combinations, best-scored first; possibly fewer than
        ``count`` when even the loosest limit cannot supply enough.
    """
    rng = random.Random(seed)
    history_set = set(history)
    dists = build_feature_dists(history)

    # history[-0:] is the WHOLE history, so a zero window must be handled
    # explicitly. Slicing already copes with a window longer than the history.
    recent = list(history[-recent_window:]) if recent_window > 0 else []

    tuning = Tuning(
        pool=pool,
        top_k=top_k,
        recent_window=recent_window,
        max_pair_overlap=max_pair_overlap,
        recent_penalty_3=recent_penalty_3,
        recent_penalty_4plus=recent_penalty_4plus,
        max_run_penalty=max_run_penalty,
        decade_concentration_penalty=decade_concentration_penalty,
    )

    scored: List[Tuple[float, Ball]] = []
    seen: Set[Ball] = set()
    numbers = range(1, 46)
    rejected = float("-inf")

    # Sample the candidate pool; each distinct combination is scored once.
    for _ in range(pool):
        ball = tuple(sorted(rng.sample(numbers, 6)))  # type: ignore[assignment]
        if ball in seen:
            continue
        seen.add(ball)
        sc = score_ball(ball, dists, history_set, recent, tuning)
        if sc == rejected:
            continue
        scored.append((sc, ball))

    scored.sort(key=lambda x: x[0], reverse=True)
    top = [b for _, b in scored[:top_k]]
    scores_map = {b: sc for sc, b in scored[:top_k]}

    chosen = select_diverse(top, scores_map, count=count, max_pair_overlap=max_pair_overlap)

    # Not enough picks under the requested limit: relax it step by step.
    if len(chosen) < count:
        for relax in (3, 4, 5):
            if relax <= max_pair_overlap:
                # Not looser than what was already tried.
                continue
            chosen = select_diverse(top, scores_map, count=count, max_pair_overlap=relax)
            if len(chosen) >= count:
                chosen = chosen[:count]
                break

    return chosen
def summarize(picks: Sequence[Ball], recent: Sequence[Ball]) -> Dict[str, object]:
    """
    Report how much the picks overlap each other and the recent draws.

    Returns a dict with the largest pairwise overlap among picks, a histogram
    of pairwise overlaps, the largest overlap of any pick with a recent draw,
    and a histogram of each pick's largest recent overlap. Histogram keys are
    in ascending order.
    """
    pick_sets = [set(p) for p in picks]

    # Every unordered pair of picks is counted once.
    pair_hist = Counter()
    for i, left in enumerate(pick_sets):
        for right in pick_sets[i + 1:]:
            pair_hist[len(left & right)] += 1

    # Per pick: its closest match among the recent draws.
    recent_hist = Counter()
    for members in pick_sets:
        closest = 0
        for draw in recent:
            closest = max(closest, len(members & set(draw)))
            if closest >= 6:
                break
        recent_hist[closest] += 1

    return {
        "max_pair_overlap": max(pair_hist, default=0),
        "pair_overlap_hist": dict(sorted(pair_hist.items())),
        "max_recent_overlap": max(recent_hist, default=0),
        "recent_overlap_hist": dict(sorted(recent_hist.items())),
    }
def main():
    """
    Command-line entry point.

    Loads the draw history, resolves the tuning values (preset from
    ``--profile``, with any explicitly given option taking precedence),
    generates the picks and prints them, followed by an overlap summary
    unless ``--no-report`` is given.

    Raises:
        SystemExit: when the history file yields no draws.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--history", default=os.path.join("resources", "lotto_history.txt"))
    p.add_argument("--count", type=int, default=10)
    p.add_argument("--seed", type=int, default=42)
    p.add_argument(
        "--profile",
        choices=sorted(PRESETS.keys()),
        default="balanced",
        help="Tuning preset. You can still override any individual knob below.",
    )
    p.add_argument("--pool", type=int, default=None, help="Number of random candidates to sample.")
    p.add_argument("--top-k", type=int, default=None, help="Keep top-K scored candidates before diversification.")
    p.add_argument("--recent-window", type=int, default=None, help="Recent draw window size for overlap penalty.")
    p.add_argument("--max-pair-overlap", type=int, default=None, help="Max allowed overlap between chosen picks (greedy).")
    p.add_argument("--recent-penalty-3", type=float, default=None, help="Penalty if overlaps 3 with any recent draw.")
    p.add_argument("--recent-penalty-4plus", type=float, default=None, help="Penalty if overlaps >=4 with any recent draw.")
    p.add_argument("--max-run-penalty", type=float, default=None, help="Penalty if max consecutive run >=4.")
    p.add_argument("--decade-concentration-penalty", type=float, default=None, help="Penalty if >=5 numbers in a decade bucket.")
    p.add_argument("--no-report", action="store_true", help="Do not print overlap summary.")
    args = p.parse_args()

    history = parse_history_txt(args.history)
    if not history:
        raise SystemExit(f"History is empty or not readable: {args.history}")

    preset = PRESETS[args.profile]

    def knob(name):
        # An option left at None was not given on the command line, so the
        # preset value applies. argparse has already converted given values.
        given = getattr(args, name)
        return getattr(preset, name) if given is None else given

    pool = knob("pool")
    top_k = knob("top_k")
    recent_window = knob("recent_window")
    max_pair_overlap = knob("max_pair_overlap")
    recent_penalty_3 = knob("recent_penalty_3")
    recent_penalty_4plus = knob("recent_penalty_4plus")
    max_run_penalty = knob("max_run_penalty")
    decade_concentration_penalty = knob("decade_concentration_penalty")

    picks = generate_fixed10(
        history=history,
        count=args.count,
        seed=args.seed,
        pool=pool,
        top_k=top_k,
        recent_window=recent_window,
        max_pair_overlap=max_pair_overlap,
        recent_penalty_3=recent_penalty_3,
        recent_penalty_4plus=recent_penalty_4plus,
        max_run_penalty=max_run_penalty,
        decade_concentration_penalty=decade_concentration_penalty,
    )

    print(f"history draws: {len(history)}")
    print(
        "fixed picks "
        f"(profile={args.profile}, count={len(picks)}, seed={args.seed}, "
        f"pool={pool}, top_k={top_k}, recent_window={recent_window}, max_pair_overlap={max_pair_overlap}):"
    )
    for i, b in enumerate(picks, start=1):
        print(f"{i:2d}. {list(b)}")

    if not args.no_report:
        # history[-0:] would be the whole history, so a window of 0 (or less)
        # must yield no recent draws at all.
        recent = list(history[-recent_window:]) if recent_window > 0 else []
        rep = summarize(picks, recent)
        print("\nsummary:")
        print(f"- max_pair_overlap: {rep['max_pair_overlap']}")
        print(f"- pair_overlap_hist: {rep['pair_overlap_hist']}")
        print(f"- max_recent_overlap: {rep['max_recent_overlap']}")
        print(f"- recent_overlap_hist: {rep['recent_overlap_hist']}")


if __name__ == "__main__":
    main()

1081
practice_0.py Normal file

File diff suppressed because it is too large Load Diff

179
practice_1.py Normal file
View File

@@ -0,0 +1,179 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_1 import BallFilter
class Practice:
    """Collect lotto draw history and build the weekly list of picks.

    ``craw`` downloads winning numbers into the history files, ``predict1``
    contributes a fixed ticket and ``predict2`` keeps every 6-of-45
    combination for which ``BallFilter.filter`` returns nothing.
    """

    # Telegram client; replaced by a real instance in __init__.
    bot = None
    # Placeholders that no method of this class assigns or reads.
    preprocessor = None
    predictor = None
    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram client.

        Args:
            resources_path: Accepted for interface compatibility; this
                constructor neither stores nor reads it.
        """
        self.bot = TelegramBot()
        return

    def craw(self, lottoHistoryFile, drwNo=None):
        """Download winning numbers and store them in the history files.

        Results go to ``lottoHistoryFile + ".json"`` (one raw JSON response
        per line) and ``lottoHistoryFile + ".txt"`` (one CSV row per draw:
        draw number, six numbers, bonus number).

        Args:
            lottoHistoryFile: Path prefix shared by the two history files.
            drwNo: Draw number to fetch and append. When ``None`` both files
                are truncated and draws are fetched from 1 upwards until the
                API stops reporting success.

        Returns:
            ``[n1, n2, n3, n4, n5, n6, bonus]`` of the last draw stored, or
            ``None`` when nothing was stored.
        """
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # The context managers close both files on every exit path, including
        # the early return for an unavailable draw and request/JSON errors.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if single_draw:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                return self._store_draw(jsonFp, textFp, drwNo, result)
            idx = 1
            while True:
                result = self._fetch_draw(idx)
                if result is None:
                    break
                ball = self._store_draw(jsonFp, textFp, idx, result)
                idx += 1
                # Fixed pause between consecutive requests.
                time.sleep(0.5)
        return ball

    def _fetch_draw(self, draw_no):
        """Request one draw; return its JSON dict, or None unless 'success'."""
        url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw_no)
        result = requests.post(url).json()
        if result['returnValue'] != 'success':
            return None
        return result

    def _store_draw(self, jsonFp, textFp, draw_no, result):
        """Write one draw to both files, echo it, and return its numbers."""
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        numbers = [result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
                   result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
                   result['bnusNo']]
        row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + numbers)
        textFp.write(row + "\n")
        print(row)
        return numbers

    def predict1(self, result_json):
        """Append the ticket that is played every week to ``result_json``."""
        result_json.append([6, 7, 10, 11, 20, 45])
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every combination that passes the filter to ``result_json``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` and
                ``lotto_history.txt``.
            ymd: Date string handed to ``BallFilter.getNextNo``
                (presumably the upcoming draw date as YYYYMMDD; confirm
                against BallFilter).
            result_json: List that receives the surviving combinations, each
                as a list of six ints.

        Returns:
            ``(p_no, p_ball)``: the draw number stored in the row whose
            ``no`` equals ``getNextNo(ymd) - 1`` and that row's six numbers.

        Raises:
            IndexError: If the history has no row for the previous draw.
        """
        history_json = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(history_json)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))
        history_txt = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(history_txt, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        # Iterate lazily: building a list of all 8,145,060 combinations first
        # costs a large amount of memory and changes nothing in the result.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            rejected_by = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(rejected_by) > 0:
                continue
            result_json.append(ball)
        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return prev_row[0], prev_row[1:7]
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the coming Saturday. On Saturday from 21:00 onwards, and all day
    # Sunday, roll over to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    draw_passed = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=(12 if draw_passed else 5) - weekday)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction driver.
    practice = Practice(resources_path)

    # Data collection: parse the stored history; the last non-blank line is
    # kept in last_json, which only the disabled crawl call below consumes.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]
    # Fixed weekly ticket.
    practice.predict1(picks)
    # Filter-based prediction.
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    # Send the picks to Telegram in messages of at most 100 entries; the
    # header travels with the first message.
    pending = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for start in range(0, len(picks), 100):
        for offset, ball in enumerate(picks[start:start + 100]):
            pending += " {}. {}\n".format(start + offset + 1, str(ball))
        practice.bot.sendMsg("{}".format(pending))
        pending = ''

    size = len(picks)
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

179
practice_2.py Normal file
View File

@@ -0,0 +1,179 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_2 import BallFilter
class Practice:
    """Collect lotto draw history and build the weekly list of picks.

    ``craw`` downloads winning numbers into the history files, ``predict1``
    contributes a fixed ticket and ``predict2`` keeps every 6-of-45
    combination for which ``BallFilter.filter`` returns nothing.
    """

    # Telegram client; replaced by a real instance in __init__.
    bot = None
    # Placeholders that no method of this class assigns or reads.
    preprocessor = None
    predictor = None
    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram client.

        Args:
            resources_path: Accepted for interface compatibility; this
                constructor neither stores nor reads it.
        """
        self.bot = TelegramBot()
        return

    def craw(self, lottoHistoryFile, drwNo=None):
        """Download winning numbers and store them in the history files.

        Results go to ``lottoHistoryFile + ".json"`` (one raw JSON response
        per line) and ``lottoHistoryFile + ".txt"`` (one CSV row per draw:
        draw number, six numbers, bonus number).

        Args:
            lottoHistoryFile: Path prefix shared by the two history files.
            drwNo: Draw number to fetch and append. When ``None`` both files
                are truncated and draws are fetched from 1 upwards until the
                API stops reporting success.

        Returns:
            ``[n1, n2, n3, n4, n5, n6, bonus]`` of the last draw stored, or
            ``None`` when nothing was stored.
        """
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # The context managers close both files on every exit path, including
        # the early return for an unavailable draw and request/JSON errors.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if single_draw:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                return self._store_draw(jsonFp, textFp, drwNo, result)
            idx = 1
            while True:
                result = self._fetch_draw(idx)
                if result is None:
                    break
                ball = self._store_draw(jsonFp, textFp, idx, result)
                idx += 1
                # Fixed pause between consecutive requests.
                time.sleep(0.5)
        return ball

    def _fetch_draw(self, draw_no):
        """Request one draw; return its JSON dict, or None unless 'success'."""
        url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw_no)
        result = requests.post(url).json()
        if result['returnValue'] != 'success':
            return None
        return result

    def _store_draw(self, jsonFp, textFp, draw_no, result):
        """Write one draw to both files, echo it, and return its numbers."""
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        numbers = [result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
                   result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
                   result['bnusNo']]
        row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + numbers)
        textFp.write(row + "\n")
        print(row)
        return numbers

    def predict1(self, result_json):
        """Append the ticket that is played every week to ``result_json``."""
        result_json.append([6, 7, 10, 11, 20, 45])
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every combination that passes the filter to ``result_json``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` and
                ``lotto_history.txt``.
            ymd: Date string handed to ``BallFilter.getNextNo``
                (presumably the upcoming draw date as YYYYMMDD; confirm
                against BallFilter).
            result_json: List that receives the surviving combinations, each
                as a list of six ints.

        Returns:
            ``(p_no, p_ball)``: the draw number stored in the row whose
            ``no`` equals ``getNextNo(ymd) - 1`` and that row's six numbers.

        Raises:
            IndexError: If the history has no row for the previous draw.
        """
        history_json = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(history_json)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))
        history_txt = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(history_txt, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        # Iterate lazily: building a list of all 8,145,060 combinations first
        # costs a large amount of memory and changes nothing in the result.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            rejected_by = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(rejected_by) > 0:
                continue
            result_json.append(ball)
        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return prev_row[0], prev_row[1:7]
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the coming Saturday. On Saturday from 21:00 onwards, and all day
    # Sunday, roll over to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    draw_passed = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=(12 if draw_passed else 5) - weekday)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction driver.
    practice = Practice(resources_path)

    # Data collection: parse the stored history; the last non-blank line is
    # kept in last_json, which only the disabled crawl call below consumes.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]
    # Fixed weekly ticket.
    practice.predict1(picks)
    # Filter-based prediction.
    p_no, p_ball = practice.predict2(resources_path, ymd, picks)

    # Send the picks to Telegram in messages of at most 100 entries; the
    # header travels with the first message.
    pending = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for start in range(0, len(picks), 100):
        for offset, ball in enumerate(picks[start:start + 100]):
            pending += " {}. {}\n".format(start + offset + 1, str(ball))
        practice.bot.sendMsg("{}".format(pending))
        pending = ''

    size = len(picks)
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

546
practice_3.py Normal file
View File

@@ -0,0 +1,546 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import copy
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_3 import BallFilter
class Practice:
    """Weekly lotto prediction pipeline with an adaptive filter stage.

    ``predict1`` contributes fixed tickets, ``predict2`` keeps every
    combination the default ``BallFilter`` accepts, and ``predict3`` tightens
    or relaxes the filter ruleset until the number of surviving combinations
    lies between ``TARGET_MIN_SURVIVORS`` and ``TARGET_MAX_SURVIVORS`` or the
    time budget ``PREDICT_TIMEOUT_SECONDS`` is used up.
    """

    # Telegram client; replaced by a real instance in __init__.
    bot = None
    # Placeholders that no method of this class assigns or reads.
    preprocessor = None
    predictor = None
    extract_count = None
    # Bounds on how many combinations predict3 aims to return.
    TARGET_MIN_SURVIVORS = 30
    TARGET_MAX_SURVIVORS = 150
    # Wall-clock budget shared by all enumeration stages of one predict3 call.
    PREDICT_TIMEOUT_SECONDS = 180

    def __init__(self, resources_path):
        """Create the Telegram client and remember the resources directory.

        Args:
            resources_path: Directory that holds ``lotto_history.json``.
        """
        self.bot = TelegramBot()
        self.resources_path = resources_path
        return

    def craw(self, lottoHistoryFile, drwNo=None):
        """Download winning numbers and store them in the history files.

        Results go to ``lottoHistoryFile + ".json"`` (one raw JSON response
        per line) and ``lottoHistoryFile + ".txt"`` (one CSV row per draw:
        draw number, six numbers, bonus number).

        Args:
            lottoHistoryFile: Path prefix shared by the two history files.
            drwNo: Draw number to fetch and append. When ``None`` both files
                are truncated and draws are fetched from 1 upwards until the
                API stops reporting success.

        Returns:
            ``[n1, n2, n3, n4, n5, n6, bonus]`` of the last draw stored, or
            ``None`` when nothing was stored.
        """
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # The context managers close both files on every exit path, including
        # the early return for an unavailable draw and request/JSON errors.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if single_draw:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                return self._store_draw(jsonFp, textFp, drwNo, result)
            idx = 1
            while True:
                result = self._fetch_draw(idx)
                if result is None:
                    break
                ball = self._store_draw(jsonFp, textFp, idx, result)
                idx += 1
                # Fixed pause between consecutive requests.
                time.sleep(0.5)
        return ball

    def _fetch_draw(self, draw_no):
        """Request one draw; return its JSON dict, or None unless 'success'."""
        url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw_no)
        result = requests.post(url).json()
        if result['returnValue'] != 'success':
            return None
        return result

    def _store_draw(self, jsonFp, textFp, draw_no, result):
        """Write one draw to both files, echo it, and return its numbers."""
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        numbers = [result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
                   result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
                   result['bnusNo']]
        row = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + numbers)
        textFp.write(row + "\n")
        print(row)
        return numbers

    def predict1(self, result_json):
        """Append the eleven fixed weekly tickets to ``result_json``."""
        fixed_tickets = (
            (6, 7, 10, 11, 20, 45),
            (2, 7, 17, 28, 35, 39),
            (6, 10, 19, 25, 33, 35),
            (3, 17, 20, 24, 35, 45),
            (5, 15, 18, 29, 36, 41),
            (6, 15, 20, 23, 37, 43),
            (8, 15, 19, 23, 38, 41),
            (5, 11, 19, 24, 40, 45),
            (9, 16, 18, 23, 35, 43),
            (7, 13, 19, 28, 33, 44),
            (7, 11, 18, 29, 37, 42),
        )
        for ticket in fixed_tickets:
            result_json.append(list(ticket))
        print("회차(predict1)")
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every combination that passes the filter to ``result_json``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` and
                ``lotto_history.txt``.
            ymd: Date string handed to ``BallFilter.getNextNo``
                (presumably the upcoming draw date as YYYYMMDD; confirm
                against BallFilter).
            result_json: List that receives the surviving combinations, each
                as a list of six ints.

        Returns:
            ``(p_no, p_ball)``: the draw number stored in the row whose
            ``no`` equals ``getNextNo(ymd) - 1`` and that row's six numbers.

        Raises:
            IndexError: If the history has no row for the previous draw.
        """
        history_json = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(history_json)
        no = ballFilter.getNextNo(ymd)
        print("회차(predict2): {}".format(no))
        history_txt = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(history_txt, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        # Iterate lazily, as _collect_candidates does: building a list of all
        # 8,145,060 combinations first only costs memory.
        for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            rejected_by = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(rejected_by) > 0:
                continue
            result_json.append(ball)
        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        return prev_row[0], prev_row[1:7]

    def predict3(self, resources_path, ymd, result_json):
        """Append an adaptively filtered set of combinations to ``result_json``.

        The base ruleset is tried first. Too many survivors trigger the
        tightened rulesets (and, failing those, a score-based trim); too few
        trigger progressively relaxed rulesets. A timeout at any stage, an
        empty result, or a short result is topped up with variations of the
        previous draw.

        Args:
            resources_path: Directory holding the two history files.
            ymd: Date string handed to ``BallFilter.getNextNo``.
            result_json: List that receives the chosen combinations.

        Returns:
            ``(p_no, p_ball)``: previous draw number and its six numbers in
            ascending order.

        Raises:
            IndexError: If the history has no row for the previous draw.
        """
        candidates = [i for i in range(1, 46)]
        history_json = os.path.join(resources_path, 'lotto_history.json')
        no = BallFilter(history_json).getNextNo(ymd)
        print("회차(predict3): {}".format(no))
        predict_start_ts = time.time()
        deadline_ts = predict_start_ts + self.PREDICT_TIMEOUT_SECONDS
        history_txt = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(history_txt, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        prev_row = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = prev_row[0]
        p_ball = sorted(prev_row[1:7])

        base_ruleset = self._get_base_ruleset()

        # Tightened rulesets: the second adds sum constraints to the first.
        tight_enabled = {
            "paper_patterns": True,
            "ban_triples_legacy": True,
            "all_in_previous7": True,
            "previous_neighbors": True,
        }
        tight_allowed = {
            "ac_value": [8, 9],
            "uniq_last_digit_count": [4, 5],
            "even_count": [2, 3, 4],
        }
        tighter_allowed = dict(tight_allowed)
        tighter_allowed["sum"] = [112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148]
        tighter_allowed["sum_prev_diff"] = [13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40]
        tighten_rulesets = [
            self._build_ruleset(
                base_ruleset=base_ruleset,
                enabled_overrides=tight_enabled,
                allowed_overrides=allowed,
            )
            for allowed in (tight_allowed, tighter_allowed)
        ]

        # Relaxed rulesets: each step disables its filters in addition to all
        # filters disabled by the steps before it.
        relax_steps = (
            ("paper_patterns", "ban_triples_legacy"),
            ("previous_neighbors", "all_in_previous7"),
            ("weeks_8_count", "weeks_12_count", "weeks_16_count", "weeks_20_count"),
        )
        relax_rulesets = []
        disabled = {}
        for step in relax_steps:
            for filter_name in step:
                disabled[filter_name] = False
            relax_rulesets.append(
                self._build_ruleset(base_ruleset=base_ruleset, enabled_overrides=dict(disabled))
            )

        min_survivors = self.TARGET_MIN_SURVIVORS
        max_survivors = self.TARGET_MAX_SURVIVORS

        def collect(ruleset, stage, stop_when_gt=None, stop_when_gte=None):
            # Every stage shares the inputs and the single deadline.
            return self._collect_candidates(
                candidates=candidates,
                no=no,
                df_ball=df_ball,
                ruleset=ruleset,
                stop_when_gt=stop_when_gt,
                stop_when_gte=stop_when_gte,
                stage_name=stage,
                predict_start_ts=predict_start_ts,
                deadline_ts=deadline_ts,
            )

        def on_timeout(partial):
            return self._finalize_on_timeout(partial, p_ball, min_survivors, max_survivors)

        base_info = collect(base_ruleset, "base", stop_when_gt=max_survivors)
        current = base_info["candidates"]
        if base_info["timed_out"]:
            chosen = on_timeout(current)
            stage_name = "base_timeout_fallback"
            print("predict3 stage: {}, survivors: {}".format(stage_name, len(chosen)))
            for ball in chosen:
                result_json.append(ball)
            return p_no, p_ball

        chosen = current
        stage_name = "base"
        if len(current) > max_survivors:
            stage_name = "base_overflow"
            for idx, rs in enumerate(tighten_rulesets, start=1):
                t_info = collect(rs, "tighten_{}".format(idx), stop_when_gt=max_survivors)
                t = t_info["candidates"]
                if t_info["timed_out"]:
                    chosen = on_timeout(t)
                    stage_name = "tighten_{}_timeout_fallback".format(idx)
                    break
                if min_survivors <= len(t) <= max_survivors:
                    chosen = t
                    stage_name = "tighten_{}".format(idx)
                    break
                if len(t) <= max_survivors:
                    # Below the minimum: keep it, but still try the next set.
                    chosen = t
                    stage_name = "tighten_{}".format(idx)
            if len(chosen) > max_survivors:
                # No tightened set got under the cap: enumerate the tightest
                # one without an early stop and keep the best-scoring entries.
                full_info = collect(tighten_rulesets[-1], "tighten_full_rank")
                full_for_ranking = full_info["candidates"]
                if full_info["timed_out"]:
                    chosen = on_timeout(full_for_ranking)
                    stage_name = "tighten_rank_timeout_fallback"
                else:
                    chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
                    stage_name = "tighten_rank_trim"
        elif len(current) < min_survivors:
            stage_name = "base_underflow"
            for idx, rs in enumerate(relax_rulesets, start=1):
                r_info = collect(rs, "relax_{}".format(idx), stop_when_gte=min_survivors)
                r = r_info["candidates"]
                chosen = r
                stage_name = "relax_{}".format(idx)
                if r_info["timed_out"]:
                    chosen = on_timeout(r)
                    stage_name = "relax_{}_timeout_fallback".format(idx)
                    break
                if len(r) >= min_survivors:
                    break

        if len(chosen) == 0:
            stage_name = "relax_zero_fallback"
            chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
        elif len(chosen) < min_survivors:
            stage_name = "{}_fill".format(stage_name)
            fill = self._fallback_candidates_from_prev(
                p_ball,
                min_survivors - len(chosen),
                exclude=set(tuple(x) for x in chosen),
            )
            chosen.extend(fill)
        print("predict3 stage: {}, survivors: {}".format(stage_name, len(chosen)))
        for ball in chosen:
            result_json.append(ball)
        return p_no, p_ball

    def _get_base_ruleset(self):
        """Return a deep copy of ``m1.ruleset`` from a freshly built BallFilter."""
        history_json = os.path.join(self.resources_path, "lotto_history.json")
        return copy.deepcopy(BallFilter(history_json).m1.ruleset)

    def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
        """Return a copy of ``base_ruleset`` with per-filter overrides applied.

        Args:
            base_ruleset: Ruleset dict; it is deep-copied, never modified.
            enabled_overrides: ``{filter_name: flag}``; sets
                ``filters[name]["enabled"]`` to ``bool(flag)``.
            allowed_overrides: ``{filter_name: values}``; enables the filter
                and sets ``filters[name]["allowed"]`` to ``list(values)``.

        Returns:
            The new ruleset dict.
        """
        ruleset = copy.deepcopy(base_ruleset)
        filters = ruleset.setdefault("filters", {})
        for name, flag in (enabled_overrides or {}).items():
            filters.setdefault(name, {})["enabled"] = bool(flag)
        for name, values in (allowed_overrides or {}).items():
            entry = filters.setdefault(name, {})
            entry["enabled"] = True
            entry["allowed"] = list(values)
        return ruleset

    def _collect_candidates(
        self,
        candidates,
        no,
        df_ball,
        ruleset,
        stop_when_gt=None,
        stop_when_gte=None,
        stage_name="base",
        predict_start_ts=None,
        deadline_ts=None,
    ):
        """Enumerate 6-number combinations and keep those the filter accepts.

        Args:
            candidates: Numbers to combine.
            no: Draw number passed to ``BallFilter.filter``.
            df_ball: History frame passed to ``BallFilter.filter``.
            ruleset: Ruleset the ``BallFilter`` is constructed with.
            stop_when_gt: Stop once more than this many survivors exist.
            stop_when_gte: Stop once at least this many survivors exist.
            stage_name: Label used in progress output.
            predict_start_ts: Start time used for elapsed-time output.
            deadline_ts: ``time.time()`` value at which enumeration gives up.

        Returns:
            Dict with ``candidates`` (list of lists), ``timed_out`` (bool) and
            ``processed`` (1-based index of the last combination reached).
        """
        history_json = os.path.join(self.resources_path, "lotto_history.json")
        ballFilter = BallFilter(history_json, ruleset=ruleset)
        result = []
        last_idx = 0

        def elapsed():
            if predict_start_ts is None:
                return 0.0
            return time.time() - predict_start_ts

        for idx, combo in enumerate(itertools.combinations(candidates, 6), start=1):
            last_idx = idx
            if deadline_ts is not None and deadline_ts <= time.time():
                print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed(), len(result)))
                return {"candidates": result, "timed_out": True, "processed": idx}
            if idx % 1000000 == 0:
                print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed(), len(result)))
            b = list(combo)
            if len(ballFilter.filter(ball=b, no=no, until_end=False, df=df_ball)) != 0:
                continue
            result.append(b)
            if stop_when_gt is not None and len(result) > stop_when_gt:
                return {"candidates": result, "timed_out": False, "processed": idx}
            if stop_when_gte is not None and len(result) >= stop_when_gte:
                return {"candidates": result, "timed_out": False, "processed": idx}
        return {"candidates": result, "timed_out": False, "processed": last_idx}

    def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
        """Turn a partial enumeration into a result of acceptable size.

        More than ``max_survivors`` entries are trimmed by score; fewer than
        ``min_survivors`` are topped up with variations of ``prev_ball``.
        The input list itself is not modified.
        """
        chosen = list(partial_candidates)
        if len(chosen) > max_survivors:
            return self._rank_and_trim(chosen, prev_ball, max_survivors)
        if len(chosen) < min_survivors:
            chosen.extend(
                self._fallback_candidates_from_prev(
                    prev_ball,
                    min_survivors - len(chosen),
                    exclude=set(tuple(x) for x in chosen),
                )
            )
        return chosen

    def _rank_and_trim(self, candidates, prev_ball, limit):
        """Return the ``limit`` lowest-scoring candidates, ties in input order."""
        ranked = sorted(candidates, key=lambda ball: self._score_candidate(ball, prev_ball))
        return ranked[:limit]

    def _score_candidate(self, ball, prev_ball):
        """Return a penalty score for ``ball``; lower ranks first.

        The score adds the absolute difference between the sums of ``ball``
        and ``prev_ball``, twice the distance of the even count from 3, twice
        the distance of the distinct-last-digit count from 5, and one point
        per pair of consecutive numbers.
        """
        ordered = sorted(ball)
        even_cnt = sum(1 for x in ball if x % 2 == 0)
        uniq_last = len({x % 10 for x in ball})
        contiguous_penalty = sum(
            1 for lower, upper in zip(ordered, ordered[1:]) if upper - lower == 1
        )
        return (
            abs(sum(ball) - sum(prev_ball))
            + abs(even_cnt - 3) * 2
            + abs(uniq_last - 5) * 2
            + contiguous_penalty
        )

    def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
        """Derive up to ``need_count`` combinations by perturbing ``prev_ball``.

        Each delta pattern is added to the sorted previous draw, the whole
        combination is shifted upwards by 0..8, values are clamped to 1..45,
        and results that contain duplicates or are already known are skipped.

        Args:
            prev_ball: Six numbers of the previous draw.
            need_count: Maximum number of combinations to return.
            exclude: Tuples that must not be returned. The set is copied, so
                the caller's object is never modified.

        Returns:
            List of sorted six-number lists; may be shorter than
            ``need_count`` when the patterns are exhausted.
        """
        seen = set(exclude) if exclude else set()
        seed = sorted(prev_ball)
        out = []
        delta_patterns = [
            (0, 0, 0, 0, 0, 0),
            (-1, 0, 0, 0, 0, 1),
            (0, -1, 0, 0, 1, 0),
            (0, 0, -1, 1, 0, 0),
            (-2, 0, 0, 0, 0, 2),
            (0, -2, 0, 0, 2, 0),
            (0, 0, -2, 2, 0, 0),
            (-1, -1, 0, 0, 1, 1),
            (1, 0, -1, 0, 0, 0),
            (0, 1, 0, -1, 0, 0),
            (1, -1, 1, -1, 1, -1),
            (-1, 1, -1, 1, -1, 1),
        ]
        shift = 0
        while len(out) < need_count and shift <= 8:
            for delta in delta_patterns:
                cand = sorted(
                    min(45, max(1, seed[i] + delta[i] + shift)) for i in range(6)
                )
                key = tuple(cand)
                # Clamping can collapse two numbers onto the same value.
                if len(set(cand)) != 6 or key in seen:
                    continue
                seen.add(key)
                out.append(cand)
                if len(out) >= need_count:
                    break
            shift += 1
        return out

    def _merge_unique_balls(self, base_balls, extra_balls):
        """Append to ``base_balls`` each extra ball not already present.

        Balls are compared as sorted tuples; new balls are appended as list
        copies in their given order. ``base_balls`` is modified in place and
        also returned.
        """
        seen = {tuple(sorted(x)) for x in base_balls}
        for ball in extra_balls:
            key = tuple(sorted(ball))
            if key in seen:
                continue
            base_balls.append(list(ball))
            seen.add(key)
        return base_balls

    def _sorted_unique_balls(self, balls):
        """Sort within each ball, drop duplicates, then sort lexicographically.

        Returns:
            A new list of lists of ints.
        """
        unique_keys = {tuple(sorted(b)) for b in balls}
        return [list(key) for key in sorted(unique_keys)]
if __name__ == '__main__':
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')

    # Target the coming Saturday. On Saturday from 21:00 onwards, and all day
    # Sunday, roll over to the Saturday of the following week.
    today = datetime.today()
    weekday = today.weekday()
    draw_passed = weekday == 6 or (weekday == 5 and today.hour > 20)
    this_weekend = today + timedelta(days=(12 if draw_passed else 5) - weekday)
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))

    # Lotto prediction driver.
    practice = Practice(resources_path)

    # Data collection: parse the stored history; the last non-blank line is
    # kept in last_json, which only the disabled crawl call below consumes.
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                continue
            last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)

    result_json = {ymd: []}
    picks = result_json[ymd]
    # Fixed weekly tickets.
    practice.predict1(picks)

    # Filter-based predictions (existing and new): merge both results, sort
    # them, and attach them after the predict1 tickets.
    predict2_json = []
    p_no, p_ball = practice.predict2(resources_path, ymd, predict2_json)
    predict3_json = []
    p_no3, p_ball3 = practice.predict3(resources_path, ymd, predict3_json)
    merged_predict = []
    for extra in (predict2_json, predict3_json):
        practice._merge_unique_balls(merged_predict, extra)
    merged_predict = practice._sorted_unique_balls(merged_predict)
    # Append merged_predict to the predict1 tickets in sorted order,
    # skipping duplicates.
    practice._merge_unique_balls(picks, merged_predict)
    # Both predictors agree on the previous draw: report predict3's numbers.
    if p_no3 == p_no:
        p_ball = p_ball3

    # Send the picks to Telegram in messages of at most 100 entries; the
    # header travels with the first message.
    pending = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for start in range(0, len(picks), 100):
        for offset, ball in enumerate(picks[start:start + 100]):
            pending += " {}. {}\n".format(start + offset + 1, str(ball))
        practice.bot.sendMsg("{}".format(pending))
        pending = ''

    size = len(picks)
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

216
practice_3_FilterTest.py Normal file
View File

@@ -0,0 +1,216 @@
import os
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test helpers that run ``BallFilter`` against the draw history."""

    # Filter under test; replaced by a real instance in __init__.
    ballFilter = None

    def __init__(self, resources_path):
        """Build the ``BallFilter`` from ``<resources_path>/lotto_history.json``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)
        return

    def find_filter_method(self, df_ball, filter_ball=None):
        """Report which filters reject past winning combinations.

        Walks the history from the last row back to row index 20, runs the
        filter on each winning combination, and prints per-draw results plus
        several frequency summaries.

        Args:
            df_ball: History frame with columns ``no, b1..b6, bn``.
            filter_ball: Unused; kept for interface compatibility.

        Returns:
            Number of draws whose winning combination triggered no filter.
        """
        win_count = 0
        no_filter_ball = {}
        printLog = True
        filter_dic = {}      # filter name -> number of draws it rejected
        filter_dic_len = {}  # rejection count -> list of filter-name lists
        filter_dic_1 = {}    # filter name -> draws rejected by it alone
        filter_dic_2 = {}    # "a,b" -> draws rejected by exactly that pair
        for i in range(len(df_ball) - 1, 19, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                if printLog:
                    print("\t", no, filter_type)
            # Grouped by rejection count so draws with the fewest filters can
            # be listed first.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        def by_count_desc(counts):
            # Stable sort: equal counts keep first-seen order.
            return sorted(counts.items(), key=lambda x: x[1], reverse=True)

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len.keys()):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        print("\n\t[걸러진 유일 필터]")
        for name, count in by_count_desc(filter_dic_1):
            print("\t\t>", name, "->", count)
        print("\n\t[2개 필터에 걸린 경우]")
        for name, count in by_count_desc(filter_dic_2):
            print("\t\t>", name, "->", count)
        print("\n\t[Filter 유형 별 걸린 개수]")
        for name, count in by_count_desc(filter_dic):
            print("\t\t>", name, "->", count)
        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print("\tcount: {:,} / total: {:,}".format(len(no_filter_ball), len(df_ball)))
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None):
        """Return every 6-of-45 combination the filter accepts for draw ``no``.

        Args:
            no: Draw number passed to ``BallFilter.filter``.
            df_ball: History frame passed to ``BallFilter.filter``.
            filter_ball: Optional numbers; any combination containing one of
                them is skipped without consulting the filter.

        Returns:
            List of accepted combinations as tuples.
        """
        # Build the exclusion set once instead of once per combination.
        banned = set(filter_ball) if filter_ball is not None else None
        final_candidates = []
        # Iterate lazily instead of materialising all 8,145,060 combinations.
        for idx, ball in enumerate(itertools.combinations(range(1, 46), 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if banned is not None and banned.intersection(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print the draws whose winning numbers pass ``extract_final_candidates``.

        Args:
            df_ball: History frame; rows are visited from last to index 1.
            p_win_count: Reference count echoed in the summary line.
            filter_ball: Optional numbers; draws containing any are skipped.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
        return

    def validate(self, df_ball, nos=None):
        """Check, per draw, whether exhaustive enumeration reaches the winner.

        For each draw number every 6-of-45 combination is passed through
        ``extract_final_candidates`` until one is both accepted and equal to
        the winning numbers.

        Args:
            df_ball: History frame with columns ``no, b1..b6, bn``.
            nos: Draw numbers to validate; ``None`` is treated as no draws.

        Returns:
            ``{draw_no: winning_numbers}`` for the draws that were reached.
        """
        win_history = {}
        # The declared default used to raise TypeError when iterated.
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            for idx, combo in enumerate(itertools.combinations(range(1, 46), 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
                if 0 == len(filter_type) and len(set(answer) & set(ball)) == 6:
                    win_history[no] = answer
                    print("win.. no: {}, answer: {}".format(no, str(answer)))
                    break
        return win_history
if __name__ == '__main__':
    # Load the draw history (no header row) and name its columns.
    resources_path = 'resources'
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
    filter_ball = []
    filterTest = FilterTest(resources_path)

    # Step 1: find out which filters reject past winners, and time the run.
    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, filter_ball)
    elapsed_seconds = time.time() - start
    process_time = datetime.timedelta(seconds=elapsed_seconds)
    print("process_time: ", process_time)

    # Disabled "step 0" (final candidate selection), kept as an inert string.
    """
print("\n\n")
no = df_ball['no'].values[-1]
ball = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = ball[1:7]
print("STEP #0. 최종 후보 선정")
start = time.time()
final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
process_time = datetime.timedelta(seconds=time.time() - start)
print("process_time: ", process_time)
print(" > size: {}".format(len(final_candidates)))
file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
with open(file_name, 'w+') as outFp:
for ball in final_candidates:
ball_str = [str(b) for b in answer]
outFp.write("{}\n".format(','.join(ball_str)))
print('{}회, 정답: {}\n'.format(no, str(answer)))
"""
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (fixed in the feature file): 22 wins

490
practice_3_new.py Normal file
View File

@@ -0,0 +1,490 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
import json
import os
import copy
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_3 import BallFilter
class Practice:
    """Collect lotto draw history and build the weekly list of candidate combinations.

    ``predict1`` contributes a fixed set of combinations. ``predict2`` scans
    all 6-of-45 combinations through ``BallFilter`` and tightens or relaxes
    the ruleset until the number of survivors falls inside
    ``[TARGET_MIN_SURVIVORS, TARGET_MAX_SURVIVORS]`` or the time budget
    ``PREDICT_TIMEOUT_SECONDS`` is used up.
    """

    bot = None
    preprocessor = None
    predictor = None
    extract_count = None

    # Desired size range of the filtered candidate list and the overall
    # wall-clock budget (seconds) shared by every scan in predict2.
    TARGET_MIN_SURVIVORS = 30
    TARGET_MAX_SURVIVORS = 150
    PREDICT_TIMEOUT_SECONDS = 180

    def __init__(self, resources_path):
        """Create the Telegram bot wrapper and remember the resources directory."""
        self.bot = TelegramBot()
        self.resources_path = resources_path

    def craw(self, lottoHistoryFile, drwNo=None):
        """Fetch winning numbers and store them as ``<file>.json`` and ``<file>.txt``.

        Args:
            lottoHistoryFile: Path prefix; ``.json`` and ``.txt`` are appended.
            drwNo: When given, only that draw is fetched and appended to the
                existing files. When ``None``, both files are rewritten from
                draw 1 upward until the service stops answering ``success``.

        Returns:
            ``[n1, ..., n6, bonus]`` of the last stored draw, or ``None`` when
            nothing was stored.
        """
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # ``with`` guarantees both files are closed on the early return and on
        # network/JSON errors; the previous version leaked them in those cases.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if single_draw:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                return self._store_draw(drwNo, result, jsonFp, textFp)

            idx = 1
            while True:
                result = self._fetch_draw(idx)
                if result is None:
                    break
                ball = self._store_draw(idx, result, jsonFp, textFp)
                idx += 1
                # Pause between requests.
                time.sleep(0.5)
        return ball

    def _fetch_draw(self, draw_no):
        """Request one draw; return the decoded JSON dict, or None unless it reports success."""
        url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw_no)
        res = requests.post(url)
        result = res.json()
        if result['returnValue'] != 'success':
            return None
        return result

    def _store_draw(self, draw_no, result, jsonFp, textFp):
        """Write one draw to both history files, echo it, and return its seven numbers."""
        numbers = [
            result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
            result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
            result['bnusNo'],
        ]
        line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + numbers)
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        textFp.write(line + "\n")
        print(line)
        return numbers

    def predict1(self, result_json):
        """Append the fixed weekly combinations to ``result_json`` (a list)."""
        fixed_combinations = (
            (6, 7, 10, 11, 20, 45),
            (2, 7, 17, 28, 35, 39),
            (6, 10, 19, 25, 33, 35),
            (3, 17, 20, 24, 35, 45),
            (5, 15, 18, 29, 36, 41),
            (6, 15, 20, 23, 37, 43),
            (8, 15, 19, 23, 38, 41),
            (5, 11, 19, 24, 40, 45),
            (9, 16, 18, 23, 35, 43),
            (7, 13, 19, 28, 33, 44),
            (7, 11, 18, 29, 37, 42),
        )
        for combo in fixed_combinations:
            result_json.append(list(combo))
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append filter-based candidates for the draw on ``ymd`` to ``result_json``.

        The base ruleset is scanned first. If it yields too many survivors the
        tightened rulesets are tried, if too few the relaxed ones. Every scan
        shares one deadline; when it passes, whatever was collected so far is
        trimmed or topped up and returned.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Target draw date string handed to ``BallFilter.getNextNo``.
            result_json: List that receives the chosen combinations.

        Returns:
            ``(p_no, p_ball)``: the previous draw number and its six winning
            numbers, sorted.
        """
        candidates = list(range(1, 46))
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        no = BallFilter(lottoHistoryFileName).getNextNo(ymd)
        print("회차: {}".format(no))

        predict_start_ts = time.time()
        deadline_ts = predict_start_ts + self.PREDICT_TIMEOUT_SECONDS

        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']
        p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = p_ball[0]
        p_ball = sorted(p_ball[1:7])

        # Rulesets for the base / tighten / relax stages.
        base_ruleset = self._get_base_ruleset()
        tighten_enabled = {
            "paper_patterns": True,
            "ban_triples_legacy": True,
            "all_in_previous7": True,
            "previous_neighbors": True,
        }
        tighten_allowed = {
            "ac_value": [8, 9],
            "uniq_last_digit_count": [4, 5],
            "even_count": [2, 3, 4],
        }
        tighten_rulesets = [
            self._build_ruleset(
                base_ruleset=base_ruleset,
                enabled_overrides=tighten_enabled,
                allowed_overrides=tighten_allowed,
            ),
            self._build_ruleset(
                base_ruleset=base_ruleset,
                enabled_overrides=tighten_enabled,
                allowed_overrides=dict(
                    tighten_allowed,
                    sum=[112, 114, 121, 123, 126, 127, 131, 132, 138, 146, 148],
                    sum_prev_diff=[13, 14, 17, 18, 26, 28, 29, 30, 32, 39, 40],
                ),
            ),
        ]
        # Each relax stage switches off more filters than the one before.
        relax_1 = {
            "paper_patterns": False,
            "ban_triples_legacy": False,
        }
        relax_2 = dict(relax_1, previous_neighbors=False, all_in_previous7=False)
        relax_3 = dict(
            relax_2,
            weeks_8_count=False,
            weeks_12_count=False,
            weeks_16_count=False,
            weeks_20_count=False,
        )
        relax_rulesets = [
            self._build_ruleset(base_ruleset=base_ruleset, enabled_overrides=overrides)
            for overrides in (relax_1, relax_2, relax_3)
        ]

        min_survivors = self.TARGET_MIN_SURVIVORS
        max_survivors = self.TARGET_MAX_SURVIVORS
        # Arguments shared by every scan.
        scan_args = dict(
            candidates=candidates,
            no=no,
            df_ball=df_ball,
            predict_start_ts=predict_start_ts,
            deadline_ts=deadline_ts,
        )

        chosen = []
        stage_name = "base"
        current_info = self._collect_candidates(
            ruleset=base_ruleset,
            stop_when_gt=max_survivors,
            stage_name="base",
            **scan_args
        )
        current = current_info["candidates"]
        if current_info["timed_out"]:
            chosen = self._finalize_on_timeout(current, p_ball, min_survivors, max_survivors)
            stage_name = "base_timeout_fallback"
            print("candidate_stage: {}, survivors: {}".format(stage_name, len(chosen)))
            for ball in chosen:
                result_json.append(ball)
            return p_no, p_ball

        if min_survivors <= len(current) <= max_survivors:
            chosen = current
        elif len(current) > max_survivors:
            chosen = current
            stage_name = "base_overflow"
            for idx, rs in enumerate(tighten_rulesets, start=1):
                t_info = self._collect_candidates(
                    ruleset=rs,
                    stop_when_gt=max_survivors,
                    stage_name="tighten_{}".format(idx),
                    **scan_args
                )
                t = t_info["candidates"]
                if t_info["timed_out"]:
                    chosen = self._finalize_on_timeout(t, p_ball, min_survivors, max_survivors)
                    stage_name = "tighten_{}_timeout_fallback".format(idx)
                    break
                if min_survivors <= len(t) <= max_survivors:
                    chosen = t
                    stage_name = "tighten_{}".format(idx)
                    break
                if len(t) <= max_survivors:
                    # Below the minimum: keep it, but still try the next stage.
                    # NOTE(review): no lower-bound fill is applied on this
                    # path, so fewer than min_survivors may be returned.
                    chosen = t
                    stage_name = "tighten_{}".format(idx)
            if len(chosen) > max_survivors:
                # Enforce the upper bound: rescan the tightest ruleset without
                # early exit and keep only the best-scored entries.
                full_info = self._collect_candidates(
                    ruleset=tighten_rulesets[-1],
                    stop_when_gt=None,
                    stage_name="tighten_full_rank",
                    **scan_args
                )
                full_for_ranking = full_info["candidates"]
                if full_info["timed_out"]:
                    chosen = self._finalize_on_timeout(full_for_ranking, p_ball, min_survivors, max_survivors)
                    stage_name = "tighten_rank_timeout_fallback"
                else:
                    chosen = self._rank_and_trim(full_for_ranking, p_ball, max_survivors)
                    stage_name = "tighten_rank_trim"
        else:
            chosen = current
            stage_name = "base_underflow"
            for idx, rs in enumerate(relax_rulesets, start=1):
                # Relaxing only has to reach the lower bound, so each scan
                # stops as soon as min_survivors candidates were found.
                r_info = self._collect_candidates(
                    ruleset=rs,
                    stop_when_gt=None,
                    stop_when_gte=min_survivors,
                    stage_name="relax_{}".format(idx),
                    **scan_args
                )
                r = r_info["candidates"]
                chosen = r
                stage_name = "relax_{}".format(idx)
                if r_info["timed_out"]:
                    chosen = self._finalize_on_timeout(r, p_ball, min_survivors, max_survivors)
                    stage_name = "relax_{}_timeout_fallback".format(idx)
                    break
                if len(r) >= min_survivors:
                    break
            if len(chosen) == 0:
                # Nothing survived even the most relaxed ruleset: derive the
                # minimum number of combinations from the previous draw.
                stage_name = "relax_zero_fallback"
                chosen = self._fallback_candidates_from_prev(p_ball, min_survivors)
            elif len(chosen) < min_survivors:
                # Lower-bound guard: top up the shortfall with combinations
                # derived from the previous draw.
                stage_name = "{}_fill".format(stage_name)
                fill = self._fallback_candidates_from_prev(
                    p_ball,
                    min_survivors - len(chosen),
                    exclude=set(tuple(x) for x in chosen),
                )
                chosen.extend(fill)

        print("candidate_stage: {}, survivors: {}".format(stage_name, len(chosen)))
        for ball in chosen:
            result_json.append(ball)
        return p_no, p_ball

    def _get_base_ruleset(self):
        """Return a deep copy of the ruleset held by a freshly built ``BallFilter``."""
        history_json = os.path.join(self.resources_path, "lotto_history.json")
        base_filter = BallFilter(history_json)
        return copy.deepcopy(base_filter.m1.ruleset)

    def _build_ruleset(self, base_ruleset, enabled_overrides=None, allowed_overrides=None):
        """Return a copy of ``base_ruleset`` with per-filter overrides applied.

        Args:
            base_ruleset: Ruleset dict; it is deep-copied, never modified.
            enabled_overrides: ``{filter_name: bool}`` written to ``enabled``.
            allowed_overrides: ``{filter_name: values}``; the filter is
                switched on and ``allowed`` is set to a list copy of ``values``.
        """
        ruleset = copy.deepcopy(base_ruleset)
        filters = ruleset.setdefault("filters", {})
        for key, value in (enabled_overrides or {}).items():
            filters.setdefault(key, {})["enabled"] = bool(value)
        for key, values in (allowed_overrides or {}).items():
            entry = filters.setdefault(key, {})
            entry["enabled"] = True
            entry["allowed"] = list(values)
        return ruleset

    def _collect_candidates(
        self,
        candidates,
        no,
        df_ball,
        ruleset,
        stop_when_gt=None,
        stop_when_gte=None,
        stage_name="base",
        predict_start_ts=None,
        deadline_ts=None,
    ):
        """Scan all 6-combinations of ``candidates`` and keep those the filter accepts.

        A combination is kept when ``BallFilter.filter`` returns an empty
        result for it. The scan ends early when the survivor count exceeds
        ``stop_when_gt``, reaches ``stop_when_gte``, or ``deadline_ts``
        (a ``time.time()`` value) has passed.

        Returns:
            dict with ``candidates`` (list of lists), ``timed_out`` (bool) and
            ``processed`` (number of combinations visited).
        """
        lottoHistoryFileName = os.path.join(self.resources_path, "lotto_history.json")
        ballFilter = BallFilter(lottoHistoryFileName, ruleset=ruleset)
        result = []

        def elapsed_seconds():
            # Elapsed time is only used for the progress messages.
            return (time.time() - predict_start_ts) if predict_start_ts is not None else 0.0

        last_idx = 0
        for idx, ball in enumerate(itertools.combinations(candidates, 6), start=1):
            last_idx = idx
            if deadline_ts is not None and deadline_ts <= time.time():
                print(" - [{}] timeout after {:,} processed (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed_seconds(), len(result)))
                return {
                    "candidates": result,
                    "timed_out": True,
                    "processed": idx,
                }
            if idx % 1000000 == 0:
                print(" - [{}] {:,} processed... (elapsed: {:.1f}s, survivors: {:,})".format(stage_name, idx, elapsed_seconds(), len(result)))
            b = list(ball)
            if len(ballFilter.filter(ball=b, no=no, until_end=False, df=df_ball)) == 0:
                result.append(b)
                if stop_when_gt is not None and len(result) > stop_when_gt:
                    return {
                        "candidates": result,
                        "timed_out": False,
                        "processed": idx,
                    }
                if stop_when_gte is not None and len(result) >= stop_when_gte:
                    return {
                        "candidates": result,
                        "timed_out": False,
                        "processed": idx,
                    }
        return {
            "candidates": result,
            "timed_out": False,
            "processed": last_idx,
        }

    def _finalize_on_timeout(self, partial_candidates, prev_ball, min_survivors, max_survivors):
        """Bring a partial scan result into the ``[min, max]`` size range.

        Too many entries are ranked and trimmed; too few are topped up with
        combinations derived from ``prev_ball``. The input list is not modified.
        """
        chosen = list(partial_candidates)
        if len(chosen) > max_survivors:
            chosen = self._rank_and_trim(chosen, prev_ball, max_survivors)
        elif len(chosen) < min_survivors:
            fill = self._fallback_candidates_from_prev(
                prev_ball,
                min_survivors - len(chosen),
                exclude=set(tuple(x) for x in chosen),
            )
            chosen.extend(fill)
        return chosen

    def _rank_and_trim(self, candidates, prev_ball, limit):
        """Return the ``limit`` lowest-scoring candidates; ties keep input order."""
        ranked = sorted(candidates, key=lambda ball: self._score_candidate(ball, prev_ball))
        return ranked[:limit]

    def _score_candidate(self, ball, prev_ball):
        """Score a combination; lower is better.

        The score adds the distance of its sum from the previous draw's sum,
        twice its distance from 3 even numbers, twice its distance from 5
        distinct last digits, and one point per pair of consecutive numbers.
        """
        sum_diff = abs(sum(ball) - sum(prev_ball))
        even_cnt = sum(1 for x in ball if x % 2 == 0)
        uniq_last = len({x % 10 for x in ball})
        s = sorted(ball)
        contiguous_penalty = sum(1 for i in range(1, len(s)) if s[i] - s[i - 1] == 1)
        return sum_diff + abs(even_cnt - 3) * 2 + abs(uniq_last - 5) * 2 + contiguous_penalty

    def _fallback_candidates_from_prev(self, prev_ball, need_count, exclude=None):
        """Derive up to ``need_count`` combinations by nudging the previous draw.

        Each delta pattern is added to the sorted previous numbers, then the
        whole combination is shifted up by 0..8 and clamped to 1..45. Results
        with repeated numbers, or already present in ``exclude``, are skipped.

        Args:
            prev_ball: Six numbers of the previous draw.
            need_count: Maximum number of combinations to return.
            exclude: Optional set of tuples that must not be returned. It is
                copied, so the caller's set is left untouched.

        Returns:
            List of sorted 6-number lists; may be shorter than ``need_count``.
        """
        seen = set(exclude) if exclude else set()
        seed = sorted(prev_ball)
        out = []
        delta_patterns = [
            (0, 0, 0, 0, 0, 0),
            (-1, 0, 0, 0, 0, 1),
            (0, -1, 0, 0, 1, 0),
            (0, 0, -1, 1, 0, 0),
            (-2, 0, 0, 0, 0, 2),
            (0, -2, 0, 0, 2, 0),
            (0, 0, -2, 2, 0, 0),
            (-1, -1, 0, 0, 1, 1),
            (1, 0, -1, 0, 0, 0),
            (0, 1, 0, -1, 0, 0),
            (1, -1, 1, -1, 1, -1),
            (-1, 1, -1, 1, -1, 1),
        ]
        shift = 0
        while len(out) < need_count and shift <= 8:
            for delta in delta_patterns:
                cand = sorted(min(45, max(1, seed[i] + delta[i] + shift)) for i in range(6))
                if len(set(cand)) != 6:
                    continue
                t = tuple(cand)
                if t in seen:
                    continue
                seen.add(t)
                out.append(cand)
                if len(out) >= need_count:
                    break
            shift += 1
        return out
if __name__ == '__main__':
    # Script entry point: pick the target draw date, build this week's
    # candidate list and send it through the Telegram bot.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')
    # weekday() is 5 on Saturday and 6 on Sunday.
    today = datetime.today()
    if today.weekday() == 5:
        if today.hour > 20:
            # Saturday from 21:00 on: 12 - 5 = 7 days ahead (next Saturday).
            this_weekend = today + timedelta(days=(12 - today.weekday()))
        else:
            # Saturday before 21:00: 5 - 5 = 0 days ahead (today).
            this_weekend = today + timedelta(days=(5 - today.weekday()))
    elif today.weekday() == 6:
        # Sunday: 12 - 6 = 6 days ahead (the coming Saturday).
        this_weekend = today + timedelta(days=(12 - today.weekday()))
    else:
        # Monday..Friday: the Saturday of the current week.
        this_weekend = today + timedelta(days=(5 - today.weekday()))
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))
    # Lotto prediction
    practice = Practice(resources_path)
    # Data collection: read the history file and keep the last non-blank
    # line as JSON (only used by the disabled craw() call below).
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)
    result_json = {ymd: []}
    # Fixed combinations, identical every week
    practice.predict1(result_json[ymd])
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])
    # The first message starts with a header (last week's draw, this week's
    # draw number); combinations are sent in batches of 100 per message.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for i, ball in enumerate(result_json[ymd]):
        p_str += " {}. {}\n".format((i+1), str(ball))
        if (i+1) % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    # Send the remaining partial batch, if any.
    if len(result_json[ymd]) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))
    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

189
practice_3_old.py Normal file
View File

@@ -0,0 +1,189 @@
# 웹 호출 라이브러리를 호출합니다.
import time
import requests
from DataCrawler import DataCrawler
import json
import os
import pandas as pd
import itertools
from datetime import datetime, timedelta
from TelegramBot import TelegramBot
from filter_model_3 import BallFilter
class Practice:
    """Collect lotto draw history and build the weekly list of candidate combinations.

    ``predict1`` contributes a fixed set of combinations; ``predict2`` adds
    every 6-of-45 combination for which ``BallFilter.filter`` returns an
    empty result.
    """

    bot = None
    preprocessor = None
    predictor = None
    extract_count = None

    def __init__(self, resources_path):
        """Create the Telegram bot wrapper and remember the resources directory."""
        self.bot = TelegramBot()
        self.resources_path = resources_path

    def craw(self, lottoHistoryFile, drwNo=None):
        """Fetch winning numbers and store them as ``<file>.json`` and ``<file>.txt``.

        Args:
            lottoHistoryFile: Path prefix; ``.json`` and ``.txt`` are appended.
            drwNo: When given, only that draw is fetched and appended to the
                existing files. When ``None``, both files are rewritten from
                draw 1 upward until the service stops answering ``success``.

        Returns:
            ``[n1, ..., n6, bonus]`` of the last stored draw, or ``None`` when
            nothing was stored.
        """
        single_draw = drwNo is not None
        mode = 'a' if single_draw else 'w'
        ball = None
        # ``with`` guarantees both files are closed on the early return and on
        # network/JSON errors; the previous version leaked them in those cases.
        with open(lottoHistoryFile + ".json", mode, encoding="utf-8") as jsonFp, \
                open(lottoHistoryFile + ".txt", mode, encoding="utf-8") as textFp:
            if single_draw:
                result = self._fetch_draw(drwNo)
                if result is None:
                    return None
                return self._store_draw(drwNo, result, jsonFp, textFp)

            idx = 1
            while True:
                result = self._fetch_draw(idx)
                if result is None:
                    break
                ball = self._store_draw(idx, result, jsonFp, textFp)
                idx += 1
                # Pause between requests.
                time.sleep(0.5)
        return ball

    def _fetch_draw(self, draw_no):
        """Request one draw; return the decoded JSON dict, or None unless it reports success."""
        url = 'https://dhlottery.co.kr/common.do?method=getLottoNumber&drwNo=' + str(draw_no)
        res = requests.post(url)
        result = res.json()
        if result['returnValue'] != 'success':
            return None
        return result

    def _store_draw(self, draw_no, result, jsonFp, textFp):
        """Write one draw to both history files, echo it, and return its seven numbers."""
        numbers = [
            result['drwtNo1'], result['drwtNo2'], result['drwtNo3'],
            result['drwtNo4'], result['drwtNo5'], result['drwtNo6'],
            result['bnusNo'],
        ]
        line = "%d,%d,%d,%d,%d,%d,%d,%d" % tuple([draw_no] + numbers)
        jsonFp.write(json.dumps(result, ensure_ascii=False) + "\n")
        textFp.write(line + "\n")
        print(line)
        return numbers

    def predict1(self, result_json):
        """Append the fixed weekly combinations to ``result_json`` (a list)."""
        fixed_combinations = (
            (6, 7, 10, 11, 20, 45),
            (2, 7, 17, 28, 35, 39),
            (6, 10, 19, 25, 33, 35),
            (3, 17, 20, 24, 35, 45),
            (5, 15, 18, 29, 36, 41),
            (6, 15, 20, 23, 37, 43),
            (8, 15, 19, 23, 38, 41),
            (5, 11, 19, 24, 40, 45),
            (9, 16, 18, 23, 35, 43),
            (7, 13, 19, 28, 33, 44),
            (7, 11, 18, 29, 37, 42),
        )
        for combo in fixed_combinations:
            result_json.append(list(combo))
        return

    def predict2(self, resources_path, ymd, result_json):
        """Append every combination accepted by the filter to ``result_json``.

        Args:
            resources_path: Directory holding ``lotto_history.json`` / ``.txt``.
            ymd: Target draw date string handed to ``BallFilter.getNextNo``.
            result_json: List that receives the accepted combinations.

        Returns:
            ``(p_no, p_ball)``: the previous draw number and its six winning
            numbers in file order.
        """
        candidates = list(range(1, 46))
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        ballFilter = BallFilter(lottoHistoryFileName)
        no = ballFilter.getNextNo(ymd)
        print("회차: {}".format(no))

        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
        df_ball = pd.read_csv(lottoHistoryFileName, header=None)
        df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

        # Stream the combinations; the previous version first built a list of
        # all 8,145,060 tuples.
        for idx, combo in enumerate(itertools.combinations(candidates, 6)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            ball = list(combo)
            filter_type = ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if 0 < len(filter_type):
                continue
            result_json.append(ball)

        p_ball = df_ball[df_ball['no'] == no - 1].values.tolist()[0]
        p_no = p_ball[0]
        p_ball = p_ball[1:7]
        return p_no, p_ball
if __name__ == '__main__':
    # Script entry point: pick the target draw date, build this week's
    # candidate list and send it through the Telegram bot.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')
    # weekday() is 5 on Saturday and 6 on Sunday.
    today = datetime.today()
    if today.weekday() == 5:
        if today.hour > 20:
            # Saturday from 21:00 on: 12 - 5 = 7 days ahead (next Saturday).
            this_weekend = today + timedelta(days=(12 - today.weekday()))
        else:
            # Saturday before 21:00: 5 - 5 = 0 days ahead (today).
            this_weekend = today + timedelta(days=(5 - today.weekday()))
    elif today.weekday() == 6:
        # Sunday: 12 - 6 = 6 days ahead (the coming Saturday).
        this_weekend = today + timedelta(days=(12 - today.weekday()))
    else:
        # Monday..Friday: the Saturday of the current week.
        this_weekend = today + timedelta(days=(5 - today.weekday()))
    last_weekend = (this_weekend - timedelta(days=7)).strftime('%Y%m%d')
    ymd = this_weekend.strftime('%Y%m%d')
    print("ymd: {}".format(ymd))
    # Lotto prediction
    practice = Practice(resources_path)
    # Data collection: read the history file and keep the last non-blank
    # line as JSON (only used by the disabled craw() call below).
    lottoHistoryFile = PROJECT_HOME + '/resources/lotto_history'
    lottoHistoryFileName = lottoHistoryFile + '.json'
    with open(lottoHistoryFileName, "r", encoding='utf-8') as f:
        for line in f:
            if line != '\n':
                last_json = json.loads(line)
    #ball = practice.craw(lottoHistoryFile, drwNo=last_json['drwNo'] + 1)
    result_json = {ymd: []}
    # Fixed combinations, identical every week
    practice.predict1(result_json[ymd])
    # Filter-based prediction
    p_no, p_ball = practice.predict2(resources_path, ymd, result_json[ymd])
    # The first message starts with a header (last week's draw, this week's
    # draw number); combinations are sent in batches of 100 per message.
    p_str = "[지난주] {}\n - {} 회차, {}\n[금주] {}\n - {} 회차\n[모델#25]\n".format(last_weekend, p_no, str(p_ball), ymd, (p_no + 1))
    for i, ball in enumerate(result_json[ymd]):
        p_str += " {}. {}\n".format((i+1), str(ball))
        if (i+1) % 100 == 0:
            practice.bot.sendMsg("{}".format(p_str))
            p_str = ''
    # Send the remaining partial batch, if any.
    if len(result_json[ymd]) % 100 != 0:
        practice.bot.sendMsg("{}".format(p_str))
    size = len(result_json[ymd])
    print("size: {}".format(size))
    # https://youtu.be/QjBsui8Ob14?si=4dC3q8p0Yu5ZWK1K
    # https://www.youtube.com/watch?v=YwiHaa1KNwA
    print("done...")

25
requirements.txt Normal file
View File

@@ -0,0 +1,25 @@
#numpy==1.24.3
#pandas==2.0.3
#scikit-learn==1.3.0
#requests==2.31.0
bs4
numpy>=1.21.0
pandas>=1.3.0
scikit-learn>=1.0.0
requests>=2.25.0
python-telegram-bot>=13.0
# 고급 머신러닝
xgboost>=1.5.0
lightgbm>=3.2.0
# 통계 분석
scipy>=1.7.0
# 웹 스크래핑
beautifulsoup4>=4.9.0
# 데이터 시각화 (선택사항)
matplotlib>=3.3.0
seaborn>=0.11.0

1212
resources/lotto_history.json Normal file

File diff suppressed because it is too large Load Diff

1212
resources/lotto_history.txt Normal file

File diff suppressed because it is too large Load Diff

99
review_1.py Normal file
View File

@@ -0,0 +1,99 @@
import os
import time
import datetime
import pandas as pd
import itertools
from filter_model_1 import BallFilter
class FilterTestReview:
    """Replay past draws through the filter and tally what the survivors would have won."""

    ballFilter = None

    def __init__(self, resources_path):
        """Build the ``BallFilter`` from ``lotto_history.json`` under ``resources_path``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Filter all 6-of-45 combinations for each draw and report prize hits.

        A combination survives when ``self.ballFilter.filter`` returns an
        empty result for it. Survivors are compared with the draw's winning
        numbers: 6 matches is 1st prize, 5 matches plus the bonus number is
        2nd, 5 matches is 3rd, 4 matches is 4th and 3 matches is 5th.

        Args:
            df_ball: DataFrame with a ``no`` column, the six winning numbers
                in row positions 1..6 and the bonus number in position 7.
            nos: Iterable of draw numbers to replay. ``None`` is treated as
                "nothing to replay" (iterating the default used to raise
                ``TypeError``).

        Returns:
            ``(win_history, win_history_size)``: draws whose winning
            combination survived mapped to that combination, and every
            replayed draw mapped to its survivor count.
        """
        win_history = {}
        win_history_size = {}
        if nos is None:
            return win_history, win_history_size

        generation_balls = range(1, 46)
        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            bonus = balls[7]
            answer_set = set(answer)

            # Only the survivor count is reported, so a counter replaces the
            # list of all survivors; combinations are streamed, not stored.
            survivor_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}
            for idx, combo in enumerate(itertools.combinations(generation_balls, 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                survivor_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    win_history[no] = list(answer)
                    win_dic[1].append(ball)
                elif match == 5:
                    # 2nd prize: five matches and the bonus number included.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = survivor_count
            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history, win_history_size
if __name__ == '__main__':
    # Replay a fixed list of draws through the filter and print which of
    # them would have been won, together with the survivor count per draw.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')
    history_csv = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(history_csv, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # Draws to replay; use range(1126, 21, -1) instead for a full sweep.
    review_draws = [1057,1046,1022,900,841,816,696,593,574,426,356,324,303,245,147,139,71]

    reviewer = FilterTestReview(resources_path)
    started_at = time.time()
    win_history, win_history_size = reviewer.validate(df_ball, nos=review_draws)
    elapsed = datetime.timedelta(seconds=time.time() - started_at)
    print("process_time: ", elapsed)

    print("{} 회 당첨".format(len(win_history)))
    for draw_no in sorted(win_history):
        print("\t>{} > {} ({})".format(draw_no, win_history[draw_no], win_history_size[draw_no]))

99
review_2.py Normal file
View File

@@ -0,0 +1,99 @@
import os
import time
import datetime
import pandas as pd
import itertools
from filter_model_2 import BallFilter
class FilterTestReview:
    """Replay past draws through the filter and tally what the survivors would have won."""

    ballFilter = None

    def __init__(self, resources_path):
        """Build the ``BallFilter`` from ``lotto_history.json`` under ``resources_path``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Filter all 6-of-45 combinations for each draw and report prize hits.

        A combination survives when ``self.ballFilter.filter`` returns an
        empty result for it. Survivors are compared with the draw's winning
        numbers: 6 matches is 1st prize, 5 matches plus the bonus number is
        2nd, 5 matches is 3rd, 4 matches is 4th and 3 matches is 5th.

        Args:
            df_ball: DataFrame with a ``no`` column, the six winning numbers
                in row positions 1..6 and the bonus number in position 7.
            nos: Iterable of draw numbers to replay. ``None`` is treated as
                "nothing to replay" (iterating the default used to raise
                ``TypeError``).

        Returns:
            ``(win_history, win_history_size)``: draws whose winning
            combination survived mapped to that combination, and every
            replayed draw mapped to its survivor count.
        """
        win_history = {}
        win_history_size = {}
        if nos is None:
            return win_history, win_history_size

        generation_balls = range(1, 46)
        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            bonus = balls[7]
            answer_set = set(answer)

            # Only the survivor count is reported, so a counter replaces the
            # list of all survivors; combinations are streamed, not stored.
            survivor_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}
            for idx, combo in enumerate(itertools.combinations(generation_balls, 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                survivor_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    win_history[no] = list(answer)
                    win_dic[1].append(ball)
                elif match == 5:
                    # 2nd prize: five matches and the bonus number included.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = survivor_count
            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history, win_history_size
if __name__ == '__main__':
    # Replay a fixed list of draws through the filter and print which of
    # them would have been won, together with the survivor count per draw.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')
    history_csv = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(history_csv, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # Draws to replay; use range(1126, 21, -1) instead for a full sweep.
    review_draws = [1057,1046,1022,900,841,816,696,593,574,426,356,324,303,245,147,139,71]

    reviewer = FilterTestReview(resources_path)
    started_at = time.time()
    win_history, win_history_size = reviewer.validate(df_ball, nos=review_draws)
    elapsed = datetime.timedelta(seconds=time.time() - started_at)
    print("process_time: ", elapsed)

    print("{} 회 당첨".format(len(win_history)))
    for draw_no in sorted(win_history):
        print("\t>{} > {} ({})".format(draw_no, win_history[draw_no], win_history_size[draw_no]))

99
review_3.py Normal file
View File

@@ -0,0 +1,99 @@
import os
import time
import datetime
import pandas as pd
import itertools
from filter_model_3 import BallFilter
class FilterTestReview:
    """Replay past draws through the filter and tally what the survivors would have won."""

    ballFilter = None

    def __init__(self, resources_path):
        """Build the ``BallFilter`` from ``lotto_history.json`` under ``resources_path``."""
        lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(lottoHistoryFileName)

    def validate(self, df_ball, nos=None):
        """Filter all 6-of-45 combinations for each draw and report prize hits.

        A combination survives when ``self.ballFilter.filter`` returns an
        empty result for it. Survivors are compared with the draw's winning
        numbers: 6 matches is 1st prize, 5 matches plus the bonus number is
        2nd, 5 matches is 3rd, 4 matches is 4th and 3 matches is 5th.

        Args:
            df_ball: DataFrame with a ``no`` column, the six winning numbers
                in row positions 1..6 and the bonus number in position 7.
            nos: Iterable of draw numbers to replay. ``None`` is treated as
                "nothing to replay" (iterating the default used to raise
                ``TypeError``).

        Returns:
            ``(win_history, win_history_size)``: draws whose winning
            combination survived mapped to that combination, and every
            replayed draw mapped to its survivor count.
        """
        win_history = {}
        win_history_size = {}
        if nos is None:
            return win_history, win_history_size

        generation_balls = range(1, 46)
        for no in nos:
            print("[{} 회차]".format(no))
            balls = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = balls[1:7]
            bonus = balls[7]
            answer_set = set(answer)

            # Only the survivor count is reported, so a counter replaces the
            # list of all survivors; combinations are streamed, not stored.
            survivor_count = 0
            win_dic = {1: [], 2: [], 3: [], 4: [], 5: []}
            for idx, combo in enumerate(itertools.combinations(generation_balls, 6)):
                if idx % 1000000 == 0:
                    print(" - {} processed...".format(idx))
                ball = list(combo)
                filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
                if 0 < len(filter_type):
                    continue
                survivor_count += 1

                match = len(answer_set.intersection(ball))
                if match == 6:
                    win_history[no] = list(answer)
                    win_dic[1].append(ball)
                elif match == 5:
                    # 2nd prize: five matches and the bonus number included.
                    win_dic[2 if bonus in ball else 3].append(ball)
                elif match == 4:
                    win_dic[4].append(ball)
                elif match == 3:
                    win_dic[5].append(ball)

            win_history_size[no] = survivor_count
            print("no: {}, answer: {}, size: {}".format(no, answer, survivor_count))
            print(" > 1등: {}, 2등: {}, 3등: {}, 4등: {}, 5등: {}".format(len(win_dic[1]), len(win_dic[2]), len(win_dic[3]), len(win_dic[4]), len(win_dic[5])))
        return win_history, win_history_size
if __name__ == '__main__':
    # Replay a fixed list of draws through the filter and print which of
    # them would have been won, together with the survivor count per draw.
    PROJECT_HOME = '.'
    resources_path = os.path.join(PROJECT_HOME, 'resources')
    history_csv = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(history_csv, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # Draws to replay; use range(1126, 21, -1) instead for a full sweep.
    review_draws = [1057,1046,1022,900,841,816,696,593,574,426,356,324,303,245,147,139,71]

    reviewer = FilterTestReview(resources_path)
    started_at = time.time()
    win_history, win_history_size = reviewer.validate(df_ball, nos=review_draws)
    elapsed = datetime.timedelta(seconds=time.time() - started_at)
    print("process_time: ", elapsed)

    print("{} 회 당첨".format(len(win_history)))
    for draw_no in sorted(win_history):
        print("\t>{} > {} ({})".format(draw_no, win_history[draw_no], win_history_size[draw_no]))

236
test_1.py Normal file
View File

@@ -0,0 +1,236 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_1 import BallFilter
import time
import datetime
class FilterTest:
ballFilter = None
def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
# test는 이전회차/최근 N주 윈도우 feature가 필수이므로 전체 히스토리(json)를 사용해야 한다.
lottoHistoryFileName = os.path.join(resources_path, history_json)
self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)
return
def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
win_count = 0
no_filter_ball = {}
printLog = True
filter_dic = {}
filter_dic_len = {}
filter_dic_1 = {}
filter_dic_2 = {}
# df_ball 은 전체 히스토리일 수 있으며, 채점은 [start_no, end_no] 범위만 수행한다.
for i in range(len(df_ball) - 1, -1, -1):
no = df_ball['no'].iloc[i]
no = int(no)
if no < start_no or end_no < no:
continue
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = answer[1:7]
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
filter_type = list(filter_type)
size = len(filter_type)
if size == 0:
win_count += 1
no_filter_ball[no] = answer
print("\t", no)
elif size == 1:
key = filter_type[0]
if key not in filter_dic_1:
filter_dic_1[key] = 1
else:
filter_dic_1[key] += 1
if printLog:
print("\t", no, filter_type)
elif size == 2:
key = ','.join(filter_type)
if key not in filter_dic_2:
filter_dic_2[key] = 1
else:
filter_dic_2[key] += 1
if printLog:
print("\t", no, filter_type)
else:
if printLog:
print("\t", no, filter_type)
# 회차별 필터개수가 적은 것을 정렬하기 위함
if size not in filter_dic_len:
filter_dic_len[size] = []
filter_dic_len[size].append(filter_type)
for f_t in filter_type:
if f_t not in filter_dic:
filter_dic[f_t] = 1
else:
filter_dic[f_t] += 1
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
sorted_filter_dic_len = sorted(filter_dic_len.keys())
for filter_count in sorted_filter_dic_len:
for filter_type in filter_dic_len[filter_count]:
print("\t\t>{} > {}".format(filter_count, filter_type))
print("\n\t[걸러진 유일 필터]")
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_1)):
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
print("\n\t[2개 필터에 걸린 경우]")
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_2)):
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
print("\n\t[Filter 유형 별 걸린 개수]")
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic)):
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
total = max(0, end_no - start_no + 1)
rate = (100 * len(no_filter_ball) / total) if total else 0.0
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), total, rate))
for no in no_filter_ball:
print("\t\t>", no, no_filter_ball[no])
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), total, rate))
return win_count
def find_final_candidates(self, no, df_ball, filter_ball=None, max_number=45, pick_count=6):
    """Enumerate every combination and keep those that trigger no filter.

    Args:
        no: Draw number forwarded to the filter.
        df_ball: History DataFrame forwarded to the filter.
        filter_ball: Optional numbers to exclude; any combination containing
            one of them is dropped without consulting the filter.
        max_number: Highest ball number; combinations are drawn from
            1..max_number (default 45).
        pick_count: How many numbers form one combination (default 6).

    Returns:
        List of tuples (ascending numbers) for which the filter reported
        no hit.
    """
    # Build the exclusion set once instead of once per combination.
    excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
    final_candidates = []
    # Iterate lazily: the default 45C6 space has 8,145,060 combinations and
    # does not need to be held in memory as a list.
    combos = itertools.combinations(range(1, max_number + 1), pick_count)
    for idx, ball in enumerate(combos):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))
        if not excluded.isdisjoint(ball):
            continue
        filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if len(filter_type):
            continue
        final_candidates.append(ball)
    return final_candidates
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
    """Count historical draws whose winning numbers pass the final-candidate filter.

    Args:
        df_ball: History DataFrame; column 'no' is the draw number and
            columns 1-6 (positionally) are the winning numbers.
        p_win_count: Win count from an earlier step; only echoed in the
            summary line.
        filter_ball: Optional numbers to exclude; draws whose winning numbers
            contain any of them are skipped.

    Returns:
        The number of draws that produced no filter hit (previously the
        value was computed but discarded).
    """
    excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
    win_count = 0
    # Walk from the newest row down to row 1; row 0 is never checked
    # (presumably because it has no preceding draw -- TODO confirm).
    for i in range(len(df_ball) - 1, 0, -1):
        no = df_ball['no'].iloc[i]
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
        if not excluded.isdisjoint(answer):
            continue
        filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
        if len(filter_type) == 0:
            win_count += 1
            print("\t\t>{}. {}".format(no, answer))
    print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
    return win_count
def validate(self, df_ball, nos=None):
    """Check, for each draw in *nos*, whether its winning combination survives the filter.

    Args:
        df_ball: History DataFrame; column 'no' is the draw number and
            columns 1-6 (positionally) are the winning numbers.
        nos: Iterable of draw numbers to check. None is treated as "no
            draws" (the previous code raised TypeError on the default).

    Returns:
        Dict mapping each winning draw number to its winning numbers.

    Raises:
        IndexError: If a requested draw number is not present in df_ball.
    """
    win_history = {}
    for no in (nos if nos is not None else ()):
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
        # The old loop enumerated all 45C6 combinations and ran the filter on
        # each one, but only the combination equal to the answer could ever
        # win. Evaluate that single combination directly (ascending order,
        # as the enumeration produced it).
        # NOTE(review): assumes the filter result for a ball does not depend
        # on previously filtered balls -- confirm against BallFilter.
        candidate = sorted(set(answer))
        if len(candidate) != 6 or candidate[0] < 1 or candidate[-1] > 45:
            # Such an answer never matched any enumerated combination.
            continue
        filter_type = self.ballFilter.extract_final_candidates(candidate, no=no, until_end=True, df=df_ball)
        if 0 == len(filter_type):
            win_history[no] = answer
            print("win.. no: {}, answer: {}".format(no, str(answer)))
    return win_history
if __name__ == '__main__':
    # Script entry point: parse options, load the draw history and run the
    # filter-extraction step over the requested draw range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (optional). Default: filter_model_1.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources
    # Load the full history txt so previous-draw/window features are computed
    # normally, while only the test range is scored.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step kept as a no-op string literal. The export loop inside it
    # used to write `answer` for every candidate; it now writes each `ball`.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]
    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))
    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins

236
test_2.py Normal file
View File

@@ -0,0 +1,236 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_2 import BallFilter
import time
import datetime
class FilterTest:
    """Harness that runs a BallFilter over historical draws and reports which filters hit."""

    # Filter engine; replaced with a BallFilter instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter engine from the history JSON under *resources_path*.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset JSON path forwarded to BallFilter.
            history_json: File name of the history JSON inside resources_path.
        """
        # Testing needs previous-draw / recent-N-week window features, so the
        # full history (json) must be used.
        history_file = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(history_file, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print *title*, then every (key, count) of *counts*, highest count first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Score the draws in [start_no, end_no] and print which filters hit each one.

        Args:
            df_ball: History DataFrame; column 'no' is the draw number and
                columns 1-6 (positionally) are the winning numbers.
            start_no: First draw number (inclusive) to score.
            end_no: Last draw number (inclusive) to score.
            filter_ball: Accepted for call compatibility; not used by this
                method.

        Returns:
            Number of scored draws whose winning numbers triggered no filter.
        """
        print_log = True
        win_count = 0
        unfiltered_draws = {}  # draw no -> winning numbers with no filter hit
        hits_by_filter = {}    # filter name -> number of draws it hit
        hits_by_size = {}      # hit count -> list of per-draw hit lists
        single_hits = {}       # filter name -> draws where it was the only hit
        pair_hits = {}         # "a,b" -> draws hit by exactly these two filters

        # df_ball may hold the whole history; only [start_no, end_no] is scored.
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                unfiltered_draws[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    pair_hits[key] = pair_hits.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)
            # Group draws by hit count so the least-filtered ones can be
            # listed first.
            hits_by_size.setdefault(size, []).append(filter_type)
            for name in filter_type:
                hits_by_filter[name] = hits_by_filter.get(name, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(hits_by_size):
            for filter_type in hits_by_size[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        self._print_ranked("\n\t[걸러진 유일 필터]", single_hits)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", pair_hits)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", hits_by_filter)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(unfiltered_draws) / total) if total else 0.0
        # '{:.2f}' gives two decimals; the former '{:.2}' meant two
        # significant digits and printed e.g. '5e+01'.
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(unfiltered_draws), total, rate)
        print(summary)
        for no, answer in unfiltered_draws.items():
            print("\t\t>", no, answer)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_number=45, pick_count=6):
        """Enumerate every combination and keep those that trigger no filter.

        Args:
            no: Draw number forwarded to the filter.
            df_ball: History DataFrame forwarded to the filter.
            filter_ball: Optional numbers to exclude; any combination
                containing one of them is dropped without consulting the
                filter.
            max_number: Highest ball number; combinations are drawn from
                1..max_number (default 45).
            pick_count: How many numbers form one combination (default 6).

        Returns:
            List of tuples (ascending numbers) with no filter hit.
        """
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        final_candidates = []
        # Iterate lazily: the default 45C6 space has 8,145,060 combinations
        # and does not need to be held in memory as a list.
        combos = itertools.combinations(range(1, max_number + 1), pick_count)
        for idx, ball in enumerate(combos):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count historical draws whose winning numbers pass the final-candidate filter.

        Args:
            df_ball: History DataFrame (same layout as in find_filter_method).
            p_win_count: Win count from an earlier step; only echoed in the
                summary line.
            filter_ball: Optional numbers to exclude; draws whose winning
                numbers contain any of them are skipped.

        Returns:
            The number of draws with no filter hit (previously computed but
            discarded).
        """
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        win_count = 0
        # Row 0 is never checked (presumably because it has no preceding
        # draw -- TODO confirm).
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if not excluded.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
        return win_count

    def validate(self, df_ball, nos=None):
        """Check, for each draw in *nos*, whether its winning combination survives the filter.

        Args:
            df_ball: History DataFrame (same layout as in find_filter_method).
            nos: Iterable of draw numbers to check. None is treated as "no
                draws" (the previous code raised TypeError on the default).

        Returns:
            Dict mapping each winning draw number to its winning numbers.

        Raises:
            IndexError: If a requested draw number is not present in df_ball.
        """
        win_history = {}
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            # The old loop enumerated all 45C6 combinations and ran the
            # filter on each, but only the combination equal to the answer
            # could ever win. Evaluate that single combination directly.
            # NOTE(review): assumes the filter result for a ball does not
            # depend on previously filtered balls -- confirm.
            candidate = sorted(set(answer))
            if len(candidate) != 6 or candidate[0] < 1 or candidate[-1] > 45:
                # Such an answer never matched any enumerated combination.
                continue
            filter_type = self.ballFilter.extract_final_candidates(candidate, no=no, until_end=True, df=df_ball)
            if 0 == len(filter_type):
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
        return win_history
if __name__ == '__main__':
    # Script entry point: parse options, load the draw history and run the
    # filter-extraction step over the requested draw range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        # This script imports BallFilter from filter_model_2; the help text
        # used to name filter_model_1.py.
        help="Ruleset JSON path (optional). Default: filter_model_2.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources
    # Load the full history txt so previous-draw/window features are computed
    # normally, while only the test range is scored.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step kept as a no-op string literal. The export loop inside it
    # used to write `answer` for every candidate; it now writes each `ball`.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]
    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))
    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins

236
test_3.py Normal file
View File

@@ -0,0 +1,236 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
    """Harness that runs a BallFilter over historical draws and reports which filters hit."""

    # Filter engine; replaced with a BallFilter instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Build the filter engine from the history JSON under *resources_path*.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset JSON path forwarded to BallFilter.
            history_json: File name of the history JSON inside resources_path.
        """
        # Testing needs previous-draw / recent-N-week window features, so the
        # full history (json) must be used.
        history_file = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(history_file, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print *title*, then every (key, count) of *counts*, highest count first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no, filter_ball=None):
        """Score the draws in [start_no, end_no] and print which filters hit each one.

        Args:
            df_ball: History DataFrame; column 'no' is the draw number and
                columns 1-6 (positionally) are the winning numbers.
            start_no: First draw number (inclusive) to score.
            end_no: Last draw number (inclusive) to score.
            filter_ball: Accepted for call compatibility; not used by this
                method.

        Returns:
            Number of scored draws whose winning numbers triggered no filter.
        """
        print_log = True
        win_count = 0
        unfiltered_draws = {}  # draw no -> winning numbers with no filter hit
        hits_by_filter = {}    # filter name -> number of draws it hit
        hits_by_size = {}      # hit count -> list of per-draw hit lists
        single_hits = {}       # filter name -> draws where it was the only hit
        pair_hits = {}         # "a,b" -> draws hit by exactly these two filters

        # df_ball may hold the whole history; only [start_no, end_no] is scored.
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                unfiltered_draws[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    pair_hits[key] = pair_hits.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)
            # Group draws by hit count so the least-filtered ones can be
            # listed first.
            hits_by_size.setdefault(size, []).append(filter_type)
            for name in filter_type:
                hits_by_filter[name] = hits_by_filter.get(name, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(hits_by_size):
            for filter_type in hits_by_size[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        self._print_ranked("\n\t[걸러진 유일 필터]", single_hits)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", pair_hits)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", hits_by_filter)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(unfiltered_draws) / total) if total else 0.0
        # '{:.2f}' gives two decimals; the former '{:.2}' meant two
        # significant digits and printed e.g. '5e+01'.
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(unfiltered_draws), total, rate)
        print(summary)
        for no, answer in unfiltered_draws.items():
            print("\t\t>", no, answer)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_number=45, pick_count=6):
        """Enumerate every combination and keep those that trigger no filter.

        Args:
            no: Draw number forwarded to the filter.
            df_ball: History DataFrame forwarded to the filter.
            filter_ball: Optional numbers to exclude; any combination
                containing one of them is dropped without consulting the
                filter.
            max_number: Highest ball number; combinations are drawn from
                1..max_number (default 45).
            pick_count: How many numbers form one combination (default 6).

        Returns:
            List of tuples (ascending numbers) with no filter hit.
        """
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        final_candidates = []
        # Iterate lazily: the default 45C6 space has 8,145,060 combinations
        # and does not need to be held in memory as a list.
        combos = itertools.combinations(range(1, max_number + 1), pick_count)
        for idx, ball in enumerate(combos):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count historical draws whose winning numbers pass the final-candidate filter.

        Args:
            df_ball: History DataFrame (same layout as in find_filter_method).
            p_win_count: Win count from an earlier step; only echoed in the
                summary line.
            filter_ball: Optional numbers to exclude; draws whose winning
                numbers contain any of them are skipped.

        Returns:
            The number of draws with no filter hit (previously computed but
            discarded).
        """
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        win_count = 0
        # Row 0 is never checked (presumably because it has no preceding
        # draw -- TODO confirm).
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if not excluded.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
        return win_count

    def validate(self, df_ball, nos=None):
        """Check, for each draw in *nos*, whether its winning combination survives the filter.

        Args:
            df_ball: History DataFrame (same layout as in find_filter_method).
            nos: Iterable of draw numbers to check. None is treated as "no
                draws" (the previous code raised TypeError on the default).

        Returns:
            Dict mapping each winning draw number to its winning numbers.

        Raises:
            IndexError: If a requested draw number is not present in df_ball.
        """
        win_history = {}
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            # The old loop enumerated all 45C6 combinations and ran the
            # filter on each, but only the combination equal to the answer
            # could ever win. Evaluate that single combination directly.
            # NOTE(review): assumes the filter result for a ball does not
            # depend on previously filtered balls -- confirm.
            candidate = sorted(set(answer))
            if len(candidate) != 6 or candidate[0] < 1 or candidate[-1] > 45:
                # Such an answer never matched any enumerated combination.
                continue
            filter_type = self.ballFilter.extract_final_candidates(candidate, no=no, until_end=True, df=df_ball)
            if 0 == len(filter_type):
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
        return win_history
if __name__ == '__main__':
    # Script entry point: parse options, load the draw history and run the
    # filter-extraction step over the requested draw range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        # This script imports BallFilter from filter_model_3; the help text
        # used to name filter_model_1.py.
        help="Ruleset JSON path (optional). Default: filter_model_3.py 내장 ruleset 사용",
    )
    parser.add_argument("--start-no", type=int, default=1001)
    parser.add_argument("--end-no", type=int, default=1204)
    args = parser.parse_args()

    resources_path = args.resources
    # Load the full history txt so previous-draw/window features are computed
    # normally, while only the test range is scored.
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filter_ball=[]
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no, filter_ball=filter_ball)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step kept as a no-op string literal. The export loop inside it
    # used to write `answer` for every candidate; it now writes each `ball`.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]
    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))
    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins

231
train_1.py Normal file
View File

@@ -0,0 +1,231 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_1 import BallFilter
import time
import datetime
class FilterTest:
    """Harness that runs a BallFilter over historical draws and reports which filters hit."""

    # Filter engine; replaced with a BallFilter instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter engine from 'lotto_history.json' under *resources_path*.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset JSON path forwarded to BallFilter.
        """
        history_file = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(history_file, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print *title*, then every (key, count) of *counts*, highest count first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Score the draws in [start_no, end_no] and print which filters hit each one.

        The first 20 rows of df_ball are never scored (presumably a warm-up
        window for history-based features -- TODO confirm).

        Args:
            df_ball: History DataFrame; column 'no' is the draw number and
                columns 1-6 (positionally) are the winning numbers.
            start_no: First draw number (inclusive) to score.
            end_no: Last draw number (inclusive) to score.

        Returns:
            Number of scored draws whose winning numbers triggered no filter.
        """
        print_log = True
        win_count = 0
        scored = 0             # draws actually evaluated; summary denominator
        unfiltered_draws = {}  # draw no -> winning numbers with no filter hit
        hits_by_filter = {}    # filter name -> number of draws it hit
        hits_by_size = {}      # hit count -> list of per-draw hit lists
        single_hits = {}       # filter name -> draws where it was the only hit
        pair_hits = {}         # "a,b" -> draws hit by exactly these two filters

        for i in range(len(df_ball) - 1, 19, -1):
            # Plain int, consistent with the test scripts.
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            scored += 1
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                unfiltered_draws[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    pair_hits[key] = pair_hits.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)
            # Group draws by hit count so the least-filtered ones can be
            # listed first.
            hits_by_size.setdefault(size, []).append(filter_type)
            for name in filter_type:
                hits_by_filter[name] = hits_by_filter.get(name, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(hits_by_size):
            for filter_type in hits_by_size[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        self._print_ranked("\n\t[걸러진 유일 필터]", single_hits)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", pair_hits)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", hits_by_filter)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The denominator is the number of draws actually scored; the former
        # len(df_ball) counted the whole history (and divided by zero on an
        # empty frame). '{:.2f}' gives two decimals; the former '{:.2}' meant
        # two significant digits and printed e.g. '5e+01'.
        rate = (100 * len(unfiltered_draws) / scored) if scored else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(unfiltered_draws), scored, rate)
        print(summary)
        for no, answer in unfiltered_draws.items():
            print("\t\t>", no, answer)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_number=45, pick_count=6):
        """Enumerate every combination and keep those that trigger no filter.

        Args:
            no: Draw number forwarded to the filter.
            df_ball: History DataFrame forwarded to the filter.
            filter_ball: Optional numbers to exclude; any combination
                containing one of them is dropped without consulting the
                filter.
            max_number: Highest ball number; combinations are drawn from
                1..max_number (default 45).
            pick_count: How many numbers form one combination (default 6).

        Returns:
            List of tuples (ascending numbers) with no filter hit.
        """
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        final_candidates = []
        # Iterate lazily: the default 45C6 space has 8,145,060 combinations
        # and does not need to be held in memory as a list.
        combos = itertools.combinations(range(1, max_number + 1), pick_count)
        for idx, ball in enumerate(combos):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count historical draws whose winning numbers pass the final-candidate filter.

        Args:
            df_ball: History DataFrame (same layout as in find_filter_method).
            p_win_count: Win count from an earlier step; only echoed in the
                summary line.
            filter_ball: Optional numbers to exclude; draws whose winning
                numbers contain any of them are skipped.

        Returns:
            The number of draws with no filter hit (previously computed but
            discarded).
        """
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        win_count = 0
        # Row 0 is never checked (presumably because it has no preceding
        # draw -- TODO confirm).
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if not excluded.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
        return win_count

    def validate(self, df_ball, nos=None):
        """Check, for each draw in *nos*, whether its winning combination survives the filter.

        Args:
            df_ball: History DataFrame (same layout as in find_filter_method).
            nos: Iterable of draw numbers to check. None is treated as "no
                draws" (the previous code raised TypeError on the default).

        Returns:
            Dict mapping each winning draw number to its winning numbers.

        Raises:
            IndexError: If a requested draw number is not present in df_ball.
        """
        win_history = {}
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            # The old loop enumerated all 45C6 combinations and ran the
            # filter on each, but only the combination equal to the answer
            # could ever win. Evaluate that single combination directly.
            # NOTE(review): assumes the filter result for a ball does not
            # depend on previously filtered balls -- confirm.
            candidate = sorted(set(answer))
            if len(candidate) != 6 or candidate[0] < 1 or candidate[-1] > 45:
                # Such an answer never matched any enumerated combination.
                continue
            filter_type = self.ballFilter.extract_final_candidates(candidate, no=no, until_end=True, df=df_ball)
            if 0 == len(filter_type):
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
        return win_history
if __name__ == '__main__':
    # Script entry point: parse options, load the draw history and run the
    # filter-extraction step over the requested draw range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources
    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # The unused `filter_ball=[]` local was removed; nothing in this script
    # read it.
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step kept as a no-op string literal. The export loop inside it
    # used to write `answer` for every candidate; it now writes each `ball`.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]
    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))
    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins

231
train_2.py Normal file
View File

@@ -0,0 +1,231 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_2 import BallFilter
import time
import datetime
class FilterTest:
    """Harness that runs a BallFilter over historical draws and reports which filters hit."""

    # Filter engine; replaced with a BallFilter instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None):
        """Build the filter engine from 'lotto_history.json' under *resources_path*.

        Args:
            resources_path: Directory that contains the history file.
            ruleset_path: Optional ruleset JSON path forwarded to BallFilter.
        """
        history_file = os.path.join(resources_path, 'lotto_history.json')
        self.ballFilter = BallFilter(history_file, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print *title*, then every (key, count) of *counts*, highest count first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda item: item[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Score the draws in [start_no, end_no] and print which filters hit each one.

        The first 20 rows of df_ball are never scored (presumably a warm-up
        window for history-based features -- TODO confirm).

        Args:
            df_ball: History DataFrame; column 'no' is the draw number and
                columns 1-6 (positionally) are the winning numbers.
            start_no: First draw number (inclusive) to score.
            end_no: Last draw number (inclusive) to score.

        Returns:
            Number of scored draws whose winning numbers triggered no filter.
        """
        print_log = True
        win_count = 0
        scored = 0             # draws actually evaluated; summary denominator
        unfiltered_draws = {}  # draw no -> winning numbers with no filter hit
        hits_by_filter = {}    # filter name -> number of draws it hit
        hits_by_size = {}      # hit count -> list of per-draw hit lists
        single_hits = {}       # filter name -> draws where it was the only hit
        pair_hits = {}         # "a,b" -> draws hit by exactly these two filters

        for i in range(len(df_ball) - 1, 19, -1):
            # Plain int, consistent with the test scripts.
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            scored += 1
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                unfiltered_draws[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    single_hits[key] = single_hits.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    pair_hits[key] = pair_hits.get(key, 0) + 1
                if print_log:
                    print("\t", no, filter_type)
            # Group draws by hit count so the least-filtered ones can be
            # listed first.
            hits_by_size.setdefault(size, []).append(filter_type)
            for name in filter_type:
                hits_by_filter[name] = hits_by_filter.get(name, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(hits_by_size):
            for filter_type in hits_by_size[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        self._print_ranked("\n\t[걸러진 유일 필터]", single_hits)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", pair_hits)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", hits_by_filter)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        # The denominator is the number of draws actually scored; the former
        # len(df_ball) counted the whole history (and divided by zero on an
        # empty frame). '{:.2f}' gives two decimals; the former '{:.2}' meant
        # two significant digits and printed e.g. '5e+01'.
        rate = (100 * len(unfiltered_draws) / scored) if scored else 0.0
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(unfiltered_draws), scored, rate)
        print(summary)
        for no, answer in unfiltered_draws.items():
            print("\t\t>", no, answer)
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_number=45, pick_count=6):
        """Enumerate every combination and keep those that trigger no filter.

        Args:
            no: Draw number forwarded to the filter.
            df_ball: History DataFrame forwarded to the filter.
            filter_ball: Optional numbers to exclude; any combination
                containing one of them is dropped without consulting the
                filter.
            max_number: Highest ball number; combinations are drawn from
                1..max_number (default 45).
            pick_count: How many numbers form one combination (default 6).

        Returns:
            List of tuples (ascending numbers) with no filter hit.
        """
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        final_candidates = []
        # Iterate lazily: the default 45C6 space has 8,145,060 combinations
        # and does not need to be held in memory as a list.
        combos = itertools.combinations(range(1, max_number + 1), pick_count)
        for idx, ball in enumerate(combos):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Count historical draws whose winning numbers pass the final-candidate filter.

        Args:
            df_ball: History DataFrame (same layout as in find_filter_method).
            p_win_count: Win count from an earlier step; only echoed in the
                summary line.
            filter_ball: Optional numbers to exclude; draws whose winning
                numbers contain any of them are skipped.

        Returns:
            The number of draws with no filter hit (previously computed but
            discarded).
        """
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        win_count = 0
        # Row 0 is never checked (presumably because it has no preceding
        # draw -- TODO confirm).
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            if not excluded.isdisjoint(answer):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format( win_count, p_win_count, len(df_ball)-1) )
        return win_count

    def validate(self, df_ball, nos=None):
        """Check, for each draw in *nos*, whether its winning combination survives the filter.

        Args:
            df_ball: History DataFrame (same layout as in find_filter_method).
            nos: Iterable of draw numbers to check. None is treated as "no
                draws" (the previous code raised TypeError on the default).

        Returns:
            Dict mapping each winning draw number to its winning numbers.

        Raises:
            IndexError: If a requested draw number is not present in df_ball.
        """
        win_history = {}
        for no in (nos if nos is not None else ()):
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0][1:7]
            # The old loop enumerated all 45C6 combinations and ran the
            # filter on each, but only the combination equal to the answer
            # could ever win. Evaluate that single combination directly.
            # NOTE(review): assumes the filter result for a ball does not
            # depend on previously filtered balls -- confirm.
            candidate = sorted(set(answer))
            if len(candidate) != 6 or candidate[0] < 1 or candidate[-1] > 45:
                # Such an answer never matched any enumerated combination.
                continue
            filter_type = self.ballFilter.extract_final_candidates(candidate, no=no, until_end=True, df=df_ball)
            if 0 == len(filter_type):
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
        return win_history
if __name__ == '__main__':
    # Script entry point: parse options, load the draw history and run the
    # filter-extraction step over the requested draw range.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources
    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    # The unused `filter_ball=[]` local was removed; nothing in this script
    # read it.
    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled step kept as a no-op string literal. The export loop inside it
    # used to write `answer` for every candidate; it now writes each `ball`.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]
    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))
    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned to the feature file): 22 wins

231
train_3.py Normal file
View File

@@ -0,0 +1,231 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
ballFilter = None
def __init__(self, resources_path, ruleset_path=None):
lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.json')
self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)
return
def find_filter_method(self, df_ball, start_no, end_no):
win_count = 0
no_filter_ball = {}
printLog = True
filter_dic = {}
filter_dic_len = {}
filter_dic_1 = {}
filter_dic_2 = {}
for i in range(len(df_ball)-1, 19, -1):
no = df_ball['no'].iloc[i]
if no < start_no or end_no < no:
continue
answer = df_ball[df_ball['no'] == no].values.tolist()[0]
answer = answer[1:7]
filter_type = self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball)
filter_type = list(filter_type)
size = len(filter_type)
if size == 0:
win_count += 1
no_filter_ball[no] = answer
print("\t", no)
elif size == 1:
key = filter_type[0]
if key not in filter_dic_1:
filter_dic_1[key] = 1
else:
filter_dic_1[key] += 1
if printLog:
print("\t", no, filter_type)
elif size == 2:
key = ','.join(filter_type)
if key not in filter_dic_2:
filter_dic_2[key] = 1
else:
filter_dic_2[key] += 1
if printLog:
print("\t", no, filter_type)
else:
if printLog:
print("\t", no, filter_type)
# 회차별 필터개수가 적은 것을 정렬하기 위함
if size not in filter_dic_len:
filter_dic_len[size] = []
filter_dic_len[size].append(filter_type)
for f_t in filter_type:
if f_t not in filter_dic:
filter_dic[f_t] = 1
else:
filter_dic[f_t] += 1
print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
sorted_filter_dic_len = sorted(filter_dic_len.keys())
for filter_count in sorted_filter_dic_len:
for filter_type in filter_dic_len[filter_count]:
print("\t\t>{} > {}".format(filter_count, filter_type))
print("\n\t[걸러진 유일 필터]")
sorted_filter_dic_1 = sorted(filter_dic_1.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_1)):
print("\t\t>", sorted_filter_dic_1[i][0], "->", sorted_filter_dic_1[i][1])
print("\n\t[2개 필터에 걸린 경우]")
sorted_filter_dic_2 = sorted(filter_dic_2.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic_2)):
print("\t\t>", sorted_filter_dic_2[i][0], "->", sorted_filter_dic_2[i][1])
print("\n\t[Filter 유형 별 걸린 개수]")
sorted_filter_dic = sorted(filter_dic.items(), key=lambda x: x[1], reverse=True)
for i in range(len(sorted_filter_dic)):
print("\t\t>", sorted_filter_dic[i][0], "->", sorted_filter_dic[i][1])
print("\n\t# 필터에 걸리지 않고 당첨된 회차")
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), len(df_ball), 100 * len(no_filter_ball) / len(df_ball)))
for no in no_filter_ball:
print("\t\t>", no, no_filter_ball[no])
print("\tcount: {:,} / total: {:,} ({:.2})%".format(len(no_filter_ball), len(df_ball), 100 * len(no_filter_ball) / len(df_ball)))
return win_count
def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
    """Enumerate every combination that the filter does not reject.

    Args:
        no: Draw number forwarded to ``self.ballFilter.filter``.
        df_ball: Draw history frame forwarded to the filter as ``df``.
        filter_ball: Optional iterable of numbers; any combination that
            contains one of them is skipped without consulting the filter.
        max_ball: Highest ball number in the pool (numbers run 1..max_ball).
        pick: How many numbers make up one combination.

    Returns:
        List of tuples, in lexicographic order, for which the filter
        returned an empty result.
    """
    # Build the exclusion set once instead of once per combination.
    excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
    final_candidates = []
    # Stream the combinations: with the defaults there are 8,145,060 of them,
    # so materialising the full list first costs memory for no benefit.
    pool = range(1, max_ball + 1)
    for idx, ball in enumerate(itertools.combinations(pool, pick)):
        if idx % 1000000 == 0:
            print(" - {} processed...".format(idx))
        if not excluded.isdisjoint(ball):
            continue
        filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
        if len(filter_type):
            continue
        final_candidates.append(ball)
    return final_candidates
def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
    """Print every historical draw whose winning numbers pass the filter.

    Rows are visited from the last one down to index 1 (row 0 is never
    evaluated). A draw is skipped when it shares a number with
    ``filter_ball``. ``p_win_count`` is only echoed in the summary line.
    Nothing is returned.
    """
    passed = 0
    for row_idx in reversed(range(1, len(df_ball))):
        no = df_ball['no'].iloc[row_idx]
        matching_rows = df_ball[df_ball['no'] == no].values.tolist()
        answer = matching_rows[0][1:7]
        if filter_ball is not None:
            shared = set(answer) & set(filter_ball)
            if len(shared):
                continue
        filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
        if len(filter_type) != 0:
            continue
        passed += 1
        print("\t\t>{}. {}".format(no, answer))
    evaluated_total = len(df_ball) - 1
    print("\n\t> {} / {} p_win_count, {} total".format(passed, p_win_count, evaluated_total))
    return
def validate(self, df_ball, nos=None):
    """Return the draws in ``nos`` whose winning combination passes the filter.

    Args:
        df_ball: Draw history frame with a ``no`` column followed by the six
            winning numbers in columns 1..6.
        nos: Iterable of draw numbers to check. ``None`` means "nothing to
            check" and yields an empty result instead of raising.

    Returns:
        Dict mapping each passing draw number to its six winning numbers.

    The earlier implementation ran the filter over all 8,145,060
    combinations of 1..45 for every draw, yet could only ever record the
    single combination equal to the winning numbers. That combination is
    now evaluated directly, so the per-million progress lines are gone.
    NOTE(review): this assumes ``extract_final_candidates`` keeps no state
    between calls -- confirm against BallFilter.
    """
    win_history = {}
    if nos is None:
        return win_history
    for no in nos:
        print(no, "processing...")
        answer = df_ball[df_ball['no'] == no].values.tolist()[0]
        answer = answer[1:7]
        drawn = set(answer)
        # Ascending list of plain ints: the same object shape the exhaustive
        # enumeration over 1..45 handed to the filter for this draw.
        ball = [n for n in range(1, 46) if n in drawn]
        if len(ball) != 6:
            # No 6-number combination of 1..45 can equal this row.
            continue
        filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
        if len(filter_type) == 0:
            win_history[no] = answer
            print("win.. no: {}, answer: {}".format(no, str(answer)))
    return win_history
if __name__ == '__main__':
    # Entry point: run the filter over the draws numbered
    # --start-no..--end-no and report how long the pass took.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=1)
    parser.add_argument("--end-no", type=int, default=800)
    args = parser.parse_args()

    resources_path = args.resources
    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset)

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled "STEP #0" (final candidate selection), kept as an inert string.
    # Its write loop emits each candidate `ball`; it used to write the winning
    # `answer` once per candidate, which would have produced a useless file.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]
    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))
    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled STEP #2 (confirm the win count); win_count above feeds it.
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned in the feature file): 22 wins

234
valid_1.py Normal file
View File

@@ -0,0 +1,234 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_1 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test harness that runs lottery draws through a ``BallFilter``."""

    # Filter engine; replaced with a BallFilter instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the filter from ``resources_path/history_json``.

        Args:
            resources_path: Directory holding the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the history inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print ``title`` followed by ``key -> count`` lines, highest count first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Report which filters reject the winning numbers of each draw.

        Every draw with ``start_no <= no <= end_no`` is passed to
        ``self.ballFilter.filter``; per-draw results and aggregate tables
        are printed.

        Args:
            df_ball: Draw history frame (``no`` column, then six numbers).
                It may contain draws outside the scored range.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            Number of scored draws that tripped no filter at all.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers that tripped no filter
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # tripped-filter count -> list of tripped-filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                print("\t", no, filter_type)
            # Group draws by how many filters they tripped so the report can
            # list the draws with the fewest filters first.
            # NOTE(review): these aggregates are applied to every scored draw,
            # not only to draws with three or more filters -- confirm intent.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ".2f" gives two decimals; a bare ".2" means two significant digits
        # and renders e.g. 11.0 as "1.1e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
        """Enumerate every combination that the filter does not reject.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw history frame forwarded to the filter as ``df``.
            filter_ball: Optional iterable of numbers; any combination that
                contains one of them is skipped without consulting the filter.
            max_ball: Highest ball number in the pool (numbers run 1..max_ball).
            pick: How many numbers make up one combination.

        Returns:
            List of tuples, in lexicographic order, for which the filter
            returned an empty result.
        """
        # Build the exclusion set once instead of once per combination.
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        final_candidates = []
        # Stream the combinations: with the defaults there are 8,145,060 of
        # them, so materialising the full list first only costs memory.
        pool = range(1, max_ball + 1)
        for idx, ball in enumerate(itertools.combinations(pool, pick)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print every historical draw whose winning numbers pass the filter.

        Rows are visited from the last one down to index 1 (row 0 is never
        evaluated). A draw is skipped when it shares a number with
        ``filter_ball``. ``p_win_count`` is only echoed in the summary line.
        Nothing is returned.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Return the draws in ``nos`` whose winning combination passes the filter.

        Args:
            df_ball: Draw history frame (``no`` column, then six numbers).
            nos: Iterable of draw numbers to check. ``None`` means "nothing
                to check" and yields an empty result instead of raising.

        Returns:
            Dict mapping each passing draw number to its six winning numbers.

        The earlier implementation ran the filter over all 8,145,060
        combinations of 1..45 for every draw, yet could only ever record the
        single combination equal to the winning numbers. That combination is
        now evaluated directly, so the per-million progress lines are gone.
        NOTE(review): this assumes ``extract_final_candidates`` keeps no
        state between calls -- confirm against BallFilter.
        """
        win_history = {}
        if nos is None:
            return win_history
        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            drawn = set(answer)
            # Ascending list of plain ints: the same object shape the
            # exhaustive enumeration over 1..45 handed to the filter.
            ball = [n for n in range(1, 46) if n in drawn]
            if len(ball) != 6:
                # No 6-number combination of 1..45 can equal this row.
                continue
            filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
        return win_history
if __name__ == '__main__':
    # Entry point: run the filter over the draws numbered
    # --start-no..--end-no and report how long the pass took.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources
    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled "STEP #0" (final candidate selection), kept as an inert string.
    # Its write loop emits each candidate `ball`; it used to write the winning
    # `answer` once per candidate, which would have produced a useless file.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]
    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))
    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled STEP #2 (confirm the win count); win_count above feeds it.
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned in the feature file): 22 wins

234
valid_2.py Normal file
View File

@@ -0,0 +1,234 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_2 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test harness that runs lottery draws through a ``BallFilter``."""

    # Filter engine; replaced with a BallFilter instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the filter from ``resources_path/history_json``.

        Args:
            resources_path: Directory holding the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the history inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print ``title`` followed by ``key -> count`` lines, highest count first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Report which filters reject the winning numbers of each draw.

        Every draw with ``start_no <= no <= end_no`` is passed to
        ``self.ballFilter.filter``; per-draw results and aggregate tables
        are printed.

        Args:
            df_ball: Draw history frame (``no`` column, then six numbers).
                It may contain draws outside the scored range.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            Number of scored draws that tripped no filter at all.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers that tripped no filter
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # tripped-filter count -> list of tripped-filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                print("\t", no, filter_type)
            # Group draws by how many filters they tripped so the report can
            # list the draws with the fewest filters first.
            # NOTE(review): these aggregates are applied to every scored draw,
            # not only to draws with three or more filters -- confirm intent.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ".2f" gives two decimals; a bare ".2" means two significant digits
        # and renders e.g. 11.0 as "1.1e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
        """Enumerate every combination that the filter does not reject.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw history frame forwarded to the filter as ``df``.
            filter_ball: Optional iterable of numbers; any combination that
                contains one of them is skipped without consulting the filter.
            max_ball: Highest ball number in the pool (numbers run 1..max_ball).
            pick: How many numbers make up one combination.

        Returns:
            List of tuples, in lexicographic order, for which the filter
            returned an empty result.
        """
        # Build the exclusion set once instead of once per combination.
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        final_candidates = []
        # Stream the combinations: with the defaults there are 8,145,060 of
        # them, so materialising the full list first only costs memory.
        pool = range(1, max_ball + 1)
        for idx, ball in enumerate(itertools.combinations(pool, pick)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print every historical draw whose winning numbers pass the filter.

        Rows are visited from the last one down to index 1 (row 0 is never
        evaluated). A draw is skipped when it shares a number with
        ``filter_ball``. ``p_win_count`` is only echoed in the summary line.
        Nothing is returned.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Return the draws in ``nos`` whose winning combination passes the filter.

        Args:
            df_ball: Draw history frame (``no`` column, then six numbers).
            nos: Iterable of draw numbers to check. ``None`` means "nothing
                to check" and yields an empty result instead of raising.

        Returns:
            Dict mapping each passing draw number to its six winning numbers.

        The earlier implementation ran the filter over all 8,145,060
        combinations of 1..45 for every draw, yet could only ever record the
        single combination equal to the winning numbers. That combination is
        now evaluated directly, so the per-million progress lines are gone.
        NOTE(review): this assumes ``extract_final_candidates`` keeps no
        state between calls -- confirm against BallFilter.
        """
        win_history = {}
        if nos is None:
            return win_history
        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            drawn = set(answer)
            # Ascending list of plain ints: the same object shape the
            # exhaustive enumeration over 1..45 handed to the filter.
            ball = [n for n in range(1, 46) if n in drawn]
            if len(ball) != 6:
                # No 6-number combination of 1..45 can equal this row.
                continue
            filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
        return win_history
if __name__ == '__main__':
    # Entry point: run the filter over the draws numbered
    # --start-no..--end-no and report how long the pass took.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources
    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled "STEP #0" (final candidate selection), kept as an inert string.
    # Its write loop emits each candidate `ball`; it used to write the winning
    # `answer` once per candidate, which would have produced a useless file.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]
    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))
    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled STEP #2 (confirm the win count); win_count above feeds it.
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned in the feature file): 22 wins

234
valid_3.py Normal file
View File

@@ -0,0 +1,234 @@
import os
import argparse
import pandas as pd
import itertools
from filter_model_3 import BallFilter
import time
import datetime
class FilterTest:
    """Back-test harness that runs lottery draws through a ``BallFilter``."""

    # Filter engine; replaced with a BallFilter instance in __init__.
    ballFilter = None

    def __init__(self, resources_path, ruleset_path=None, history_json="lotto_history.json"):
        """Create the filter from ``resources_path/history_json``.

        Args:
            resources_path: Directory holding the history file.
            ruleset_path: Optional ruleset path forwarded to ``BallFilter``.
            history_json: File name of the history inside ``resources_path``.
        """
        # validation should use full history for previous-draw/window features
        lottoHistoryFileName = os.path.join(resources_path, history_json)
        self.ballFilter = BallFilter(lottoHistoryFileName, ruleset_path=ruleset_path)

    @staticmethod
    def _print_ranked(title, counts):
        """Print ``title`` followed by ``key -> count`` lines, highest count first."""
        print(title)
        for key, count in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
            print("\t\t>", key, "->", count)

    def find_filter_method(self, df_ball, start_no, end_no):
        """Report which filters reject the winning numbers of each draw.

        Every draw with ``start_no <= no <= end_no`` is passed to
        ``self.ballFilter.filter``; per-draw results and aggregate tables
        are printed.

        Args:
            df_ball: Draw history frame (``no`` column, then six numbers).
                It may contain draws outside the scored range.
            start_no: First draw number to score (inclusive).
            end_no: Last draw number to score (inclusive).

        Returns:
            Number of scored draws that tripped no filter at all.
        """
        win_count = 0
        no_filter_ball = {}   # draw number -> winning numbers that tripped no filter
        filter_dic = {}       # filter name -> number of draws it rejected
        filter_dic_len = {}   # tripped-filter count -> list of tripped-filter lists
        filter_dic_1 = {}     # filter name -> draws rejected by that filter alone
        filter_dic_2 = {}     # "a,b" -> draws rejected by exactly that pair

        # evaluate only requested range, but allow df_ball to contain full history
        for i in range(len(df_ball) - 1, -1, -1):
            no = int(df_ball['no'].iloc[i])
            if no < start_no or end_no < no:
                continue
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            filter_type = list(self.ballFilter.filter(ball=answer, no=no, until_end=True, df=df_ball))
            size = len(filter_type)
            if size == 0:
                win_count += 1
                no_filter_ball[no] = answer
                print("\t", no)
            else:
                if size == 1:
                    key = filter_type[0]
                    filter_dic_1[key] = filter_dic_1.get(key, 0) + 1
                elif size == 2:
                    key = ','.join(filter_type)
                    filter_dic_2[key] = filter_dic_2.get(key, 0) + 1
                print("\t", no, filter_type)
            # Group draws by how many filters they tripped so the report can
            # list the draws with the fewest filters first.
            # NOTE(review): these aggregates are applied to every scored draw,
            # not only to draws with three or more filters -- confirm intent.
            filter_dic_len.setdefault(size, []).append(filter_type)
            for f_t in filter_type:
                filter_dic[f_t] = filter_dic.get(f_t, 0) + 1

        print("\n\t[필터 개수가 적은 것부터 최적화를 위함]")
        for filter_count in sorted(filter_dic_len):
            for filter_type in filter_dic_len[filter_count]:
                print("\t\t>{} > {}".format(filter_count, filter_type))
        self._print_ranked("\n\t[걸러진 유일 필터]", filter_dic_1)
        self._print_ranked("\n\t[2개 필터에 걸린 경우]", filter_dic_2)
        self._print_ranked("\n\t[Filter 유형 별 걸린 개수]", filter_dic)

        print("\n\t# 필터에 걸리지 않고 당첨된 회차")
        total = max(0, end_no - start_no + 1)
        rate = (100 * len(no_filter_ball) / total) if total else 0.0
        # ".2f" gives two decimals; a bare ".2" means two significant digits
        # and renders e.g. 11.0 as "1.1e+01".
        summary = "\tcount: {:,} / total: {:,} ({:.2f})%".format(len(no_filter_ball), total, rate)
        print(summary)
        for no in no_filter_ball:
            print("\t\t>", no, no_filter_ball[no])
        print(summary)
        return win_count

    def find_final_candidates(self, no, df_ball, filter_ball=None, max_ball=45, pick=6):
        """Enumerate every combination that the filter does not reject.

        Args:
            no: Draw number forwarded to ``self.ballFilter.filter``.
            df_ball: Draw history frame forwarded to the filter as ``df``.
            filter_ball: Optional iterable of numbers; any combination that
                contains one of them is skipped without consulting the filter.
            max_ball: Highest ball number in the pool (numbers run 1..max_ball).
            pick: How many numbers make up one combination.

        Returns:
            List of tuples, in lexicographic order, for which the filter
            returned an empty result.
        """
        # Build the exclusion set once instead of once per combination.
        excluded = frozenset(filter_ball) if filter_ball is not None else frozenset()
        final_candidates = []
        # Stream the combinations: with the defaults there are 8,145,060 of
        # them, so materialising the full list first only costs memory.
        pool = range(1, max_ball + 1)
        for idx, ball in enumerate(itertools.combinations(pool, pick)):
            if idx % 1000000 == 0:
                print(" - {} processed...".format(idx))
            if not excluded.isdisjoint(ball):
                continue
            filter_type = self.ballFilter.filter(ball=ball, no=no, until_end=False, df=df_ball)
            if len(filter_type):
                continue
            final_candidates.append(ball)
        return final_candidates

    def check_filter_method(self, df_ball, p_win_count, filter_ball=None):
        """Print every historical draw whose winning numbers pass the filter.

        Rows are visited from the last one down to index 1 (row 0 is never
        evaluated). A draw is skipped when it shares a number with
        ``filter_ball``. ``p_win_count`` is only echoed in the summary line.
        Nothing is returned.
        """
        win_count = 0
        for i in range(len(df_ball) - 1, 0, -1):
            no = df_ball['no'].iloc[i]
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            if filter_ball is not None and len(set(answer) & set(filter_ball)):
                continue
            filter_type = self.ballFilter.extract_final_candidates(answer, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_count += 1
                print("\t\t>{}. {}".format(no, answer))
        print("\n\t> {} / {} p_win_count, {} total".format(win_count, p_win_count, len(df_ball) - 1))
        return

    def validate(self, df_ball, nos=None):
        """Return the draws in ``nos`` whose winning combination passes the filter.

        Args:
            df_ball: Draw history frame (``no`` column, then six numbers).
            nos: Iterable of draw numbers to check. ``None`` means "nothing
                to check" and yields an empty result instead of raising.

        Returns:
            Dict mapping each passing draw number to its six winning numbers.

        The earlier implementation ran the filter over all 8,145,060
        combinations of 1..45 for every draw, yet could only ever record the
        single combination equal to the winning numbers. That combination is
        now evaluated directly, so the per-million progress lines are gone.
        NOTE(review): this assumes ``extract_final_candidates`` keeps no
        state between calls -- confirm against BallFilter.
        """
        win_history = {}
        if nos is None:
            return win_history
        for no in nos:
            print(no, "processing...")
            answer = df_ball[df_ball['no'] == no].values.tolist()[0]
            answer = answer[1:7]
            drawn = set(answer)
            # Ascending list of plain ints: the same object shape the
            # exhaustive enumeration over 1..45 handed to the filter.
            ball = [n for n in range(1, 46) if n in drawn]
            if len(ball) != 6:
                # No 6-number combination of 1..45 can equal this row.
                continue
            filter_type = self.ballFilter.extract_final_candidates(ball, no=no, until_end=True, df=df_ball)
            if len(filter_type) == 0:
                win_history[no] = answer
                print("win.. no: {}, answer: {}".format(no, str(answer)))
        return win_history
if __name__ == '__main__':
    # Entry point: run the filter over the draws numbered
    # --start-no..--end-no and report how long the pass took.
    parser = argparse.ArgumentParser()
    parser.add_argument("--resources", default="resources")
    parser.add_argument(
        "--ruleset",
        default=None,
        help="Ruleset JSON path (e.g. resources/rulesets/Coverage-First-S230a.json). Default: resources/rulesets/default.json if present.",
    )
    parser.add_argument("--start-no", type=int, default=801)
    parser.add_argument("--end-no", type=int, default=1000)
    args = parser.parse_args()

    resources_path = args.resources
    # Use full history txt to support previous-draw/window features, but only score [start_no, end_no]
    lottoHistoryFileName = os.path.join(resources_path, 'lotto_history.txt')
    df_ball = pd.read_csv(lottoHistoryFileName, header=None)
    df_ball.columns = ['no', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'bn']

    filterTest = FilterTest(resources_path, ruleset_path=args.ruleset, history_json='lotto_history.json')

    print("STEP #1. 필터 방법 추출")
    start = time.time()
    win_count = filterTest.find_filter_method(df_ball, start_no=args.start_no, end_no=args.end_no)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)

    # Disabled "STEP #0" (final candidate selection), kept as an inert string.
    # Its write loop emits each candidate `ball`; it used to write the winning
    # `answer` once per candidate, which would have produced a useless file.
    """
    print("\n\n")
    no = df_ball['no'].values[-1]
    ball = df_ball[df_ball['no'] == no].values.tolist()[0]
    answer = ball[1:7]
    print("STEP #0. 최종 후보 선정")
    start = time.time()
    final_candidates = filterTest.find_final_candidates(no, df_ball, filter_ball=None)
    process_time = datetime.timedelta(seconds=time.time() - start)
    print("process_time: ", process_time)
    print(" > size: {}".format(len(final_candidates)))
    file_name = os.path.join(resources_path, 'final_candidates.biz_a1.txt')
    with open(file_name, 'w+') as outFp:
        for ball in final_candidates:
            ball_str = [str(b) for b in ball]
            outFp.write("{}\n".format(','.join(ball_str)))
    print('{}회, 정답: {}\n'.format(no, str(answer)))
    """

    # Disabled STEP #2 (confirm the win count); win_count above feeds it.
    #print("\n\n")
    #print("STEP #2. 당첨 회수 확인")
    #filterTest.check_filter_method(df_ball, win_count)
    # Original version (pinned in the feature file): 22 wins