Files
DeepLottery/DataCrawler.py
dsyoon 2bd4ad8fcb Improve lotto DataCrawler and refresh history through draw 1218
- Use session, browser headers, POST/GET fallback, and SSL verify options
- Append missing lotto_history txt/json rows for draws 1214-1218

Made-with: Cursor
2026-04-08 16:52:55 +09:00

217 lines
7.3 KiB
Python

# Standard-library helpers, plus `requests` for the HTTP calls.
import os
import time
import requests
# JSON 포맷을 다루기 위한 라이브러리를 호출합니다.
import json
import urllib3
# Silence urllib3's InsecureRequestWarning process-wide. DataCrawler._fetch_draw
# retries with verify=False as a last resort, and each such request would
# otherwise emit this warning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# The Telegram notifier is optional: when its module is not installed, fall
# back to a stub with the same two methods so the crawler still runs.
try:
    from TelegramBot import TelegramBot
except ModuleNotFoundError:

    class TelegramBot:
        """No-op stand-in used when the ``TelegramBot`` module is missing."""

        def __init__(self, enable=True):
            """Accept and ignore ``enable``.

            NOTE(review): presumably mirrors the real class's constructor;
            confirm against the TelegramBot module.
            """

        def sendMsg(self, msg):
            """Discard ``msg`` and return None."""
            return None
# Endpoint queried for a single draw; "{}" is filled with the draw number.
_LOTTO_URL = "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}"

# Value passed as ``timeout=`` on every HTTP request.
_REQUEST_TIMEOUT = 15

# Browser-like headers installed on the crawler's requests session.
_BROWSER_HEADERS = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
            "AppleWebKit/537.36",
            "(KHTML, like Gecko)",
            "Chrome/131.0.0.0",
            "Safari/537.36",
        )
    ),
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
    "Referer": "https://www.dhlottery.co.kr/gameResult.do?method=byWin",
    "X-Requested-With": "XMLHttpRequest",
}
def _ssl_verify_arg():
    """Return the value to pass as ``verify=`` for a certificate-checked request.

    Returns:
        The path reported by ``certifi.where()`` when ``certifi`` can be
        imported, otherwise ``True``.
    """
    verify = True
    try:
        import certifi

        verify = certifi.where()
    except ImportError:
        # certifi is optional; keep the default.
        pass
    return verify
# Collects lotto draw results from dhlottery.co.kr into local history files.
class DataCrawler:
    """Download lotto draw results and keep the history files up to date.

    For a base path ``<base>`` two files are maintained:

    * ``<base>.json`` - one JSON object per line (the API response as parsed).
    * ``<base>.txt``  - one CSV row per draw:
      ``drwNo,drwtNo1,drwtNo2,drwtNo3,drwtNo4,drwtNo5,drwtNo6,bnusNo``.
    """

    bot = None

    # Pause in seconds after each successfully fetched draw, so the server is
    # not hit back-to-back. Class attributes so they can be tuned per instance.
    FULL_CRAWL_DELAY = 0.5
    UPDATE_DELAY = 0.35

    def __init__(self):
        """Create the notifier and an HTTP session with browser-like headers."""
        self.bot = TelegramBot()
        self._session = requests.Session()
        self._session.headers.update(_BROWSER_HEADERS)

    @staticmethod
    def _format_txt_row(drw_no, result):
        """Return the ``.txt`` CSV row (newline-terminated) for one draw.

        Raises:
            KeyError: if ``result`` lacks one of the number fields.
        """
        return "%d,%d,%d,%d,%d,%d,%d,%d\n" % (
            drw_no,
            result["drwtNo1"],
            result["drwtNo2"],
            result["drwtNo3"],
            result["drwtNo4"],
            result["drwtNo5"],
            result["drwtNo6"],
            result["bnusNo"],
        )

    def _notify(self, msg):
        """Send ``msg`` through the bot, ignoring any failure.

        Notification is deliberately best-effort: a messaging error must not
        abort or fail a crawl that otherwise succeeded.
        """
        try:
            self.bot.sendMsg(msg)
        except Exception:
            pass

    def _fetch_draw(self, drw_no):
        """Fetch the result of a single draw.

        Tries POST then GET with certificate verification, then both again
        with verification disabled.

        Args:
            drw_no: draw number (anything ``int()`` accepts).

        Returns:
            The parsed response dict when its ``returnValue`` is
            ``"success"``; ``None`` when every attempt failed.
        """
        url = _LOTTO_URL.format(int(drw_no))
        # NOTE(review): the second pass uses verify=False, i.e. it disables
        # TLS certificate verification. Kept for compatibility with hosts
        # whose CA bundle cannot validate the site; responses obtained that
        # way are not authenticated.
        for verify in (_ssl_verify_arg(), False):
            for method in ("POST", "GET"):
                try:
                    res = self._session.request(
                        method,
                        url,
                        timeout=_REQUEST_TIMEOUT,
                        verify=verify,
                    )
                    if res.status_code != 200:
                        continue
                    text = res.text.strip()
                    # Anything not starting with "{" (e.g. an HTML error
                    # page) is not a JSON object.
                    if not text.startswith("{"):
                        continue
                    result = json.loads(text)
                except (requests.RequestException, ValueError):
                    # json.JSONDecodeError is a ValueError subclass.
                    continue
                if isinstance(result, dict) and result.get("returnValue") == "success":
                    return result
        return None

    def _append_draw_files(self, lottoHistoryFile, result):
        """Append one successful response to ``<base>.json`` and ``<base>.txt``.

        Both output lines are built before any file is opened, so a response
        with missing fields raises without leaving the two files out of sync.

        Args:
            lottoHistoryFile: base path without extension.
            result: response dict as returned by ``_fetch_draw``.

        Raises:
            KeyError: if ``result`` lacks ``drwNo`` or a number field.
        """
        txt_line = self._format_txt_row(result["drwNo"], result)
        json_line = json.dumps(result, ensure_ascii=False) + "\n"
        with open(lottoHistoryFile + ".json", "a", encoding="utf-8") as json_fp:
            json_fp.write(json_line)
        with open(lottoHistoryFile + ".txt", "a", encoding="utf-8") as text_fp:
            text_fp.write(txt_line)

    def _read_last_draw_from_json(self, json_path):
        """Return ``drwNo`` of the last parseable record in a JSON-lines file.

        Blank and unparseable lines are skipped.

        Returns:
            The integer draw number, or ``None`` when the file is missing or
            empty, holds no parseable line, or its last parseable record is
            not a successful JSON object with an integer ``drwNo``.
        """
        if not os.path.isfile(json_path) or os.path.getsize(json_path) == 0:
            return None
        last_record = None
        with open(json_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    last_record = json.loads(line)
                except json.JSONDecodeError:
                    continue
        # A valid JSON line may still be a list/number/string; only objects
        # can carry the fields read below.
        if not isinstance(last_record, dict):
            return None
        if last_record.get("returnValue") != "success":
            return None
        drw_no = last_record.get("drwNo")
        return drw_no if isinstance(drw_no, int) else None

    # Collect draw results and store them in the history files.
    # lottoHistoryFile: base path of the history files (without extension).
    def craw(self, lottoHistoryFile, drwNo=None):
        """Fetch one draw, or rebuild the whole history from draw 1.

        Args:
            lottoHistoryFile: base path without extension.
            drwNo: when given, only this draw is fetched and appended.
                When ``None``, draws 1, 2, 3, ... are fetched until the first
                failure and the history files are rewritten.

        Returns:
            ``True`` if at least one draw was stored, ``False`` otherwise.
            After a rebuild that fetched nothing, existing history files are
            left untouched and missing ones are not created.
        """
        if drwNo is not None:
            result = self._fetch_draw(drwNo)
            if result is None:
                return False
            self._append_draw_files(lottoHistoryFile, result)
            return True

        json_path = lottoHistoryFile + ".json"
        text_path = lottoHistoryFile + ".txt"
        # Write to sibling temp files and swap them in only on success, so a
        # network outage or a mid-crawl exception cannot wipe existing data.
        json_tmp = json_path + ".tmp"
        text_tmp = text_path + ".tmp"
        fetched = 0
        try:
            with open(json_tmp, "w", encoding="utf-8") as json_fp, open(
                text_tmp, "w", encoding="utf-8"
            ) as text_fp:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result is None:
                        break
                    json_fp.write(json.dumps(result, ensure_ascii=False) + "\n")
                    text_fp.write(self._format_txt_row(idx, result))
                    fetched += 1
                    idx += 1
                    time.sleep(self.FULL_CRAWL_DELAY)
            if fetched:
                os.replace(json_tmp, json_path)
                os.replace(text_tmp, text_path)
        finally:
            # Remove whatever temp file was not moved into place.
            for tmp_path in (json_tmp, text_tmp):
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass
        return fetched > 0

    def excute(self, resource_path):
        """Bring ``<resource_path>/lotto_history.*`` up to the latest draw.

        Reads the last stored draw number and appends every following draw
        until a fetch fails. When no valid last record exists, a full rebuild
        is attempted instead. A status message is sent through the bot in
        every case.

        Args:
            resource_path: directory holding the history files.

        Returns:
            Always ``True``.
        """
        lottoHistoryFile = os.path.join(resource_path, "lotto_history")
        json_path = lottoHistoryFile + ".json"
        last_no = self._read_last_draw_from_json(json_path)
        if last_no is None:
            if self.craw(lottoHistoryFile):
                self._notify("[Lottery Crawler] full history rebuilt (no valid json).")
            else:
                self._notify(
                    "[Lottery Crawler] full history rebuild failed (no draws fetched)."
                )
            return True

        added = 0
        next_no = last_no + 1
        while True:
            result = self._fetch_draw(next_no)
            if result is None:
                break
            self._append_draw_files(lottoHistoryFile, result)
            added += 1
            next_no += 1
            time.sleep(self.UPDATE_DELAY)

        if added == 0:
            self._notify(
                "[Lottery Crawler] up to date (last drwNo={}).".format(last_no)
            )
        else:
            self._notify(
                "[Lottery Crawler] appended {} draw(s), last drwNo={}.".format(
                    added, last_no + added
                )
            )
        return True

    # Spelling compatibility: existing code calls the misspelled `excute`.
    execute = excute
if __name__ == "__main__":
    # Script entry point: refresh ./resources/lotto_history.* relative to the
    # current working directory.
    PROJECT_HOME = "."
    dataCrawler = DataCrawler()
    resource_path = os.path.join(PROJECT_HOME, "resources")
    dataCrawler.excute(resource_path)