"""Crawler for the dhlottery.co.kr ``getLottoNumber`` endpoint.

Draw results are stored next to each other in two files that share a base
path: ``<base>.json`` (one JSON object per line) and ``<base>.txt`` (one CSV
row per draw: draw number, six winning numbers, bonus number).
"""
# Standard-library helpers.
import contextlib
import json
import logging
import os
import time

# HTTP client libraries.
import requests
import urllib3

# NOTE(review): this silences urllib3's InsecureRequestWarning for the whole
# process. It is kept because the crawler can still fall back to unverified
# TLS (see DataCrawler._fetch_draw), which now logs its own warning instead.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

try:
    from TelegramBot import TelegramBot
except ModuleNotFoundError:

    class TelegramBot:
        """No-op stand-in used when the TelegramBot module cannot be imported."""

        def __init__(self, enable=True):
            pass

        def sendMsg(self, msg):
            pass


logger = logging.getLogger(__name__)

_LOTTO_URL = "https://www.dhlottery.co.kr/common.do?method=getLottoNumber&drwNo={}"
_REQUEST_TIMEOUT = 15
_BROWSER_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    ),
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
    "Referer": "https://www.dhlottery.co.kr/gameResult.do?method=byWin",
    "X-Requested-With": "XMLHttpRequest",
}
# Keys of the seven numbers written to the .txt row, in column order.
_NUMBER_KEYS = (
    "drwtNo1",
    "drwtNo2",
    "drwtNo3",
    "drwtNo4",
    "drwtNo5",
    "drwtNo6",
    "bnusNo",
)
# Pause (seconds) after each successful request.
_REBUILD_DELAY = 0.5
_UPDATE_DELAY = 0.35


def _ssl_verify_arg():
    """Return the ``verify`` value for requests.

    Uses certifi's CA bundle path when certifi is importable, otherwise
    ``True`` (the requests default).
    """
    try:
        import certifi
    except ImportError:
        return True
    return certifi.where()


# Python class that collects lottery draw data.
class DataCrawler:
    # NOTE(review): the diff this block was reconstructed from does not show
    # the three lines that sit between the class header and the constructor
    # in the real file; keep whatever is there.

    def __init__(self, *, allow_insecure_fallback=True):
        """Create the crawler with a Telegram notifier and a shared HTTP session.

        Args:
            allow_insecure_fallback: when True (the historical behaviour), a
                request that fails certificate verification is retried once
                with verification disabled. Pass False to refuse that retry.
        """
        self.bot = TelegramBot()
        self._session = requests.Session()
        self._session.headers.update(_BROWSER_HEADERS)
        self._verify = _ssl_verify_arg()
        self._allow_insecure_fallback = allow_insecure_fallback

    def _notify(self, msg):
        """Send *msg* through the bot; a failing notifier never stops the crawl."""
        try:
            self.bot.sendMsg(msg)
        except Exception:
            # Best-effort boundary: record the failure instead of hiding it.
            logger.exception("Telegram notification failed: %s", msg)

    @staticmethod
    def _parse_success(res):
        """Return the decoded payload of a successful draw response, else None."""
        if res.status_code != 200:
            return None
        text = res.text.strip()
        # Anything that is not a JSON object (e.g. an HTML page) is rejected.
        if not text.startswith("{"):
            return None
        try:
            result = json.loads(text)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return None
        if isinstance(result, dict) and result.get("returnValue") == "success":
            return result
        return None

    def _fetch_draw(self, drw_no):
        """Fetch one draw from the API.

        POST is tried first, then GET. Verification is only switched off when
        the verified attempt failed with an SSL error and the fallback is
        allowed; HTTP or payload failures cannot be cured by dropping
        verification, so they no longer trigger it.

        Returns:
            The response dict whose ``returnValue`` is ``"success"``, or None.
        """
        url = _LOTTO_URL.format(int(drw_no))
        verify_modes = [self._verify]
        if self._allow_insecure_fallback:
            verify_modes.append(False)

        for verify in verify_modes:
            if verify is False:
                logger.warning(
                    "TLS verification failed for %s; retrying without verification",
                    url,
                )
            tls_failed = False
            for method in ("POST", "GET"):
                try:
                    res = self._session.request(
                        method,
                        url,
                        timeout=_REQUEST_TIMEOUT,
                        verify=verify,
                    )
                except requests.exceptions.SSLError:
                    tls_failed = True
                    continue
                except requests.RequestException:
                    continue
                result = self._parse_success(res)
                if result is not None:
                    return result
            if not tls_failed:
                break
        return None

    @staticmethod
    def _format_rows(drw_no, result):
        """Build the (json_line, txt_line) pair for one draw.

        Raises:
            KeyError: if *result* lacks one of the number fields.
        """
        json_line = json.dumps(result, ensure_ascii=False) + "\n"
        values = [drw_no] + [result[key] for key in _NUMBER_KEYS]
        txt_line = ",".join("%d" % value for value in values) + "\n"
        return json_line, txt_line

    def _append_draw_files(self, lottoHistoryFile, result):
        """Append one successful response to ``<base>.json`` and ``<base>.txt``.

        Both lines are built before either file is opened, so a malformed
        *result* cannot leave the two files out of step.
        """
        json_line, txt_line = self._format_rows(result["drwNo"], result)
        with open(lottoHistoryFile + ".json", "a", encoding="utf-8") as json_fp:
            json_fp.write(json_line)
        with open(lottoHistoryFile + ".txt", "a", encoding="utf-8") as text_fp:
            text_fp.write(txt_line)

    def _read_last_draw_from_json(self, json_path):
        """Return the ``drwNo`` of the last successful record in a JSONL file.

        Blank lines, undecodable lines, non-object lines, records whose
        ``returnValue`` is not ``"success"`` and records without an integer
        ``drwNo`` are skipped. Returns None when the file is missing or holds
        no usable record.
        """
        if not os.path.isfile(json_path):
            return None
        last_no = None
        with open(json_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except ValueError:
                    continue
                if not isinstance(record, dict):
                    continue
                if record.get("returnValue") != "success":
                    continue
                drw_no = record.get("drwNo")
                if isinstance(drw_no, int) and not isinstance(drw_no, bool):
                    last_no = drw_no
        return last_no

    def _rebuild_history(self, lottoHistoryFile):
        """Re-download every draw from 1 upward and replace both history files.

        Data is written to ``*.tmp`` siblings first. The real files are only
        replaced when at least one draw was fetched and the rebuilt history is
        not shorter than the one already on disk, so a network outage cannot
        erase existing data.

        Returns:
            True when the files were replaced, False when they were left as is.
        """
        json_path = lottoHistoryFile + ".json"
        text_path = lottoHistoryFile + ".txt"
        json_tmp = json_path + ".tmp"
        text_tmp = text_path + ".tmp"
        previous_last = self._read_last_draw_from_json(json_path)

        parent = os.path.dirname(json_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        last_fetched = 0
        try:
            with open(json_tmp, "w", encoding="utf-8") as json_fp, open(
                text_tmp, "w", encoding="utf-8"
            ) as text_fp:
                idx = 1
                while True:
                    result = self._fetch_draw(idx)
                    if result is None:
                        break
                    json_line, txt_line = self._format_rows(idx, result)
                    json_fp.write(json_line)
                    text_fp.write(txt_line)
                    last_fetched = idx
                    idx += 1
                    time.sleep(_REBUILD_DELAY)

            shorter = previous_last is not None and last_fetched < previous_last
            if last_fetched == 0 or shorter:
                logger.warning(
                    "History rebuild stopped at draw %d (on disk: %s); "
                    "existing files kept",
                    last_fetched,
                    previous_last,
                )
                return False
            os.replace(json_tmp, json_path)
            os.replace(text_tmp, text_path)
            return True
        finally:
            # After a successful swap the temp files are gone already.
            for tmp in (json_tmp, text_tmp):
                with contextlib.suppress(FileNotFoundError):
                    os.remove(tmp)

    # Collect lottery draw data and store it in files.
    # lottoHistoryFile: base path of the history files (without extension)
    def craw(self, lottoHistoryFile, drwNo=None):
        """Fetch draw *drwNo* and append it, or rebuild everything when it is None.

        Returns:
            True when data was written, False when nothing could be fetched.
        """
        if drwNo is None:
            return self._rebuild_history(lottoHistoryFile)
        result = self._fetch_draw(drwNo)
        if result is None:
            return False
        self._append_draw_files(lottoHistoryFile, result)
        return True

    def excute(self, resource_path):
        """Bring ``<resource_path>/lotto_history.*`` up to the newest draw.

        Starts after the last draw recorded in the JSON file and keeps
        fetching consecutive draws until one cannot be fetched. When the JSON
        file holds no usable record the whole history is rebuilt instead.

        Returns:
            False when a required full rebuild failed, True otherwise.
        """
        lottoHistoryFile = os.path.join(resource_path, "lotto_history")
        json_path = lottoHistoryFile + ".json"

        last_no = self._read_last_draw_from_json(json_path)
        if last_no is None:
            rebuilt = self.craw(lottoHistoryFile)
            if rebuilt:
                self._notify("[Lottery Crawler] full history rebuilt (no valid json).")
            else:
                self._notify(
                    "[Lottery Crawler] full history rebuild failed (nothing fetched)."
                )
            return rebuilt

        added = 0
        next_no = last_no + 1
        while True:
            result = self._fetch_draw(next_no)
            if result is None:
                break
            self._append_draw_files(lottoHistoryFile, result)
            added += 1
            next_no += 1
            time.sleep(_UPDATE_DELAY)

        if added == 0:
            self._notify(
                "[Lottery Crawler] up to date (last drwNo={}).".format(last_no)
            )
        else:
            self._notify(
                "[Lottery Crawler] appended {} draw(s), last drwNo={}.".format(
                    added, last_no + added
                )
            )
        return True

    # Typo compatibility: existing code calls excute.
    execute = excute


if __name__ == "__main__":
    PROJECT_HOME = '.'
    resource_path = os.path.join(PROJECT_HOME, 'resources')
    # NOTE(review): the visible diff context ends here; the remaining
    # statements of this entry point are outside the hunk and must be kept
    # from the existing file.

# ---------------------------------------------------------------------------
# Data recorded by the same change in resources/lotto_history.json and
# resources/lotto_history.txt (draw date, then the .txt row:
# drwNo, six winning numbers, bonus number):
#   2026-03-07  1214,10,15,19,27,30,33,14
#   2026-03-14  1215,13,15,19,21,44,45,39
#   2026-03-21  1216,3,10,14,15,23,24,25
#   2026-03-28  1217,8,10,15,20,29,31,41
#   2026-04-04  1218,3,28,31,32,42,45,25