From 8628b25b9b53afffc55c961ad7fcef5c3152569b Mon Sep 17 00:00:00 2001
From: dsyoon
Date: Sat, 17 Jan 2026 20:05:36 +0900
Subject: [PATCH] Fix news OG encoding

---
 backend/app.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/backend/app.py b/backend/app.py
index d90efb4..1ee269f 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -405,17 +405,27 @@ class AiNewsCreateDTO(BaseModel):
     author_id: Optional[str] = None
     author_email: Optional[str] = None
 
+def _decode_html(resp: requests.Response) -> str:
+    encoding = resp.encoding
+    if not encoding or encoding.lower() == "iso-8859-1":
+        encoding = resp.apparent_encoding
+    try:
+        return resp.content.decode(encoding or "utf-8", errors="replace")
+    except Exception:
+        return resp.content.decode("utf-8", errors="replace")
+
 def _extract_og(url: str) -> dict:
     meta = {"title": "", "description": "", "image": "", "url": url}
     try:
         # 외부 사이트 응답 지연이 잦아 타임아웃을 짧게 유지(캐시 + 병렬로 커버)
         resp = requests.get(url, timeout=2.5, headers={"User-Agent": "Mozilla/5.0"})
         if resp.ok:
-            soup = BeautifulSoup(resp.text, 'html.parser')
-            og_title = soup.find('meta', property='og:title')
-            og_desc = soup.find('meta', property='og:description')
-            og_img = soup.find('meta', property='og:image')
-            title_tag = soup.find('title')
+            html = _decode_html(resp)
+            soup = BeautifulSoup(html, "html.parser")
+            og_title = soup.find("meta", property="og:title")
+            og_desc = soup.find("meta", property="og:description")
+            og_img = soup.find("meta", property="og:image")
+            title_tag = soup.find("title")
             meta["title"] = (og_title["content"].strip() if og_title and og_title.get("content") else (title_tag.text.strip() if title_tag else ""))
             meta["description"] = (og_desc["content"].strip() if og_desc and og_desc.get("content") else "")
             meta["image"] = (og_img["content"].strip() if og_img and og_img.get("content") else "")