Fix news OG encoding

This commit is contained in:
dsyoon
2026-01-17 20:05:36 +09:00
parent 62e86b09d4
commit 8628b25b9b

View File

@@ -405,17 +405,27 @@ class AiNewsCreateDTO(BaseModel):
author_id: Optional[str] = None
author_email: Optional[str] = None
def _decode_html(resp: requests.Response) -> str:
encoding = resp.encoding
if not encoding or encoding.lower() == "iso-8859-1":
encoding = resp.apparent_encoding
try:
return resp.content.decode(encoding or "utf-8", errors="replace")
except Exception:
return resp.content.decode("utf-8", errors="replace")
def _extract_og(url: str) -> dict:
meta = {"title": "", "description": "", "image": "", "url": url}
try:
# 외부 사이트 응답 지연이 잦아 타임아웃을 짧게 유지(캐시 + 병렬로 커버)
resp = requests.get(url, timeout=2.5, headers={"User-Agent": "Mozilla/5.0"})
if resp.ok:
soup = BeautifulSoup(resp.text, 'html.parser')
og_title = soup.find('meta', property='og:title')
og_desc = soup.find('meta', property='og:description')
og_img = soup.find('meta', property='og:image')
title_tag = soup.find('title')
html = _decode_html(resp)
soup = BeautifulSoup(html, "html.parser")
og_title = soup.find("meta", property="og:title")
og_desc = soup.find("meta", property="og:description")
og_img = soup.find("meta", property="og:image")
title_tag = soup.find("title")
meta["title"] = (og_title["content"].strip() if og_title and og_title.get("content") else (title_tag.text.strip() if title_tag else ""))
meta["description"] = (og_desc["content"].strip() if og_desc and og_desc.get("content") else "")
meta["image"] = (og_img["content"].strip() if og_img and og_img.get("content") else "")