Fix news OG encoding
This commit is contained in:
@@ -405,17 +405,27 @@ class AiNewsCreateDTO(BaseModel):
|
||||
author_id: Optional[str] = None
|
||||
author_email: Optional[str] = None
|
||||
|
||||
def _decode_html(resp: "requests.Response") -> str:
    """Decode an HTTP response body to text using the most plausible charset.

    The charset reported by ``resp.encoding`` is used unless it is missing
    or is ``iso-8859-1``; in those cases ``resp.apparent_encoding`` (the
    charset detected from the body bytes) is used instead. If neither
    yields a usable codec name, the body is decoded as UTF-8.

    Args:
        resp: Response object exposing ``encoding``, ``apparent_encoding``
            and ``content`` (the raw body bytes).

    Returns:
        The decoded body. Undecodable bytes are substituted with the
        replacement character rather than raising.
    """
    encoding = resp.encoding
    # NOTE(review): "iso-8859-1" is treated as untrustworthy here, presumably
    # because it is the HTTP default that requests reports for text/* bodies
    # sent without an explicit charset -- confirm against the requests docs.
    if not encoding or encoding.lower() == "iso-8859-1":
        encoding = resp.apparent_encoding
    try:
        return resp.content.decode(encoding or "utf-8", errors="replace")
    except (LookupError, ValueError):
        # LookupError: unknown or non-text codec name advertised by the server.
        # ValueError: malformed codec name, or a codec-specific UnicodeError.
        # Anything else is a real bug and is allowed to propagate.
        return resp.content.decode("utf-8", errors="replace")
|
||||
|
||||
def _extract_og(url: str) -> dict:
|
||||
meta = {"title": "", "description": "", "image": "", "url": url}
|
||||
try:
|
||||
# 외부 사이트 응답 지연이 잦아 타임아웃을 짧게 유지(캐시 + 병렬로 커버)
|
||||
resp = requests.get(url, timeout=2.5, headers={"User-Agent": "Mozilla/5.0"})
|
||||
if resp.ok:
|
||||
soup = BeautifulSoup(resp.text, 'html.parser')
|
||||
og_title = soup.find('meta', property='og:title')
|
||||
og_desc = soup.find('meta', property='og:description')
|
||||
og_img = soup.find('meta', property='og:image')
|
||||
title_tag = soup.find('title')
|
||||
html = _decode_html(resp)
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
og_title = soup.find("meta", property="og:title")
|
||||
og_desc = soup.find("meta", property="og:description")
|
||||
og_img = soup.find("meta", property="og:image")
|
||||
title_tag = soup.find("title")
|
||||
meta["title"] = (og_title["content"].strip() if og_title and og_title.get("content") else (title_tag.text.strip() if title_tag else ""))
|
||||
meta["description"] = (og_desc["content"].strip() if og_desc and og_desc.get("content") else "")
|
||||
meta["image"] = (og_img["content"].strip() if og_img and og_img.get("content") else "")
|
||||
|
||||
Reference in New Issue
Block a user