init

2026-01-16 01:59:04 +09:00
parent 5bdbec0138
commit 62e86b09d4
1 changed files with 48 additions and 3 deletions
--- a/backend/app.py
+++ b/backend/app.py
@@ -78,6 +78,9 @@ import psycopg2
 from psycopg2.extras import RealDictCursor
 import requests
 from bs4 import BeautifulSoup
 import time
 import threading
 from concurrent.futures import ThreadPoolExecutor, as_completed
 # GxP 챗봇 제거로 관련 컨트롤러 import 삭제
 from engines.chatgpt_tool.controller.ChatGPTController import router as chatgpt_router
@@ -405,7 +408,8 @@ class AiNewsCreateDTO(BaseModel):
 def _extract_og(url: str) -> dict:
    meta = {"title": "", "description": "", "image": "", "url": url}
    try:
-        resp = requests.get(url, timeout=5, headers={"User-Agent": "Mozilla/5.0"})
+        # 외부 사이트 응답 지연이 잦아 타임아웃을 짧게 유지(캐시 + 병렬로 커버)
        resp = requests.get(url, timeout=2.5, headers={"User-Agent": "Mozilla/5.0"})
        if resp.ok:
            soup = BeautifulSoup(resp.text, 'html.parser')
            og_title = soup.find('meta', property='og:title')
@@ -419,6 +423,32 @@ def _extract_og(url: str) -> dict:
        pass
    return meta
 # --- OG cache (in-memory) ---
 # Purpose: avoid the 3-10 second delay caused by hitting external sites on
 # every list load.
 # In production an external cache such as Redis would be preferable, but for
 # now a process-memory cache is used to get a noticeable improvement quickly.
 # Guards every read and write of _OG_CACHE.
 _OG_CACHE_LOCK = threading.Lock()
 _OG_CACHE: Dict[str, Dict[str, object]] = {}  # url -> {"ts": float, "meta": dict}
 # Seconds a cached entry stays valid; overridable via the OG_CACHE_TTL_SEC env var.
 _OG_CACHE_TTL_SEC = float(os.getenv("OG_CACHE_TTL_SEC", "3600"))  # default 1h
 # Entry-count limit; once exceeded, _extract_og_cached evicts the entries with
 # the oldest "ts". Overridable via the OG_CACHE_MAX env var.
 _OG_CACHE_MAX = int(os.getenv("OG_CACHE_MAX", "2000"))
 def _extract_og_cached(url: str) -> dict:
    """Return Open Graph metadata for *url*, served from the in-process cache when fresh.

    A cached entry is reused while its age is below ``_OG_CACHE_TTL_SEC``.
    Otherwise ``_extract_og`` is called (outside the lock, so a fetch does not
    block other cache users) and its result is stored. When the cache grows
    past ``_OG_CACHE_MAX`` entries, the entries with the oldest timestamps are
    dropped.

    Args:
        url: Page URL whose OG metadata is wanted; also the cache key.

    Returns:
        The metadata dict. On a cache hit whose stored meta is empty/falsy, a
        blank placeholder dict carrying *url* is returned instead.
    """
    # Captured before the lookup; also used as the stored timestamp of a fresh entry.
    started_at = time.time()

    with _OG_CACHE_LOCK:
        cached = _OG_CACHE.get(url)
        if cached:
            age = started_at - float(cached.get("ts", 0))
            if age < _OG_CACHE_TTL_SEC:
                hit = cached.get("meta")
                if hit:
                    return hit
                return {"title": "", "description": "", "image": "", "url": url}

    # Miss or expired entry: fetch without holding the lock.
    fresh = _extract_og(url)

    with _OG_CACHE_LOCK:
        _OG_CACHE[url] = {"ts": started_at, "meta": fresh}
        # Simple size cap: when over the limit, discard the oldest entries first.
        overflow = len(_OG_CACHE) - _OG_CACHE_MAX
        if overflow > 0:
            oldest_first = sorted(
                _OG_CACHE,
                key=lambda cache_key: float(_OG_CACHE[cache_key].get("ts", 0)),
            )
            for stale_key in oldest_first[:overflow]:
                _OG_CACHE.pop(stale_key, None)
    return fresh
@app.get("/community/ai_news")
 def list_ai_news(offset: int = 0, limit: int = 10):
    with _get_db_conn() as conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
@@ -427,10 +457,25 @@ def list_ai_news(offset: int = 0, limit: int = 10):
            (limit, offset),
        )
        rows = cur.fetchall() or []
        # OG 메타는 외부 요청이므로 병렬 + 캐시로 속도 개선
        metas: Dict[str, dict] = {}
        urls = [r.get("url") for r in rows if r.get("url")]
        if urls:
            # limit이 커져도 서버를 과도하게 압박하지 않도록 상한
            max_workers = min(8, len(urls))
            with ThreadPoolExecutor(max_workers=max_workers) as ex:
                fut_map = {ex.submit(_extract_og_cached, u): u for u in urls}
                for fut in as_completed(fut_map):
                    u = fut_map[fut]
                    try:
                        metas[u] = fut.result() or {"title": "", "description": "", "image": "", "url": u}
                    except Exception:
                        metas[u] = {"title": "", "description": "", "image": "", "url": u}
        enriched = []
        for r in rows:
-            og = _extract_og(r["url"]) if r.get("url") else {"title":"","description":"","image":"","url":r.get("url")}
+            u = r.get("url")
-            r.update({"meta": og})
+            r.update({"meta": metas.get(u) if u else {"title": "", "description": "", "image": "", "url": u}})
            enriched.append(r)
        # Frontend infinite-scroll safety:
        # - Return `nextOffset: null` when there is no next page.