init

2026-01-16 01:59:04 +09:00
parent 5bdbec0138
commit 62e86b09d4
1 changed files with 48 additions and 3 deletions
--- a/backend/app.py
+++ b/backend/app.py
@@ -78,6 +78,9 @@ import psycopg2
 from psycopg2.extras import RealDictCursor
 import requests
 from bs4 import BeautifulSoup
+import time
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed

 # GxP 챗봇 제거로 관련 컨트롤러 import 삭제
 from engines.chatgpt_tool.controller.ChatGPTController import router as chatgpt_router
@@ -405,7 +408,8 @@ class AiNewsCreateDTO(BaseModel):
 def _extract_og(url: str) -> dict:
    meta = {"title": "", "description": "", "image": "", "url": url}
    try:
-        resp = requests.get(url, timeout=5, headers={"User-Agent": "Mozilla/5.0"})
+        # 외부 사이트 응답 지연이 잦아 타임아웃을 짧게 유지(캐시 + 병렬로 커버)
+        resp = requests.get(url, timeout=2.5, headers={"User-Agent": "Mozilla/5.0"})
        if resp.ok:
            soup = BeautifulSoup(resp.text, 'html.parser')
            og_title = soup.find('meta', property='og:title')
@@ -419,6 +423,32 @@ def _extract_og(url: str) -> dict:
        pass
    return meta

+# --- OG cache (in-memory) ---
+# Purpose: avoid the 3-10 second delay caused by hitting external sites on every list load.
+# An external cache such as Redis would be better in production; for now a per-process in-memory cache is used as a quick perceived-speed win.
+_OG_CACHE_LOCK = threading.Lock()  # held by _extract_og_cached around its _OG_CACHE reads and writes
+_OG_CACHE: Dict[str, Dict[str, object]] = {}  # url -> {"ts": float, "meta": dict}
+_OG_CACHE_TTL_SEC = float(os.getenv("OG_CACHE_TTL_SEC", "3600"))  # entry lifetime in seconds, overridable via env; default 1h
+_OG_CACHE_MAX = int(os.getenv("OG_CACHE_MAX", "2000"))  # entry count above which the oldest entries are evicted; overridable via env
+
+def _extract_og_cached(url: str) -> dict:  # _extract_og(url), memoized per url in _OG_CACHE for _OG_CACHE_TTL_SEC seconds
+    now = time.time()  # single timestamp: used for the freshness check and stored as the entry's "ts"
+    with _OG_CACHE_LOCK:
+        ent = _OG_CACHE.get(url)
+        if ent and (now - float(ent.get("ts", 0))) < _OG_CACHE_TTL_SEC:  # hit: entry is younger than the TTL
+            return ent.get("meta") or {"title": "", "description": "", "image": "", "url": url}
+    # Miss or expired entry: the lock is released before fetching, so the external request is made without holding it.
+    meta = _extract_og(url)
+    # NOTE(review): the result is cached as-is; if _extract_og returns its empty default on failure, that stays cached for the full TTL - confirm intended.
+    with _OG_CACHE_LOCK:
+        _OG_CACHE[url] = {"ts": now, "meta": meta}  # "ts" is the time taken before the fetch, not after it
+        # Simple size cap: once over _OG_CACHE_MAX, drop entries starting from the oldest "ts".
+        if len(_OG_CACHE) > _OG_CACHE_MAX:
+            items = sorted(_OG_CACHE.items(), key=lambda kv: float(kv[1].get("ts", 0)))
+            for k, _ in items[: max(1, len(_OG_CACHE) - _OG_CACHE_MAX)]:  # evict the overflow, always at least one entry
+                _OG_CACHE.pop(k, None)
+    return meta
+
 @app.get("/community/ai_news")
 def list_ai_news(offset: int = 0, limit: int = 10):
    with _get_db_conn() as conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
@@ -427,10 +457,25 @@ def list_ai_news(offset: int = 0, limit: int = 10):
            (limit, offset),
        )
        rows = cur.fetchall() or []
+        # OG 메타는 외부 요청이므로 병렬 + 캐시로 속도 개선
+        metas: Dict[str, dict] = {}
+        urls = [r.get("url") for r in rows if r.get("url")]
+        if urls:
+            # limit이 커져도 서버를 과도하게 압박하지 않도록 상한
+            max_workers = min(8, len(urls))
+            with ThreadPoolExecutor(max_workers=max_workers) as ex:
+                fut_map = {ex.submit(_extract_og_cached, u): u for u in urls}
+                for fut in as_completed(fut_map):
+                    u = fut_map[fut]
+                    try:
+                        metas[u] = fut.result() or {"title": "", "description": "", "image": "", "url": u}
+                    except Exception:
+                        metas[u] = {"title": "", "description": "", "image": "", "url": u}
+
        enriched = []
        for r in rows:
-            og = _extract_og(r["url"]) if r.get("url") else {"title":"","description":"","image":"","url":r.get("url")}
-            r.update({"meta": og})
+            u = r.get("url")
+            r.update({"meta": metas.get(u) if u else {"title": "", "description": "", "image": "", "url": u}})
            enriched.append(r)
        # Frontend infinite-scroll safety:
        # - Return `nextOffset: null` when there is no next page.