Initial commit after re-install
This commit is contained in:
467
app.py
Normal file
467
app.py
Normal file
@@ -0,0 +1,467 @@
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import psycopg2
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from dotenv import load_dotenv
|
||||
from flask import Flask, jsonify, make_response, render_template, request, send_from_directory
|
||||
|
||||
# Load DB credentials and tuning knobs from .env before anything reads os.environ.
load_dotenv()


app = Flask(__name__, static_folder="static", template_folder="templates")


# Fallback metadata used when a link's Open Graph data cannot be fetched.
# NOTE: DEFAULT_DESCRIPTION is a user-facing Korean string ("no description").
DEFAULT_DESCRIPTION = "설명 없음"
DEFAULT_IMAGE = "/static/placeholder.svg"
# Successful metadata lookups are cached longer than failures so transient
# fetch errors are retried sooner.
CACHE_TTL_SECONDS = int(os.getenv("CACHE_TTL_SECONDS", "3600"))
FAILED_TTL_SECONDS = int(os.getenv("FAILED_TTL_SECONDS", "300"))
# In-process caches: url -> {"data", "expires_at", "ok"} (see fetch_metadata)
# and (schema, table)/("info", schema, table) -> column info (see get_table_columns*).
# NOTE(review): both grow unbounded for the process lifetime — acceptable for a
# small app, but worth bounding if the URL set is large.
METADATA_CACHE = {}
TABLE_COLUMNS_CACHE = {}
# Inline SVG placeholder image, percent-encoded so it forms a valid data: URI
# usable directly in <img src=...> without a network round-trip.
PLACEHOLDER_DATA_URI = (
    "data:image/svg+xml;utf8,"
    "<svg%20width='640'%20height='360'%20viewBox='0%200%20640%20360'%20fill='none'%20"
    "xmlns='http://www.w3.org/2000/svg'>"
    "<rect%20width='640'%20height='360'%20fill='%23e9ecef'/>"
    "<rect%20x='120'%20y='90'%20width='400'%20height='180'%20rx='16'%20fill='%23dee2e6'/>"
    "<path%20d='M210%20210l60-70%2070%2080%2060-60%2090%2090H210z'%20fill='%23adb5bd'/>"
    "<circle%20cx='260'%20cy='150'%20r='22'%20fill='%23adb5bd'/>"
    "<text%20x='320'%20y='260'%20text-anchor='middle'%20font-size='18'%20"
    "fill='%236c757d'%20font-family='Arial,%20sans-serif'>No%20Image</text>"
    "</svg>"
)
# Pagination bounds for the /links endpoint.
DEFAULT_PAGE_SIZE = int(os.getenv("DEFAULT_PAGE_SIZE", "30"))
MAX_PAGE_SIZE = int(os.getenv("MAX_PAGE_SIZE", "60"))
DEFAULT_SCHEMA = os.getenv("DB_SCHEMA", "public")
|
||||
|
||||
|
||||
def _safe_identifier(name: str) -> str:
|
||||
if not name or not re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", name):
|
||||
raise ValueError(f"Invalid SQL identifier: {name!r}")
|
||||
return name
|
||||
|
||||
|
||||
def _table_ref(schema: str, table: str) -> str:
    """Build a validated "schema.table" reference safe to splice into SQL."""
    safe_schema = _safe_identifier(schema)
    safe_table = _safe_identifier(table)
    return f"{safe_schema}.{safe_table}"
|
||||
|
||||
|
||||
def get_db_connection():
    """Open a new PostgreSQL connection from the DB_* environment variables."""
    connect_kwargs = {
        "host": os.getenv("DB_HOST"),
        "port": os.getenv("DB_PORT"),
        "dbname": os.getenv("DB_NAME"),
        "user": os.getenv("DB_USER"),
        "password": os.getenv("DB_PASSWORD"),
    }
    return psycopg2.connect(**connect_kwargs)
|
||||
|
||||
|
||||
def normalize_url(raw_url: str) -> str:
    """Return *raw_url* with an "https://" prefix added when it lacks a scheme.

    Empty/falsy input is returned unchanged.

    Fix: a bare "host:8080/path" parses under urlparse as scheme="host" with
    an empty netloc, so checking parsed.scheme alone left such URLs without
    a scheme. Requiring a non-empty netloc as well means only genuine
    "scheme://host" URLs are passed through untouched.
    """
    if not raw_url:
        return raw_url
    parsed = urlparse(raw_url)
    # Real absolute URLs ("https://example.com/...") have both a scheme and
    # a netloc; anything else gets the https:// prefix.
    if parsed.scheme and parsed.netloc:
        return raw_url
    return f"https://{raw_url}"
|
||||
|
||||
|
||||
def extract_meta(soup: BeautifulSoup, property_name: str, name: str):
    """Return trimmed content of <meta property=property_name>, falling back
    to <meta name=name>, or "" when neither carries a non-empty content."""
    lookups = (
        lambda: soup.find("meta", property=property_name),
        lambda: soup.find("meta", attrs={"name": name}),
    )
    for lookup in lookups:
        tag = lookup()
        if not tag:
            continue
        content = tag.get("content")
        if content:
            return content.strip()
    return ""
|
||||
|
||||
|
||||
def extract_fallback_description(soup: BeautifulSoup) -> str:
    """Return the first <p> text of at least 40 chars, truncated to 180; else ""."""
    for para in soup.find_all("p"):
        candidate = para.get_text(" ", strip=True)
        if len(candidate) < 40:
            continue
        return candidate[:180]
    return ""
|
||||
|
||||
|
||||
def fetch_metadata(url: str):
    """Fetch Open Graph / Twitter-card metadata for *url*.

    Returns a dict with "title", "description" and "image" keys. Results are
    memoized in the module-level METADATA_CACHE: successes live for
    CACHE_TTL_SECONDS, failures for the shorter FAILED_TTL_SECONDS so a
    failing host is retried sooner. Never raises — on any error the fallback
    (url as title, DEFAULT_DESCRIPTION, DEFAULT_IMAGE) is returned.
    """
    fallback = {
        "title": url,
        "description": DEFAULT_DESCRIPTION,
        "image": DEFAULT_IMAGE,
    }
    cached = METADATA_CACHE.get(url)
    now = time.time()
    if cached and cached["expires_at"] > now:
        return cached["data"]
    try:
        # Desktop browser User-Agent: some sites serve stripped-down pages
        # (no OG tags) to unknown agents.
        response = requests.get(
            url,
            headers={
                "User-Agent": (
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/121.0.0.0 Safari/537.36"
                )
            },
            timeout=6,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Use the post-redirect URL as the last-resort title.
        resolved_url = response.url or url

        # Title priority: og:title -> twitter:title -> <title> -> final URL.
        title = (
            extract_meta(soup, "og:title", "twitter:title")
            or extract_meta(soup, "twitter:title", "title")
            or (soup.title.string.strip() if soup.title and soup.title.string else "")
            or resolved_url
        )
        description = (
            extract_meta(soup, "og:description", "description")
            or extract_meta(soup, "twitter:description", "description")
        )
        if not description:
            # No meta description at all: fall back to the first substantial
            # paragraph of body text.
            description = extract_fallback_description(soup) or DEFAULT_DESCRIPTION
        image = (
            extract_meta(soup, "og:image", "twitter:image")
            or extract_meta(soup, "twitter:image", "image")
            or DEFAULT_IMAGE
        )
        data = {"title": title, "description": description, "image": image}
        METADATA_CACHE[url] = {
            "data": data,
            "expires_at": now + CACHE_TTL_SECONDS,
            "ok": True,
        }
        return data
    except Exception:
        # Best-effort by design: cache the fallback briefly so every page
        # render does not re-hit a broken host.
        METADATA_CACHE[url] = {
            "data": fallback,
            "expires_at": now + FAILED_TTL_SECONDS,
            "ok": False,
        }
        return fallback
|
||||
|
||||
|
||||
def _clamp_int(value, default: int, minimum: int, maximum: int) -> int:
|
||||
try:
|
||||
parsed = int(value)
|
||||
except Exception:
|
||||
return default
|
||||
return max(minimum, min(parsed, maximum))
|
||||
|
||||
|
||||
def get_table_columns(schema: str, table: str):
    """Return the set of column names of schema.table, cached per process.

    Note: a missing table caches as an empty set — restart (or clear
    TABLE_COLUMNS_CACHE) after DDL changes.
    """
    cache_key = (schema, table)
    if cache_key in TABLE_COLUMNS_CACHE:
        return TABLE_COLUMNS_CACHE[cache_key]

    with get_db_connection() as conn, conn.cursor() as cur:
        cur.execute(
            """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = %s AND table_name = %s
            """,
            (schema, table),
        )
        columns = {name for (name,) in cur.fetchall()}

    TABLE_COLUMNS_CACHE[cache_key] = columns
    return columns
|
||||
|
||||
|
||||
def get_table_columns_info(schema: str, table: str):
    """Return {column: {"data_type", "udt_name", "is_nullable"}} for
    schema.table, cached per process under a key distinct from
    get_table_columns' entries."""
    cache_key = ("info", schema, table)
    if cache_key in TABLE_COLUMNS_CACHE:
        return TABLE_COLUMNS_CACHE[cache_key]

    with get_db_connection() as conn, conn.cursor() as cur:
        cur.execute(
            """
            SELECT column_name, data_type, udt_name, is_nullable
            FROM information_schema.columns
            WHERE table_schema = %s AND table_name = %s
            """,
            (schema, table),
        )
        info = {}
        for column_name, data_type, udt_name, is_nullable in cur.fetchall():
            info[column_name] = {
                "data_type": data_type,
                "udt_name": udt_name,
                "is_nullable": is_nullable,
            }

    TABLE_COLUMNS_CACHE[cache_key] = info
    return info
|
||||
|
||||
|
||||
def get_request_identity(req) -> tuple[str | None, str | None]:
    """Return (email, ip) for the request; either element may be None.

    - email: taken from headers injected by a proxy/SSO layer
    - ip: X-Forwarded-For, then X-Real-IP, then remote_addr
    An explicit ref_type/ref pair (query string or cookie) overrides both
    and returns only that one identity.
    (Docstring translated from the original Korean.)
    """
    # 0) ncue.net/go integration: ref_type/ref may arrive via query string
    #    or as cookies previously stored by _maybe_set_ref_cookies.
    ref_type = (req.args.get("ref_type") or req.cookies.get("ref_type") or "").strip()
    ref = (req.args.get("ref") or req.cookies.get("ref") or "").strip()
    if ref_type in ("email", "ip") and ref:
        if ref_type == "email":
            return ref, None
        return None, ref

    # Common SSO / reverse-proxy identity headers, in priority order.
    email_headers = [
        "X-User-Email",
        "X-Forwarded-Email",
        "X-Auth-Request-Email",
        "X-Forwarded-User",
        "Remote-User",
        "X-Email",
    ]
    email = None
    for h in email_headers:
        v = (req.headers.get(h) or "").strip()
        # Crude sanity check that the header value looks like an email.
        if v and "@" in v:
            email = v
            break

    # The first entry in X-Forwarded-For is the original client address.
    xff = (req.headers.get("X-Forwarded-For") or "").strip()
    if xff:
        ip = xff.split(",")[0].strip()
    else:
        ip = (req.headers.get("X-Real-IP") or "").strip() or (req.remote_addr or "")
    ip = ip.strip() or None
    return email, ip
|
||||
|
||||
|
||||
def _maybe_set_ref_cookies(resp):
    """Persist valid ref_type/ref query parameters onto *resp* as cookies.

    The cookies (SameSite=Lax, 30 days) keep the referrer identity available
    to later JS requests against /links. The response is returned either way.
    """
    ref_type = (request.args.get("ref_type") or "").strip()
    ref = (request.args.get("ref") or "").strip()
    if ref and ref_type in ("email", "ip"):
        thirty_days = 60 * 60 * 24 * 30
        resp.set_cookie("ref_type", ref_type, max_age=thirty_days, samesite="Lax")
        resp.set_cookie("ref", ref, max_age=thirty_days, samesite="Lax")
    return resp
|
||||
|
||||
|
||||
def fetch_links_page_from_db(limit: int, offset: int):
    """Fetch one page of (id, url, created_at) rows, newest first.

    Table and schema names come from the TABLE / DB_SCHEMA environment
    variables and are validated by _table_ref before interpolation; the
    paging values are bound as query parameters.
    """
    table = os.getenv("TABLE", "news_link")
    schema = os.getenv("DB_SCHEMA", DEFAULT_SCHEMA)
    table_ref = _table_ref(schema, table)
    query = (
        f"SELECT id, url, created_at FROM {table_ref} "
        "ORDER BY created_at DESC OFFSET %s LIMIT %s"
    )
    with get_db_connection() as conn, conn.cursor() as cur:
        cur.execute(query, (offset, limit))
        return cur.fetchall()
|
||||
|
||||
|
||||
@app.get("/")
def index():
    """Render the landing page with the first page of links.

    Metadata fields (title/description/image) are sent empty; the front-end
    fills them in later via the /links endpoint. A DB failure renders the
    page with an error message instead of returning a 500.
    """
    links = []
    error_message = ""
    try:
        rows = fetch_links_page_from_db(DEFAULT_PAGE_SIZE, 0)
        for link_id, url, created_at in rows:
            links.append(
                {
                    "id": link_id,
                    "url": url,
                    # created_at may be a datetime or already a string,
                    # depending on the column type/driver.
                    "created_at": created_at.isoformat()
                    if isinstance(created_at, datetime)
                    else str(created_at),
                    "title": "",
                    "description": "",
                    "image": "",
                }
            )
    except Exception as exc:
        error_message = f"DB 조회 실패: {exc}"
    resp = make_response(
        render_template(
            "index.html",
            links=links,
            error_message=error_message,
            placeholder_data_uri=PLACEHOLDER_DATA_URI,
            default_image=DEFAULT_IMAGE,
        )
    )
    # Persist any ref_type/ref query parameters as cookies for /links calls.
    return _maybe_set_ref_cookies(resp)
|
||||
|
||||
|
||||
@app.get("/favicon.ico")
def favicon():
    """Serve the favicon at the root path so external sites (e.g. ncue.net)
    can fetch /favicon.ico directly."""
    one_week = 60 * 60 * 24 * 7
    return send_from_directory(
        app.static_folder,
        "favicon.ico",
        mimetype="image/x-icon",
        max_age=one_week,
    )
|
||||
|
||||
|
||||
@app.get("/links")
def get_links():
    """JSON pagination endpoint: one page of links enriched with OG metadata.

    Query params: limit (1..MAX_PAGE_SIZE, default DEFAULT_PAGE_SIZE) and
    offset (>= 0). Fetches limit+1 rows so has_more can be derived without
    a COUNT query, then resolves page metadata concurrently.
    """
    limit = _clamp_int(
        request.args.get("limit"), DEFAULT_PAGE_SIZE, minimum=1, maximum=MAX_PAGE_SIZE
    )
    offset = _clamp_int(request.args.get("offset"), 0, minimum=0, maximum=10_000_000)

    try:
        # Over-fetch one row: its presence means another page exists.
        rows_plus_one = fetch_links_page_from_db(limit + 1, offset)
    except Exception as exc:
        return jsonify({"error": "DB 조회 실패", "detail": str(exc)}), 500

    has_more = len(rows_plus_one) > limit
    rows = rows_plus_one[:limit]

    urls = [url for _, url, _ in rows]
    metas = []
    if urls:
        # Metadata fetches are network-bound; overlap them. executor.map
        # preserves input order, so metas aligns with rows below.
        with ThreadPoolExecutor(max_workers=min(8, len(urls))) as executor:
            metas = list(executor.map(fetch_metadata, urls))

    results = []
    for (link_id, url, created_at), meta in zip(rows, metas):
        results.append(
            {
                "id": link_id,
                "url": url,
                "created_at": created_at.isoformat()
                if isinstance(created_at, datetime)
                else str(created_at),
                # meta supplies title / description / image.
                **meta,
            }
        )
    return jsonify(
        {
            "items": results,
            "limit": limit,
            "offset": offset,
            "next_offset": offset + len(results),
            "has_more": has_more,
        }
    )
|
||||
|
||||
|
||||
@app.post("/links")
def add_link():
    """Create a link row from JSON body {"url": "..."} and return it as JSON.

    The INSERT is built dynamically against the columns that actually exist
    in the target table (discovered via information_schema) so the endpoint
    works across differing production schemas:
    - created_at / updated_at are filled with NOW() when present
    - author_id is populated according to its column type (see branches)
    Responds 400 on a missing URL, 500 on DB/config failures.
    (Comments translated from the original Korean; runtime strings unchanged.)
    """
    data = request.get_json(silent=True) or {}
    raw_url = (data.get("url") or "").strip()
    if not raw_url:
        return jsonify({"error": "URL을 입력해주세요."}), 400

    url = normalize_url(raw_url)
    table = os.getenv("TABLE", "news_link")
    schema = os.getenv("DB_SCHEMA", DEFAULT_SCHEMA)
    table_ref = _table_ref(schema, table)
    try:
        cols = get_table_columns(schema, table)
        cols_info = get_table_columns_info(schema, table)
        email, ip = get_request_identity(request)
        identity = email or ip  # prefer the email; fall back to the client IP

        insert_cols = ["url"]
        insert_vals_sql = ["%s"]
        insert_params = [url]

        # Production-schema compatibility: created_at/updated_at and
        # author_id may exist and be NOT NULL.
        if "created_at" in cols:
            insert_cols.append("created_at")
            insert_vals_sql.append("NOW()")
        if "updated_at" in cols:
            insert_cols.append("updated_at")
            insert_vals_sql.append("NOW()")

        if "author_id" in cols:
            author_col = cols_info.get("author_id", {})
            data_type = (author_col.get("data_type") or "").lower()
            udt = (author_col.get("udt_name") or "").lower()

            # 1) Text-like author_id: store the email/IP string directly.
            if data_type in ("text", "character varying", "character"):
                insert_cols.append("author_id")
                insert_vals_sql.append("%s")
                insert_params.append(identity or "unknown")

            # 2) Numeric author_id: a string cannot be stored.
            #    Use DEFAULT_AUTHOR_ID and, when those columns exist, record
            #    author_email/author_ip separately (schema compatibility).
            elif udt in ("int2", "int4", "int8") or data_type in ("smallint", "integer", "bigint", "numeric"):
                raw_author_id = os.getenv("DEFAULT_AUTHOR_ID")
                if raw_author_id is None or str(raw_author_id).strip() == "":
                    return (
                        jsonify(
                            {
                                "error": "DB 저장 실패",
                                "detail": "author_id가 정수 NOT NULL입니다. .env에 DEFAULT_AUTHOR_ID(정수)를 설정하거나, author_id 타입을 text로 변경하세요.",
                            }
                        ),
                        500,
                    )
                try:
                    author_id_int = int(raw_author_id)
                except Exception:
                    return (
                        jsonify(
                            {
                                "error": "DB 저장 실패",
                                "detail": f"DEFAULT_AUTHOR_ID는 정수여야 합니다: {raw_author_id!r}",
                            }
                        ),
                        500,
                    )
                insert_cols.append("author_id")
                insert_vals_sql.append("%s")
                insert_params.append(author_id_int)

                if "author_email" in cols and email:
                    insert_cols.append("author_email")
                    insert_vals_sql.append("%s")
                    insert_params.append(email)
                if "author_ip" in cols and ip:
                    insert_cols.append("author_ip")
                    insert_vals_sql.append("%s")
                    insert_params.append(ip)

            # 3) Any other type (uuid etc.): try the string and let a failure
            #    surface through the error detail below.
            else:
                insert_cols.append("author_id")
                insert_vals_sql.append("%s")
                insert_params.append(identity or "unknown")

        # table_ref was validated by _table_ref, column names are hard-coded
        # above, and all values are bound as parameters.
        with get_db_connection() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    f"INSERT INTO {table_ref} ({', '.join(insert_cols)}) VALUES ({', '.join(insert_vals_sql)}) RETURNING id, created_at",
                    tuple(insert_params),
                )
                link_id, created_at = cur.fetchone()
            conn.commit()
    except Exception as exc:
        return jsonify({"error": "DB 저장 실패", "detail": str(exc)}), 500

    meta = fetch_metadata(url)
    return jsonify(
        {
            "id": link_id,
            "url": url,
            "created_at": created_at.isoformat()
            if isinstance(created_at, datetime)
            else str(created_at),
            **meta,
        }
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Development entry point only (debug=True); run behind a proper WSGI
    # server in production.
    app.run(host="0.0.0.0", port=8021, debug=True)
|
||||
Reference in New Issue
Block a user