diff --git a/README.md b/README.md index fdc96a4..80a5748 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,8 @@ TABLE=news_link ```dotenv PORT=8021 +DB_SCHEMA=public +DEFAULT_AUTHOR_ID=1 DEFAULT_PAGE_SIZE=30 MAX_PAGE_SIZE=60 CACHE_TTL_SECONDS=3600 diff --git a/app.py b/app.py index fe3a59d..d77866b 100644 --- a/app.py +++ b/app.py @@ -1,4 +1,5 @@ import os +import re import time from concurrent.futures import ThreadPoolExecutor from datetime import datetime @@ -20,6 +21,7 @@ DEFAULT_IMAGE = "/static/placeholder.svg" CACHE_TTL_SECONDS = int(os.getenv("CACHE_TTL_SECONDS", "3600")) FAILED_TTL_SECONDS = int(os.getenv("FAILED_TTL_SECONDS", "300")) METADATA_CACHE = {} +TABLE_COLUMNS_CACHE = {} PLACEHOLDER_DATA_URI = ( "data:image/svg+xml;utf8," " str: + if not name or not re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", name): + raise ValueError(f"Invalid SQL identifier: {name!r}") + return name + + +def _table_ref(schema: str, table: str) -> str: + return f"{_safe_identifier(schema)}.{_safe_identifier(table)}" def get_db_connection(): @@ -140,12 +153,95 @@ def _clamp_int(value, default: int, minimum: int, maximum: int) -> int: return max(minimum, min(parsed, maximum)) -def fetch_links_page_from_db(limit: int, offset: int): - table = os.getenv("TABLE", "news_link") +def get_table_columns(schema: str, table: str): + key = (schema, table) + cached = TABLE_COLUMNS_CACHE.get(key) + if cached is not None: + return cached + with get_db_connection() as conn: with conn.cursor() as cur: cur.execute( - f"SELECT id, url, created_at FROM {table} ORDER BY created_at DESC OFFSET %s LIMIT %s", + """ + SELECT column_name + FROM information_schema.columns + WHERE table_schema = %s AND table_name = %s + """, + (schema, table), + ) + cols = {row[0] for row in cur.fetchall()} + + TABLE_COLUMNS_CACHE[key] = cols + return cols + + +def get_table_columns_info(schema: str, table: str): + key = ("info", schema, table) + cached = TABLE_COLUMNS_CACHE.get(key) + if cached is not None: + return cached + + with get_db_connection() as conn: + with conn.cursor() as cur: + cur.execute( + """ + SELECT column_name, data_type, udt_name, is_nullable + FROM information_schema.columns + WHERE table_schema = %s AND table_name = %s + """, + (schema, table), + ) + info = { + row[0]: { + "data_type": row[1], + "udt_name": row[2], + "is_nullable": row[3], + } + for row in cur.fetchall() + } + + TABLE_COLUMNS_CACHE[key] = info + return info + + +def get_request_identity(req) -> tuple[str | None, str | None]: + """ + 가능한 경우 (email, ip)를 반환. + - 이메일: 프록시/SSO가 주입하는 헤더에서 추출 + - IP: X-Forwarded-For / X-Real-IP / remote_addr 순 + """ + email_headers = [ + "X-User-Email", + "X-Forwarded-Email", + "X-Auth-Request-Email", + "X-Forwarded-User", + "Remote-User", + "X-Email", + ] + email = None + for h in email_headers: + v = (req.headers.get(h) or "").strip() + if v and "@" in v: + email = v + break + + xff = (req.headers.get("X-Forwarded-For") or "").strip() + if xff: + ip = xff.split(",")[0].strip() + else: + ip = (req.headers.get("X-Real-IP") or "").strip() or (req.remote_addr or "") + ip = ip.strip() or None + return email, ip + + +def fetch_links_page_from_db(limit: int, offset: int): + table = os.getenv("TABLE", "news_link") + schema = os.getenv("DB_SCHEMA", DEFAULT_SCHEMA) + table_ref = _table_ref(schema, table) + with get_db_connection() as conn: + with conn.cursor() as cur: + cur.execute( + f"SELECT id, url, created_at FROM {table_ref} ORDER BY created_at DESC OFFSET %s LIMIT %s", (offset, limit), ) return cur.fetchall() @@ -234,13 +330,87 @@ def add_link(): url = normalize_url(raw_url) table = os.getenv("TABLE", "news_link") + schema = os.getenv("DB_SCHEMA", DEFAULT_SCHEMA) + table_ref = _table_ref(schema, table) try: + cols = get_table_columns(schema, table) + cols_info = get_table_columns_info(schema, table) + email, ip = get_request_identity(request) + identity = email or ip # 이메일 우선, 없으면 IP + + insert_cols = ["url"] + insert_vals_sql = ["%s"] + insert_params = [url] + + # 운영 DB 스키마 호환: created_at/updated_at, author_id 등이 NOT NULL일 수 있음 + if "created_at" in cols: + insert_cols.append("created_at") + insert_vals_sql.append("NOW()") + if "updated_at" in cols: + insert_cols.append("updated_at") + insert_vals_sql.append("NOW()") + + if "author_id" in cols: + author_col = cols_info.get("author_id", {}) + data_type = (author_col.get("data_type") or "").lower() + udt = (author_col.get("udt_name") or "").lower() + + # 1) author_id가 텍스트 계열이면: 이메일/아이피 문자열을 그대로 저장 + if data_type in ("text", "character varying", "character"): + insert_cols.append("author_id") + insert_vals_sql.append("%s") + insert_params.append(identity or "unknown") + + # 2) author_id가 숫자(정수/숫자)면: 문자열 저장 불가 + # → 기존 DEFAULT_AUTHOR_ID로 채우고, 가능한 경우 author_email/author_ip에 따로 저장(스키마 호환) + elif udt in ("int2", "int4", "int8") or data_type in ("smallint", "integer", "bigint", "numeric"): + raw_author_id = os.getenv("DEFAULT_AUTHOR_ID") + if raw_author_id is None or str(raw_author_id).strip() == "": + return ( + jsonify( + { + "error": "DB 저장 실패", + "detail": "author_id가 정수 NOT NULL입니다. .env에 DEFAULT_AUTHOR_ID(정수)를 설정하거나, author_id 타입을 text로 변경하세요.", + } + ), + 500, + ) + try: + author_id_int = int(raw_author_id) + except Exception: + return ( + jsonify( + { + "error": "DB 저장 실패", + "detail": f"DEFAULT_AUTHOR_ID는 정수여야 합니다: {raw_author_id!r}", + } + ), + 500, + ) + insert_cols.append("author_id") + insert_vals_sql.append("%s") + insert_params.append(author_id_int) + + if "author_email" in cols and email: + insert_cols.append("author_email") + insert_vals_sql.append("%s") + insert_params.append(email) + if "author_ip" in cols and ip: + insert_cols.append("author_ip") + insert_vals_sql.append("%s") + insert_params.append(ip) + + # 3) 기타 타입(uuid 등): 우선 문자열을 넣되 실패 시 detail로 노출 + else: + insert_cols.append("author_id") + insert_vals_sql.append("%s") + insert_params.append(identity or "unknown") + with get_db_connection() as conn: with conn.cursor() as cur: cur.execute( - # created_at에 DEFAULT가 없더라도 저장되도록 NOW()를 함께 기록 - f"INSERT INTO {table} (url, created_at) VALUES (%s, NOW()) RETURNING id, created_at", - (url,), + f"INSERT INTO {table_ref} ({', '.join(insert_cols)}) VALUES ({', '.join(insert_vals_sql)}) RETURNING id, created_at", + tuple(insert_params), ) link_id, created_at = cur.fetchone() conn.commit()