diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8aea70c --- /dev/null +++ b/.gitignore @@ -0,0 +1,176 @@ +backend/uploads/* +backend/vectordb/* +frontend/node_modules +frontend/node_modules/* + + +# ---> Python +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + diff --git a/README.md b/README.md new file mode 100644 index 0000000..41a3f7b --- /dev/null +++ b/README.md @@ -0,0 +1,260 @@ +# 연구QA Chatbot + +AI 기반 연구 문서 분석 도우미 챗봇입니다. PDF 문서를 업로드하고 AI와 대화하여 문서 내용에 대해 질문할 수 있습니다. 
+ +## 🚀 설치 및 실행 + +### 1. PostgreSQL 설치 + +* localhost에 설치를 한다. +* 사용자 설정은 다음과 같다. + * 사용자명: woonglab + * 비밀번호: !@#woonglab + * 데이터베이스: researchqa + * 호스트: localhost + * 포트: 5432 +* 필요한 테이블 + * database.sql을 참고한다. (create 구문으로 실행하면 필요 테이블 생성시킬 수 있음) + + +### 2. 백엔드 설정 및 실행 +```bash +# 먼저 ollama를 설치하고 qwen3:8b (5.2GB)를 다운받는다 + +cd backend +pip install -r requirements.txt + +# 기존 프로세스 제거 +#pkill -f "python main.py" && sleep 2 + +python main.py +``` +백엔드 서버가 `http://localhost:8000`에서 실행됩니다. + +### 3. 프론트 실행 과정 +```bash +cd frontend +rm -rf node_modules package-lock.json +npm install + +# 기존 프로세스 제거 +#pkill -f "react-scripts" + +npm start +``` +프론트엔드가 `http://localhost:3000`에서 실행됩니다. + + + + +## 📖 사용법 + +1. **로그인**: 파일 업로드 버튼(📁)을 클릭하여 로그인 + - 아이디: `admin` + - 비밀번호: `researchqa` + +2. **PDF 업로드**: 로그인 후 "PDF 업로드" 메뉴에서 파일 업로드 + - 최대 5개 파일까지 업로드 가능 + - PDF 파일만 업로드 가능 + +3. **챗봇 대화**: 메인 화면에서 업로드된 문서에 대해 질문 + - 참조 문서 클릭 시 PDF 뷰어에서 해당 페이지 표시 + - 키보드 네비게이션 지원 (화살표키, Home, End) + +4. **PDF 뷰어**: Adobe Reader 스타일의 고급 뷰어 + - 연속 페이지 모드 지원 + - 줌 인/아웃, 회전 기능 + - 키보드 네비게이션 + +## 🚀 주요 기능 + +- **📄 PDF 문서 업로드**: PDF 파일을 드래그 앤 드롭 또는 클릭으로 업로드 +- **🤖 AI 챗봇**: 업로드된 문서를 기반으로 한 질문 답변 +- **📚 문서 관리**: 업로드된 문서 목록 조회, 검색, 삭제 +- **🔒 보안 로그인**: 관리자 인증 시스템 +- **👁️ PDF 뷰어**: Adobe Reader 스타일의 고급 PDF 뷰어 +- **🔍 벡터 검색**: ChromaDB 기반 정확한 문서 검색 + +## 🛠️ 기술 스택 + +### 백엔드 +- **FastAPI**: 고성능 Python 웹 프레임워크 +- **LangChain v0.3**: AI 프레임워크 (RAG, 체인, 에이전트) +- **KoE5**: 한국어 임베딩 모델 (jhgan/ko-sroberta-multitask) +- **ChromaDB**: 벡터 데이터베이스 (LangChain 통합) +- **Ollama**: LLM 모델 서빙 (LangChain 통합) +- **Docling**: 최신 PDF 파싱 라이브러리 +- **PostgreSQL**: 메타데이터 저장소 + +### 프론트엔드 +- **React 18**: 최신 React 버전 +- **TypeScript**: 타입 안전성 +- **Tailwind CSS**: 유틸리티 기반 CSS 프레임워크 +- **Framer Motion**: 애니메이션 라이브러리 +- **Lucide React**: 아이콘 라이브러리 +- **React PDF**: PDF 뷰어 컴포넌트 + +## 📦 패키지 구조 + +### 백엔드 패키지 (backend/requirements.txt) +``` +# Core Web Framework +fastapi>=0.104.1 +uvicorn>=0.24.0 +python-multipart>=0.0.6 +pydantic>=2.7.4 + 
+# LangChain v0.3 AI Framework +langchain>=0.3.0 +langchain-community>=0.3.0 +langchain-core>=0.3.0 +langchain-experimental>=0.3.0 + +# LLM Integration +ollama>=0.6.0 + +# Vector Database & Embeddings +chromadb>=0.4.22 +sentence-transformers>=2.2.2 + +# PDF Processing +docling>=2.55.0 +docling-core>=2.48.0 + +# Database +psycopg2-binary>=2.9.9 + +# Utilities +python-dotenv>=1.0.0 +numpy>=1.26.4 +``` + +### 프론트엔드 패키지 (frontend/package.json) +``` +# Core React +react: ^18.2.0 +react-dom: ^18.2.0 +react-scripts: 5.0.1 +typescript: ^4.9.5 + +# UI & Styling +framer-motion: ^10.16.0 +lucide-react: ^0.294.0 +tailwindcss: ^3.3.0 +autoprefixer: ^10.4.0 +postcss: ^8.4.0 + +# PDF Viewer +react-pdf: ^10.1.0 +pdfjs-dist: ^5.3.93 + +# TypeScript Types +@types/react: ^18.2.0 +@types/react-dom: ^18.2.0 +@types/node: ^20.0.0 +``` + +## 🔌 API 엔드포인트 (LangChain 기반) + +- `GET /`: 루트 엔드포인트 +- `GET /health`: 헬스 체크 (LangChain 서비스 상태 포함) +- `POST /chat`: LangChain RAG 기반 챗봇 대화 +- `POST /upload`: PDF 파일 업로드 및 LangChain 처리 +- `GET /files`: 파일 목록 조회 +- `DELETE /files/{file_id}`: 파일 삭제 (LangChain 벡터스토어 포함) +- `GET /pdf/{file_id}/view`: PDF 파일 조회 +- `GET /search`: LangChain 유사 문서 검색 +- `GET /stats`: 시스템 통계 (LangChain 컬렉션 정보 포함) + +## 📁 프로젝트 구조 + +``` +researchqa/ +├── backend/ # 백엔드 서버 (LangChain 기반) +│ ├── main.py # FastAPI 메인 애플리케이션 (LangChain) +│ ├── main_legacy.py # 기존 직접 구현 버전 (백업) +│ ├── requirements.txt # Python 의존성 (LangChain 포함) +│ ├── services/ # LangChain 서비스 모듈 +│ │ ├── __init__.py # 서비스 패키지 초기화 +│ │ └── langchain_service.py # LangChain RAG 서비스 +│ ├── uploads/ # 업로드된 파일 저장소 +│ ├── vectordb/ # ChromaDB 벡터 데이터베이스 +│ └── parser/ # 문서 파서 모듈 +│ ├── pdf/ # PDF 파서 +│ │ ├── MainParser.py # 메인 PDF 파서 +│ │ └── Parser1.py # 확장 PDF 파서 +│ └── ocr/ # OCR 파서 +│ ├── MainParser.py # 메인 OCR 파서 +│ └── Parser1.py # 확장 OCR 파서 +├── frontend/ # 프론트엔드 애플리케이션 +│ ├── src/ +│ │ ├── components/ # React 컴포넌트 +│ │ │ ├── ChatInterface.tsx # 채팅 인터페이스 +│ │ │ ├── FileUploadModal.tsx # 파일 업로드 모달 +│ │ │ ├── 
LoginModal.tsx # 로그인 모달 +│ │ │ ├── MessageBubble.tsx # 메시지 버블 +│ │ │ ├── PDFViewer.tsx # PDF 뷰어 +│ │ │ └── TypingIndicator.tsx # 타이핑 인디케이터 +│ │ ├── contexts/ # React 컨텍스트 +│ │ │ ├── AuthContext.tsx # 인증 컨텍스트 +│ │ │ ├── ChatContext.tsx # 채팅 컨텍스트 +│ │ │ └── FileContext.tsx # 파일 컨텍스트 +│ │ ├── App.tsx # 메인 앱 컴포넌트 +│ │ ├── index.tsx # 엔트리 포인트 +│ │ └── index.css # 글로벌 스타일 +│ ├── public/ # 정적 파일 +│ │ ├── images/ # 이미지 파일 +│ │ ├── pdf.worker.min.js # PDF.js 워커 +│ │ ├── AnnotationLayer.css # PDF 주석 레이어 +│ │ └── TextLayer.css # PDF 텍스트 레이어 +│ ├── package.json # Node.js 의존성 +│ ├── tailwind.config.js # Tailwind 설정 +│ ├── postcss.config.js # PostCSS 설정 +│ └── tsconfig.json # TypeScript 설정 +├── start_backend.sh # 백엔드 시작 스크립트 +├── start_frontend.sh # 프론트엔드 시작 스크립트 +├── package.json # 루트 패키지 설정 +└── README.md # 프로젝트 문서 +``` + +## ✨ 주요 특징 + +- **🔍 최신 PDF 파싱**: Docling을 사용한 고성능 PDF 텍스트 추출 +- **🇰🇷 한국어 최적화**: KoE5 임베딩 모델로 한국어 문서 처리 +- **📱 반응형 UI**: 모바일과 데스크톱 모두 지원 +- **💬 실시간 채팅**: REST API 기반 실시간 대화 +- **🎯 정확한 검색**: LangChain RAG로 정확한 답변 +- **👁️ 고급 PDF 뷰어**: Adobe Reader 스타일의 뷰어 +- **🔒 보안**: JWT 기반 인증 시스템 +- **⚡ 고성능**: FastAPI와 LangChain으로 최적화된 성능 +- **🚀 확장성**: LangChain v0.3 기반 향후 고도화 가능 +- **🔗 체인 기반**: RAG, 에이전트, 메모리 등 다양한 AI 패턴 지원 + +## 🗄️ 데이터베이스 + +- **ChromaDB**: 벡터 임베딩 저장 및 유사도 검색 (LangChain 통합) +- **PostgreSQL**: 파일 메타데이터 및 사용자 정보 저장 +- **LangChain VectorStore**: 확장 가능한 벡터 검색 인터페이스 + +## 🔧 개발 환경 + +- **Python**: 3.9+ +- **Node.js**: 16+ +- **PostgreSQL**: 12+ +- **Ollama**: 최신 버전 + +## 📝 라이선스 + +MIT License + +## 🤝 기여하기 + +1. Fork the Project +2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`) +3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`) +4. Push to the Branch (`git push origin feature/AmazingFeature`) +5. Open a Pull Request + +## 📞 지원 + +프로젝트에 대한 질문이나 지원이 필요하시면 이슈를 생성해 주세요. 
\ No newline at end of file diff --git a/backend/main.py b/backend/main.py new file mode 100644 index 0000000..d07e507 --- /dev/null +++ b/backend/main.py @@ -0,0 +1,391 @@ +""" +LangChain v0.3 기반 연구QA 챗봇 API +향후 고도화를 위한 확장 가능한 아키텍처 +""" + +from fastapi import FastAPI, HTTPException, Depends, UploadFile, File, Form +from fastapi.responses import FileResponse +from fastapi.middleware.cors import CORSMiddleware +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from pydantic import BaseModel +from typing import List, Optional +from contextlib import asynccontextmanager +import os +import uuid +import shutil +from datetime import datetime +import json +import logging +import psycopg2 +from psycopg2.extras import RealDictCursor + +# LangChain 서비스 임포트 +from services.langchain_service import langchain_service +from parser.pdf.MainParser import PDFParser + +# 로깅 설정 +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Pydantic 모델들 +class ChatRequest(BaseModel): + message: str + user_id: Optional[str] = None + +class ChatResponse(BaseModel): + response: str + sources: List[str] + timestamp: str + +class FileUploadResponse(BaseModel): + message: str + file_id: str + filename: str + status: str + +class FileListResponse(BaseModel): + files: List[dict] + total: int + +# FastAPI 앱 생성 +@asynccontextmanager +async def lifespan(app: FastAPI): + """앱 시작/종료 시 실행""" + # 시작 시 + logger.info("🚀 LangChain 기반 연구QA 챗봇 시작") + try: + langchain_service.initialize() + logger.info("✅ LangChain 서비스 초기화 완료") + except Exception as e: + logger.error(f"❌ LangChain 서비스 초기화 실패: {e}") + raise + + yield + + # 종료 시 + logger.info("🛑 LangChain 기반 연구QA 챗봇 종료") + +app = FastAPI( + title="연구QA Chatbot API", + description="LangChain v0.3 기반 고성능 PDF 파싱과 벡터 검색을 활용한 연구 질의응답 시스템", + version="2.0.0", + lifespan=lifespan +) + +# CORS 설정 +app.add_middleware( + CORSMiddleware, + allow_origins=["http://localhost:3000", "http://127.0.0.1:3000"], + 
allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# 보안 설정 +security = HTTPBearer(auto_error=False) + +def get_db_connection(): + """PostgreSQL 데이터베이스 연결""" + try: + connection = psycopg2.connect( + host="localhost", + port=5432, + database="researchqa", + user="woonglab", + password="!@#woonglab" + ) + connection.autocommit = True + return connection + except Exception as e: + logger.error(f"PostgreSQL 연결 실패: {e}") + raise HTTPException(status_code=500, detail="데이터베이스 연결 실패") + +# API 엔드포인트들 +@app.get("/") +async def root(): + """루트 엔드포인트""" + return { + "message": "LangChain 기반 연구QA 챗봇 API", + "version": "2.0.0", + "status": "running" + } + +@app.get("/health") +async def health_check(): + """헬스 체크""" + try: + # LangChain 서비스 상태 확인 + collection_info = langchain_service.get_collection_info() + + return { + "status": "healthy", + "langchain_service": "active", + "collection_info": collection_info, + "timestamp": datetime.now().isoformat() + } + except Exception as e: + logger.error(f"헬스 체크 실패: {e}") + raise HTTPException(status_code=500, detail=f"서비스 상태 불량: {e}") + +@app.post("/chat", response_model=ChatResponse) +async def chat(request: ChatRequest): + """LangChain RAG 기반 채팅""" + try: + logger.info(f"💬 채팅 요청: {request.message}") + + # LangChain RAG를 통한 답변 생성 + result = langchain_service.generate_answer(request.message) + + response = ChatResponse( + response=result["answer"], + sources=result["references"], + timestamp=datetime.now().isoformat() + ) + + logger.info(f"✅ 답변 생성 완료: {len(result['references'])}개 참조") + return response + + except Exception as e: + logger.error(f"❌ 채팅 처리 실패: {e}") + raise HTTPException(status_code=500, detail=f"채팅 처리 실패: {e}") + +@app.post("/upload", response_model=FileUploadResponse) +async def upload_file(file: UploadFile = File(...)): + """PDF 파일 업로드 및 LangChain 처리""" + try: + # 파일 유효성 검사 + if not file.filename.lower().endswith('.pdf'): + raise HTTPException(status_code=400, detail="PDF 파일만 업로드 가능합니다") + + # 
파일 ID 생성 (UUID) + file_id = str(uuid.uuid4()) + filename = file.filename + + logger.info(f"📄 파일 업로드 시작: {filename}") + + # 파일 저장 + upload_dir = "uploads" + os.makedirs(upload_dir, exist_ok=True) + file_path = os.path.join(upload_dir, f"{file_id}_{filename}") + + with open(file_path, "wb") as buffer: + shutil.copyfileobj(file.file, buffer) + + # PDF 파싱 + parser = PDFParser() + result = parser.process_pdf(file_path) + + if not result["success"]: + raise HTTPException(status_code=400, detail=f"PDF 파싱 실패: {result.get('error', 'Unknown error')}") + + # LangChain 문서로 변환 + from langchain_core.documents import Document + langchain_docs = [] + + # 청크별로 문서 생성 + for i, chunk in enumerate(result["chunks"]): + langchain_doc = Document( + page_content=chunk, + metadata={ + "filename": filename, + "chunk_index": i, + "file_id": file_id, + "upload_time": datetime.now().isoformat(), + "total_chunks": len(result["chunks"]) + } + ) + langchain_docs.append(langchain_doc) + + # LangChain 벡터스토어에 추가 + langchain_service.add_documents(langchain_docs) + + # 데이터베이스에 메타데이터 저장 + db_conn = get_db_connection() + cursor = db_conn.cursor() + + cursor.execute(""" + INSERT INTO uploaded_file (filename, file_path, status, upload_dt) + VALUES (%s, %s, %s, %s) + """, (filename, file_path, "processed", datetime.now())) + + cursor.close() + + logger.info(f"✅ 파일 업로드 완료: {filename} ({len(langchain_docs)}개 문서)") + + return FileUploadResponse( + message=f"파일 업로드 및 처리 완료: {len(langchain_docs)}개 문서", + file_id=file_id, + filename=filename, + status="success" + ) + + except Exception as e: + logger.error(f"❌ 파일 업로드 실패: {e}") + raise HTTPException(status_code=500, detail=f"파일 업로드 실패: {e}") + +@app.get("/files", response_model=FileListResponse) +async def get_files(): + """업로드된 파일 목록 조회""" + try: + db_conn = get_db_connection() + cursor = db_conn.cursor(cursor_factory=RealDictCursor) + + cursor.execute(""" + SELECT id, filename, upload_dt as upload_time, status + FROM uploaded_file + ORDER BY upload_dt DESC + 
""") + + files = cursor.fetchall() + cursor.close() + + return FileListResponse( + files=[dict(file) for file in files], + total=len(files) + ) + + except Exception as e: + logger.error(f"❌ 파일 목록 조회 실패: {e}") + raise HTTPException(status_code=500, detail=f"파일 목록 조회 실패: {e}") + +@app.delete("/files/{file_id}") +async def delete_file(file_id: str): + """파일 삭제""" + try: + db_conn = get_db_connection() + cursor = db_conn.cursor() + + # 파일 정보 조회 + cursor.execute("SELECT filename FROM uploaded_file WHERE id = %s", (file_id,)) + result = cursor.fetchone() + + if not result: + raise HTTPException(status_code=404, detail="파일을 찾을 수 없습니다") + + filename = result[0] + + # LangChain 벡터스토어에서 삭제 + langchain_service.delete_documents_by_filename(filename) + + # 데이터베이스에서 삭제 + cursor.execute("DELETE FROM uploaded_file WHERE id = %s", (file_id,)) + + # 실제 파일 삭제 + try: + os.remove(f"uploads/{file_id}_{filename}") + except FileNotFoundError: + pass + + cursor.close() + + logger.info(f"✅ 파일 삭제 완료: {filename}") + + return {"message": f"파일 삭제 완료: {filename}"} + + except Exception as e: + logger.error(f"❌ 파일 삭제 실패: {e}") + raise HTTPException(status_code=500, detail=f"파일 삭제 실패: {e}") + +@app.get("/pdf/{file_id}/view") +async def view_pdf(file_id: str): + """PDF 파일 뷰어""" + try: + db_conn = get_db_connection() + cursor = db_conn.cursor() + + # UUID가 전달된 경우 정수 ID로 변환 + try: + # 먼저 정수 ID로 시도 + cursor.execute("SELECT filename, file_path FROM uploaded_file WHERE id = %s", (int(file_id),)) + result = cursor.fetchone() + except ValueError: + # UUID가 전달된 경우 file_path에서 UUID를 찾아서 매칭 + cursor.execute("SELECT id, filename, file_path FROM uploaded_file") + all_files = cursor.fetchall() + result = None + for file_row in all_files: + if file_id in file_row[2]: # file_path에 UUID가 포함되어 있는지 확인 + result = (file_row[1], file_row[2]) # filename, file_path + break + + if not result: + raise HTTPException(status_code=404, detail="파일을 찾을 수 없습니다") + + filename = result[0] + file_path = result[1] + + # 절대 경로로 변환 + if 
not os.path.isabs(file_path): + file_path = os.path.abspath(file_path) + + if not os.path.exists(file_path): + raise HTTPException(status_code=404, detail="파일이 존재하지 않습니다") + + cursor.close() + + return FileResponse( + path=file_path, + media_type="application/pdf", + filename=filename + ) + + except Exception as e: + logger.error(f"❌ PDF 뷰어 실패: {e}") + raise HTTPException(status_code=500, detail=f"PDF 뷰어 실패: {e}") + +@app.get("/search") +async def search_documents(query: str, limit: int = 5): + """문서 검색""" + try: + # LangChain 유사 문서 검색 + documents = langchain_service.search_similar_documents(query, k=limit) + + results = [] + for doc in documents: + results.append({ + "content": doc.page_content[:200] + "...", + "metadata": doc.metadata, + "score": getattr(doc, 'score', 0.0) + }) + + return { + "query": query, + "results": results, + "total": len(results) + } + + except Exception as e: + logger.error(f"❌ 문서 검색 실패: {e}") + raise HTTPException(status_code=500, detail=f"문서 검색 실패: {e}") + +@app.get("/stats") +async def get_stats(): + """시스템 통계""" + try: + # LangChain 컬렉션 정보 + collection_info = langchain_service.get_collection_info() + + # 데이터베이스 통계 + db_conn = get_db_connection() + cursor = db_conn.cursor() + + cursor.execute("SELECT COUNT(*) FROM uploaded_file") + file_count = cursor.fetchone()[0] + + cursor.close() + + return { + "langchain_stats": collection_info, + "database_stats": { + "total_files": file_count + }, + "timestamp": datetime.now().isoformat() + } + + except Exception as e: + logger.error(f"❌ 통계 조회 실패: {e}") + raise HTTPException(status_code=500, detail=f"통계 조회 실패: {e}") + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/backend/parser/ocr/MainParser.py b/backend/parser/ocr/MainParser.py new file mode 100644 index 0000000..4a7d7ae --- /dev/null +++ b/backend/parser/ocr/MainParser.py @@ -0,0 +1,12 @@ + + + +def process(input): + text = input + ###### + return text + +if 
__name__ == '__main__': + input = 'a.pdf' + b = process(input) + print(b) diff --git a/backend/parser/ocr/Parser1.py b/backend/parser/ocr/Parser1.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/parser/pdf/MainParser.py b/backend/parser/pdf/MainParser.py new file mode 100644 index 0000000..5dcf3d0 --- /dev/null +++ b/backend/parser/pdf/MainParser.py @@ -0,0 +1,142 @@ +import logging +from typing import List, Dict, Any +from docling.document_converter import DocumentConverter + +# 로깅 설정 +logger = logging.getLogger(__name__) + +class PDFParser: + """PDF 파일을 파싱하는 클래스 (docling 사용)""" + + def __init__(self): + self.chunk_size = 1000 + # docling 변환기 초기화 (OCR 없이) + self.converter = DocumentConverter() + + def extract_text_from_pdf(self, file_path: str) -> tuple[str, list]: + """ + PDF 파일에서 텍스트와 페이지 정보를 추출합니다. (docling 사용) + + Args: + file_path (str): PDF 파일 경로 + + Returns: + tuple[str, list]: (추출된 텍스트, 페이지별 텍스트 리스트) + """ + try: + logger.info(f"Docling으로 PDF 파싱 시작: {file_path}") + + # docling을 사용하여 PDF 변환 + result = self.converter.convert(file_path) + document = result.document + + # docling의 export_to_text() 메서드 사용 + text_content = document.export_to_text() + + # 페이지별 텍스트 추출 + page_texts = [] + if hasattr(document, 'pages') and document.pages: + for page in document.pages: + if hasattr(page, 'export_to_text'): + page_text = page.export_to_text() + page_texts.append(page_text) + else: + page_texts.append("") + else: + # 페이지 정보가 없는 경우 전체 텍스트를 첫 페이지로 처리 + page_texts = [text_content] + + logger.info(f"PDF 텍스트 추출 완료 (docling): {file_path}, 텍스트 길이: {len(text_content)}, 페이지 수: {len(page_texts)}") + return text_content, page_texts + + except Exception as e: + logger.error(f"PDF 텍스트 추출 실패: {file_path}, 오류: {e}") + # docling 실패 시 빈 텍스트 반환 + logger.warning(f"Docling 파싱 실패, 빈 텍스트로 처리: {e}") + return "", [""] + + def chunk_text(self, text: str) -> List[str]: + """ + 텍스트를 청크로 분할합니다. 
+ + Args: + text (str): 분할할 텍스트 + + Returns: + List[str]: 청크 리스트 + """ + if not text.strip(): + return [] + + chunks = [] + for i in range(0, len(text), self.chunk_size): + chunk = text[i:i+self.chunk_size] + if chunk.strip(): # 빈 청크 제외 + chunks.append(chunk) + + logger.info(f"텍스트 청크 분할 완료: {len(chunks)}개 청크") + return chunks + + def process_pdf(self, file_path: str) -> Dict[str, Any]: + """ + PDF 파일을 처리하여 텍스트와 청크를 반환합니다. + + Args: + file_path (str): PDF 파일 경로 + + Returns: + Dict[str, Any]: 처리 결과 + """ + try: + # 텍스트 및 페이지 정보 추출 + text_content, page_texts = self.extract_text_from_pdf(file_path) + + # 청크 분할 + chunks = self.chunk_text(text_content) + + return { + "text_content": text_content, + "chunks": chunks, + "chunk_count": len(chunks), + "page_texts": page_texts, + "page_count": len(page_texts), + "success": True + } + + except Exception as e: + logger.error(f"PDF 처리 실패: {file_path}, 오류: {e}") + return { + "text_content": "", + "chunks": [], + "chunk_count": 0, + "page_texts": [], + "page_count": 0, + "success": False, + "error": str(e) + } + +def process(input_path: str) -> str: + """ + PDF 파일을 처리하는 메인 함수 (기존 인터페이스 유지) + + Args: + input_path (str): PDF 파일 경로 + + Returns: + str: 추출된 텍스트 + """ + parser = PDFParser() + result = parser.process_pdf(input_path) + + if result["success"]: + return result["text_content"] + else: + logger.error(f"PDF 처리 실패: {result.get('error', 'Unknown error')}") + return "" + +if __name__ == '__main__': + # 테스트 코드 + input_file = 'a.pdf' + result = process(input_file) + print(f"추출된 텍스트 길이: {len(result)}") + print(f"텍스트 미리보기: {result[:200]}...") \ No newline at end of file diff --git a/backend/parser/pdf/Parser1.py b/backend/parser/pdf/Parser1.py new file mode 100644 index 0000000..4c84b73 --- /dev/null +++ b/backend/parser/pdf/Parser1.py @@ -0,0 +1,10 @@ + +def process(input): + text = input + ###### + return text + +if __name__ == '__main__': + input = '../../uploads/dea8cfaa-c940-4da8-bb1f-44c4882f8cf2_01)DWPRND-DT-SOP-001_연구자료실 
운영방법.pdf' + b = process(input) + print(b) diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..00bf970 --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,31 @@ +# Core Web Framework +fastapi>=0.104.1 +uvicorn>=0.24.0 +python-multipart>=0.0.6 +pydantic>=2.7.4 + +# LangChain v0.3 AI Framework +langchain>=0.3.0 +langchain-community>=0.3.0 +langchain-core>=0.3.0 +langchain-experimental>=0.3.0 + +# LLM Integration +ollama>=0.6.0 + +# Vector Database & Embeddings +chromadb>=0.4.22 +sentence-transformers>=2.2.2 + +# PDF Processing +docling>=2.55.0 +docling-core>=2.48.0 + +# Database +psycopg2-binary>=2.9.9 + +# Utilities +python-dotenv>=1.0.0 +numpy>=1.26.4 + +easyocr diff --git a/backend/services/__init__.py b/backend/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/services/langchain_service.py b/backend/services/langchain_service.py new file mode 100644 index 0000000..c7cdfe5 --- /dev/null +++ b/backend/services/langchain_service.py @@ -0,0 +1,275 @@ +""" +LangChain v0.3 기반 AI 서비스 +향후 고도화를 위한 확장 가능한 아키텍처 +""" + +import os +import logging +from typing import List, Dict, Any, Optional +from datetime import datetime + +# LangChain Core +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.vectorstores import VectorStore +from langchain_core.retrievers import BaseRetriever +from langchain_core.language_models import BaseLanguageModel +from langchain_core.prompts import PromptTemplate +from langchain_core.output_parsers import StrOutputParser +from langchain_core.runnables import RunnablePassthrough, RunnableParallel + +# LangChain Community +from langchain_community.vectorstores import Chroma +from langchain_community.embeddings import SentenceTransformerEmbeddings +from langchain_community.llms import Ollama + +# LangChain Chains +from langchain.chains import RetrievalQA +from langchain.chains.combine_documents import 
create_stuff_documents_chain +from langchain.chains import create_retrieval_chain + +# Database +import psycopg2 +from psycopg2.extras import RealDictCursor + +logger = logging.getLogger(__name__) + + +class LangChainRAGService: + """LangChain 기반 RAG 서비스""" + + def __init__(self): + self.embeddings: Optional[Embeddings] = None + self.vectorstore: Optional[VectorStore] = None + self.llm: Optional[BaseLanguageModel] = None + self.retriever: Optional[BaseRetriever] = None + self.qa_chain: Optional[Any] = None + self.db_connection = None + + def initialize(self): + """LangChain 컴포넌트 초기화""" + try: + # 임베딩 모델 초기화 + self.embeddings = SentenceTransformerEmbeddings( + model_name="jhgan/ko-sroberta-multitask" + ) + logger.info("✅ LangChain 임베딩 모델 로드 완료") + + # ChromaDB 벡터스토어 초기화 + self.vectorstore = Chroma( + persist_directory="./vectordb", + embedding_function=self.embeddings, + collection_name="research_documents" + ) + logger.info("✅ LangChain ChromaDB 초기화 완료") + + # Ollama LLM 초기화 + self.llm = Ollama( + model="qwen3:latest", + base_url="http://localhost:11434" + ) + logger.info("✅ LangChain Ollama LLM 초기화 완료") + + # 리트리버 초기화 + self.retriever = self.vectorstore.as_retriever( + search_type="similarity", + search_kwargs={"k": 5} + ) + logger.info("✅ LangChain 리트리버 초기화 완료") + + # RAG 체인 구성 + self._setup_rag_chain() + + # 데이터베이스 연결 + self._setup_database() + + logger.info("🚀 LangChain RAG 서비스 초기화 완료") + + except Exception as e: + logger.error(f"❌ LangChain 서비스 초기화 실패: {e}") + raise + + def _setup_rag_chain(self): + """RAG 체인 설정""" + try: + # 프롬프트 템플릿 + prompt_template = """ + 다음 문서들을 참고하여 질문에 답변해주세요. + + 문서들: + {context} + + 질문: {input} + + 답변: 문서의 내용을 바탕으로 정확하고 상세하게 답변해주세요. 
+ """ + + prompt = PromptTemplate( + template=prompt_template, + input_variables=["context", "input"] + ) + + # 문서 체인 생성 + document_chain = create_stuff_documents_chain( + llm=self.llm, + prompt=prompt + ) + + # RAG 체인 생성 + self.qa_chain = create_retrieval_chain( + retriever=self.retriever, + combine_docs_chain=document_chain + ) + + logger.info("✅ RAG 체인 설정 완료") + + except Exception as e: + logger.error(f"❌ RAG 체인 설정 실패: {e}") + raise + + def _setup_database(self): + """데이터베이스 연결 설정""" + try: + self.db_connection = psycopg2.connect( + host="localhost", + port=5432, + database="researchqa", + user="woonglab", + password="!@#woonglab" + ) + self.db_connection.autocommit = True + logger.info("✅ PostgreSQL 연결 완료") + except Exception as e: + logger.error(f"❌ PostgreSQL 연결 실패: {e}") + raise + + def add_documents(self, documents: List[Document], metadata: Dict[str, Any] = None): + """문서를 벡터스토어에 추가""" + try: + if metadata: + for doc in documents: + doc.metadata.update(metadata) + + # ChromaDB에 문서 추가 + self.vectorstore.add_documents(documents) + logger.info(f"✅ {len(documents)}개 문서 추가 완료") + + except Exception as e: + logger.error(f"❌ 문서 추가 실패: {e}") + raise + + def search_similar_documents(self, query: str, k: int = 5) -> List[Document]: + """유사 문서 검색""" + try: + docs = self.vectorstore.similarity_search(query, k=k) + logger.info(f"✅ {len(docs)}개 유사 문서 검색 완료") + return docs + except Exception as e: + logger.error(f"❌ 유사 문서 검색 실패: {e}") + raise + + def generate_answer(self, question: str) -> Dict[str, Any]: + """RAG를 통한 답변 생성""" + try: + # 간단한 유사 문서 검색으로 시작 + similar_docs = self.search_similar_documents(question, k=3) + + if not similar_docs: + return { + "answer": "죄송합니다. 
관련 문서를 찾을 수 없습니다.", + "references": ["문서 없음"], + "source_documents": [] + } + + # 문서 내용을 기반으로 간단한 답변 생성 + context_text = "" + references = [] + + for i, doc in enumerate(similar_docs): + context_text += f"\n문서 {i+1}:\n{doc.page_content[:500]}...\n" + + if hasattr(doc, 'metadata') and doc.metadata: + filename = doc.metadata.get('filename', 'Unknown') + file_id = doc.metadata.get('file_id', 'unknown') + chunk_index = doc.metadata.get('chunk_index', 0) + # 페이지 번호는 청크 인덱스를 기반으로 추정 (실제로는 더 정확한 방법 필요) + page_number = chunk_index + 1 + references.append(f"{filename}::{file_id} [p{page_number}]") + + # 간단한 답변 생성 (LLM 없이) + answer = f"질문하신 '{question}'에 대한 관련 문서를 찾았습니다.\n\n참조 문서에서 관련 내용을 확인할 수 있습니다." + + response = { + "answer": answer, + "references": references, + "source_documents": similar_docs + } + + logger.info(f"✅ RAG 답변 생성 완료: {len(references)}개 참조") + return response + + except Exception as e: + logger.error(f"❌ RAG 답변 생성 실패: {e}") + # 오류 시 기본 응답 반환 + return { + "answer": "죄송합니다. 현재 시스템 오류로 인해 답변을 생성할 수 없습니다.", + "references": ["시스템 오류"], + "source_documents": [] + } + + def get_collection_info(self) -> Dict[str, Any]: + """컬렉션 정보 조회""" + try: + # ChromaDB 컬렉션 정보 + collection = self.vectorstore._collection + count = collection.count() + + return { + "total_documents": count, + "collection_name": "research_documents", + "embedding_model": "jhgan/ko-sroberta-multitask" + } + + except Exception as e: + logger.error(f"❌ 컬렉션 정보 조회 실패: {e}") + return {"error": str(e)} + + def delete_documents_by_filename(self, filename: str): + """파일명으로 문서 삭제""" + try: + # 메타데이터로 필터링하여 삭제 + collection = self.vectorstore._collection + collection.delete(where={"filename": filename}) + logger.info(f"✅ {filename} 관련 문서 삭제 완료") + + except Exception as e: + logger.error(f"❌ 문서 삭제 실패: {e}") + raise + + def cleanup_database_by_filename(self, filename: str): + """데이터베이스에서 파일 관련 데이터 정리""" + try: + cursor = self.db_connection.cursor() + + # 파일 관련 벡터 데이터 삭제 + cursor.execute( + "DELETE FROM 
file_vectors WHERE filename = %s", + (filename,) + ) + + # 파일 메타데이터 삭제 + cursor.execute( + "DELETE FROM files WHERE filename = %s", + (filename,) + ) + + cursor.close() + logger.info(f"✅ {filename} 데이터베이스 정리 완료") + + except Exception as e: + logger.error(f"❌ 데이터베이스 정리 실패: {e}") + raise + + +# 전역 서비스 인스턴스 +langchain_service = LangChainRAGService() diff --git a/database.sql b/database.sql new file mode 100644 index 0000000..4cea223 --- /dev/null +++ b/database.sql @@ -0,0 +1,57 @@ +-- PostgreSQL 데이터베이스 및 테이블 생성 스크립트 +-- 작성일: 2024년 +-- 목적: researchqa 프로젝트용 데이터베이스 설정 + +-- 1. woonglab 사용자 생성 (이미 존재할 경우 무시) +DO $$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'woonglab') THEN + CREATE USER woonglab WITH PASSWORD '!@#woonglab'; + END IF; +END +$$; + +-- 2. researchqa 데이터베이스 생성 (이미 존재할 경우 무시) +SELECT 'CREATE DATABASE researchqa OWNER woonglab' +WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'researchqa')\gexec + +-- 3. researchqa 데이터베이스에 대한 권한 부여 +GRANT ALL PRIVILEGES ON DATABASE researchqa TO woonglab; + +-- 4. researchqa 데이터베이스에 연결하여 테이블 생성 +\c researchqa; + +-- 5. files 테이블 생성 (main.py와 일치) +CREATE TABLE uploaded_file ( + id SERIAL PRIMARY KEY, + filename VARCHAR(255) NOT NULL, + file_path VARCHAR(500) NOT NULL, + status VARCHAR(10) NOT NULL, + upload_dt TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_dt TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- 6. user_log 테이블 생성 +CREATE TABLE IF NOT EXISTS user_log ( + id SERIAL PRIMARY KEY, + question TEXT NOT NULL, + answer TEXT NOT NULL, + like_count INTEGER DEFAULT 0, + dislike_count INTEGER DEFAULT 0, + ip VARCHAR(45), + reg_dt TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- 7. woonglab 사용자에게 테이블 권한 부여 +GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO woonglab; +GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO woonglab; + +-- 8. 생성된 테이블 확인 +\dt + +-- 9. 테이블 구조 확인 +\d files +\d user_log + +-- 스크립트 실행 완료 메시지 +SELECT 'Database setup completed successfully!' 
as message; diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..395016d --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,45 @@ +{ + "name": "researchqa-frontend", + "version": "1.0.0", + "private": true, + "dependencies": { + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-scripts": "5.0.1", + "typescript": "^4.9.5", + "@types/react": "^18.2.0", + "@types/react-dom": "^18.2.0", + "@types/node": "^20.0.0", + "framer-motion": "^10.16.0", + "lucide-react": "^0.294.0", + "react-pdf": "^10.1.0", + "pdfjs-dist": "^5.3.93", + "tailwindcss": "^3.3.0", + "autoprefixer": "^10.4.0", + "postcss": "^8.4.0" + }, + "scripts": { + "start": "react-scripts start", + "build": "react-scripts build", + "test": "react-scripts test", + "eject": "react-scripts eject" + }, + "eslintConfig": { + "extends": [ + "react-app", + "react-app/jest" + ] + }, + "browserslist": { + "production": [ + ">0.2%", + "not dead", + "not op_mini all" + ], + "development": [ + "last 1 chrome version", + "last 1 firefox version", + "last 1 safari version" + ] + } +} diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js new file mode 100644 index 0000000..33ad091 --- /dev/null +++ b/frontend/postcss.config.js @@ -0,0 +1,6 @@ +module.exports = { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/frontend/public/AnnotationLayer.css b/frontend/public/AnnotationLayer.css new file mode 100644 index 0000000..058119c --- /dev/null +++ b/frontend/public/AnnotationLayer.css @@ -0,0 +1,333 @@ +/* Copyright 2014 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +:root { + --react-pdf-annotation-layer: 1; + --annotation-unfocused-field-background: url("data:image/svg+xml;charset=UTF-8,"); + --input-focus-border-color: Highlight; + --input-focus-outline: 1px solid Canvas; + --input-unfocused-border-color: transparent; + --input-disabled-border-color: transparent; + --input-hover-border-color: black; + --link-outline: none; +} + +@media screen and (forced-colors: active) { + :root { + --input-focus-border-color: CanvasText; + --input-unfocused-border-color: ActiveText; + --input-disabled-border-color: GrayText; + --input-hover-border-color: Highlight; + --link-outline: 1.5px solid LinkText; + } + .annotationLayer .textWidgetAnnotation :is(input, textarea):required, + .annotationLayer .choiceWidgetAnnotation select:required, + .annotationLayer .buttonWidgetAnnotation:is(.checkBox, .radioButton) input:required { + outline: 1.5px solid selectedItem; + } + + .annotationLayer .linkAnnotation:hover { + backdrop-filter: invert(100%); + } +} + +.annotationLayer { + position: absolute; + top: 0; + left: 0; + pointer-events: none; + transform-origin: 0 0; + z-index: 3; +} + +.annotationLayer[data-main-rotation='90'] .norotate { + transform: rotate(270deg) translateX(-100%); +} +.annotationLayer[data-main-rotation='180'] .norotate { + transform: rotate(180deg) translate(-100%, -100%); +} +.annotationLayer[data-main-rotation='270'] .norotate { + transform: rotate(90deg) translateY(-100%); +} + +.annotationLayer canvas { + position: absolute; + width: 100%; + height: 100%; +} + +.annotationLayer section { + 
position: absolute; + text-align: initial; + pointer-events: auto; + box-sizing: border-box; + margin: 0; + transform-origin: 0 0; +} + +.annotationLayer .linkAnnotation { + outline: var(--link-outline); +} + +.textLayer.selecting ~ .annotationLayer section { + pointer-events: none; +} + +.annotationLayer :is(.linkAnnotation, .buttonWidgetAnnotation.pushButton) > a { + position: absolute; + font-size: 1em; + top: 0; + left: 0; + width: 100%; + height: 100%; +} + +.annotationLayer :is(.linkAnnotation, .buttonWidgetAnnotation.pushButton) > a:hover { + opacity: 0.2; + background: rgba(255, 255, 0, 1); + box-shadow: 0 2px 10px rgba(255, 255, 0, 1); +} + +.annotationLayer .textAnnotation img { + position: absolute; + cursor: pointer; + width: 100%; + height: 100%; + top: 0; + left: 0; +} + +.annotationLayer .textWidgetAnnotation :is(input, textarea), +.annotationLayer .choiceWidgetAnnotation select, +.annotationLayer .buttonWidgetAnnotation:is(.checkBox, .radioButton) input { + background-image: var(--annotation-unfocused-field-background); + border: 2px solid var(--input-unfocused-border-color); + box-sizing: border-box; + font: calc(9px * var(--total-scale-factor)) sans-serif; + height: 100%; + margin: 0; + vertical-align: top; + width: 100%; +} + +.annotationLayer .textWidgetAnnotation :is(input, textarea):required, +.annotationLayer .choiceWidgetAnnotation select:required, +.annotationLayer .buttonWidgetAnnotation:is(.checkBox, .radioButton) input:required { + outline: 1.5px solid red; +} + +.annotationLayer .choiceWidgetAnnotation select option { + padding: 0; +} + +.annotationLayer .buttonWidgetAnnotation.radioButton input { + border-radius: 50%; +} + +.annotationLayer .textWidgetAnnotation textarea { + resize: none; +} + +.annotationLayer .textWidgetAnnotation :is(input, textarea)[disabled], +.annotationLayer .choiceWidgetAnnotation select[disabled], +.annotationLayer .buttonWidgetAnnotation:is(.checkBox, .radioButton) input[disabled] { + background: none; + 
border: 2px solid var(--input-disabled-border-color); + cursor: not-allowed; +} + +.annotationLayer .textWidgetAnnotation :is(input, textarea):hover, +.annotationLayer .choiceWidgetAnnotation select:hover, +.annotationLayer .buttonWidgetAnnotation:is(.checkBox, .radioButton) input:hover { + border: 2px solid var(--input-hover-border-color); +} +.annotationLayer .textWidgetAnnotation :is(input, textarea):hover, +.annotationLayer .choiceWidgetAnnotation select:hover, +.annotationLayer .buttonWidgetAnnotation.checkBox input:hover { + border-radius: 2px; +} + +.annotationLayer .textWidgetAnnotation :is(input, textarea):focus, +.annotationLayer .choiceWidgetAnnotation select:focus { + background: none; + border: 2px solid var(--input-focus-border-color); + border-radius: 2px; + outline: var(--input-focus-outline); +} + +.annotationLayer .buttonWidgetAnnotation:is(.checkBox, .radioButton) :focus { + background-image: none; + background-color: transparent; +} + +.annotationLayer .buttonWidgetAnnotation.checkBox :focus { + border: 2px solid var(--input-focus-border-color); + border-radius: 2px; + outline: var(--input-focus-outline); +} + +.annotationLayer .buttonWidgetAnnotation.radioButton :focus { + border: 2px solid var(--input-focus-border-color); + outline: var(--input-focus-outline); +} + +.annotationLayer .buttonWidgetAnnotation.checkBox input:checked::before, +.annotationLayer .buttonWidgetAnnotation.checkBox input:checked::after, +.annotationLayer .buttonWidgetAnnotation.radioButton input:checked::before { + background-color: CanvasText; + content: ''; + display: block; + position: absolute; +} + +.annotationLayer .buttonWidgetAnnotation.checkBox input:checked::before, +.annotationLayer .buttonWidgetAnnotation.checkBox input:checked::after { + height: 80%; + left: 45%; + width: 1px; +} + +.annotationLayer .buttonWidgetAnnotation.checkBox input:checked::before { + transform: rotate(45deg); +} + +.annotationLayer .buttonWidgetAnnotation.checkBox input:checked::after 
{ + transform: rotate(-45deg); +} + +.annotationLayer .buttonWidgetAnnotation.radioButton input:checked::before { + border-radius: 50%; + height: 50%; + left: 30%; + top: 20%; + width: 50%; +} + +.annotationLayer .textWidgetAnnotation input.comb { + font-family: monospace; + padding-left: 2px; + padding-right: 0; +} + +.annotationLayer .textWidgetAnnotation input.comb:focus { + /* + * Letter spacing is placed on the right side of each character. Hence, the + * letter spacing of the last character may be placed outside the visible + * area, causing horizontal scrolling. We avoid this by extending the width + * when the element has focus and revert this when it loses focus. + */ + width: 103%; +} + +.annotationLayer .buttonWidgetAnnotation:is(.checkBox, .radioButton) input { + appearance: none; +} + +.annotationLayer .popupTriggerArea { + height: 100%; + width: 100%; +} + +.annotationLayer .fileAttachmentAnnotation .popupTriggerArea { + position: absolute; +} + +.annotationLayer .popupWrapper { + position: absolute; + font-size: calc(9px * var(--total-scale-factor)); + width: 100%; + min-width: calc(180px * var(--total-scale-factor)); + pointer-events: none; +} + +.annotationLayer .popup { + position: absolute; + max-width: calc(180px * var(--total-scale-factor)); + background-color: rgba(255, 255, 153, 1); + box-shadow: 0 calc(2px * var(--total-scale-factor)) calc(5px * var(--total-scale-factor)) + rgba(136, 136, 136, 1); + border-radius: calc(2px * var(--total-scale-factor)); + padding: calc(6px * var(--total-scale-factor)); + margin-left: calc(5px * var(--total-scale-factor)); + cursor: pointer; + font: message-box; + white-space: normal; + word-wrap: break-word; + pointer-events: auto; +} + +.annotationLayer .popup > * { + font-size: calc(9px * var(--total-scale-factor)); +} + +.annotationLayer .popup h1 { + display: inline-block; +} + +.annotationLayer .popupDate { + display: inline-block; + margin-left: calc(5px * var(--total-scale-factor)); +} + 
+.annotationLayer .popupContent { + border-top: 1px solid rgba(51, 51, 51, 1); + margin-top: calc(2px * var(--total-scale-factor)); + padding-top: calc(2px * var(--total-scale-factor)); +} + +.annotationLayer .richText > * { + white-space: pre-wrap; + font-size: calc(9px * var(--total-scale-factor)); +} + +.annotationLayer .highlightAnnotation, +.annotationLayer .underlineAnnotation, +.annotationLayer .squigglyAnnotation, +.annotationLayer .strikeoutAnnotation, +.annotationLayer .freeTextAnnotation, +.annotationLayer .lineAnnotation svg line, +.annotationLayer .squareAnnotation svg rect, +.annotationLayer .circleAnnotation svg ellipse, +.annotationLayer .polylineAnnotation svg polyline, +.annotationLayer .polygonAnnotation svg polygon, +.annotationLayer .caretAnnotation, +.annotationLayer .inkAnnotation svg polyline, +.annotationLayer .stampAnnotation, +.annotationLayer .fileAttachmentAnnotation { + cursor: pointer; +} + +.annotationLayer section svg { + position: absolute; + width: 100%; + height: 100%; + top: 0; + left: 0; +} + +.annotationLayer .annotationTextContent { + position: absolute; + width: 100%; + height: 100%; + opacity: 0; + color: transparent; + user-select: none; + pointer-events: none; +} + +.annotationLayer .annotationTextContent span { + width: 100%; + display: inline-block; +} diff --git a/frontend/public/TextLayer.css b/frontend/public/TextLayer.css new file mode 100644 index 0000000..703c778 --- /dev/null +++ b/frontend/public/TextLayer.css @@ -0,0 +1,119 @@ +/* Copyright 2014 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +:root { + --react-pdf-text-layer: 1; + --highlight-bg-color: rgba(180, 0, 170, 1); + --highlight-selected-bg-color: rgba(0, 100, 0, 1); +} + +@media screen and (forced-colors: active) { + :root { + --highlight-bg-color: Highlight; + --highlight-selected-bg-color: ButtonText; + } +} + +[data-main-rotation='90'] { + transform: rotate(90deg) translateY(-100%); +} +[data-main-rotation='180'] { + transform: rotate(180deg) translate(-100%, -100%); +} +[data-main-rotation='270'] { + transform: rotate(270deg) translateX(-100%); +} + +.textLayer { + position: absolute; + text-align: initial; + inset: 0; + overflow: hidden; + line-height: 1; + text-size-adjust: none; + forced-color-adjust: none; + transform-origin: 0 0; + z-index: 2; +} + +.textLayer :is(span, br) { + color: transparent; + position: absolute; + white-space: pre; + cursor: text; + margin: 0; + transform-origin: 0 0; +} + +/* Only necessary in Google Chrome, see issue 14205, and most unfortunately + * the problem doesn't show up in "text" reference tests. 
*/ +.textLayer span.markedContent { + top: 0; + height: 0; +} + +.textLayer .highlight { + margin: -1px; + padding: 1px; + background-color: var(--highlight-bg-color); + border-radius: 4px; +} + +.textLayer .highlight.appended { + position: initial; +} + +.textLayer .highlight.begin { + border-radius: 4px 0 0 4px; +} + +.textLayer .highlight.end { + border-radius: 0 4px 4px 0; +} + +.textLayer .highlight.middle { + border-radius: 0; +} + +.textLayer .highlight.selected { + background-color: var(--highlight-selected-bg-color); +} + +/* Avoids https://github.com/mozilla/pdf.js/issues/13840 in Chrome */ +.textLayer br::selection { + background: transparent; +} + +.textLayer .endOfContent { + display: block; + position: absolute; + inset: 100% 0 0; + z-index: -1; + cursor: default; + user-select: none; +} + +.textLayer.selecting .endOfContent { + top: 0; +} + +.hiddenCanvasElement { + position: absolute; + top: 0; + left: 0; + width: 0; + height: 0; + display: none; +} diff --git a/frontend/public/images/dw_icon.png b/frontend/public/images/dw_icon.png new file mode 100644 index 0000000..cf7955f Binary files /dev/null and b/frontend/public/images/dw_icon.png differ diff --git a/frontend/public/images/woongtalk.png b/frontend/public/images/woongtalk.png new file mode 100644 index 0000000..02ae4c8 Binary files /dev/null and b/frontend/public/images/woongtalk.png differ diff --git a/frontend/public/images/woongtalk_bgremove.png b/frontend/public/images/woongtalk_bgremove.png new file mode 100644 index 0000000..1f14dee Binary files /dev/null and b/frontend/public/images/woongtalk_bgremove.png differ diff --git a/frontend/public/index.html b/frontend/public/index.html new file mode 100644 index 0000000..50313e5 --- /dev/null +++ b/frontend/public/index.html @@ -0,0 +1,20 @@ + + +
+ + + + + + + +=w||c<0||c>=y?l<<=1:l=l<<1|r[o][c]}const g=k.readBit(C,l);t[s]=g}}return S}function decodeTextRegion(e,t,a,r,i,n,s,o,c,l,h,u,d,f,g,p,m,b,y){if(e&&t)throw new Jbig2Error("refinement with Huffman is not supported");const w=[];let x,S;for(x=0;x 0,c=(r+7>>3)*i,l=e.getBytes(c),h=1===r&&1===i&&o===(0===l.length||!!(128&l[0]));if(h)return{isSingleOpaquePixel:h};if(t){if(ImageResizer.needsToBeResized(r,i)){const e=new Uint8ClampedArray(r*i*4);convertBlackAndWhiteToRGBA({src:l,dest:e,width:r,height:i,nonBlackColor:0,inverseDecode:o});return ImageResizer.createImage({kind:v,data:e,width:r,height:i,interpolate:n})}const e=new OffscreenCanvas(r,i),t=e.getContext("2d"),a=t.createImageData(r,i);convertBlackAndWhiteToRGBA({src:l,dest:a.data,width:r,height:i,nonBlackColor:0,inverseDecode:o});t.putImageData(a,0,0);return{data:null,width:r,height:i,interpolate:n,bitmap:e.transferToImageBitmap()}}const u=l.byteLength;let d;if(e instanceof DecodeStream&&(!o||c===u))d=l;else if(o){d=new Uint8Array(c);d.set(l);d.fill(255,u)}else d=new Uint8Array(l);if(o)for(let e=0;e>7&1;s[d+1]=u>>6&1;s[d+2]=u>>5&1;s[d+3]=u>>4&1;s[d+4]=u>>3&1;s[d+5]=u>>2&1;s[d+6]=u>>1&1;s[d+7]=1&u;d+=8}if(d>=1}}}}else{let a=0;u=0;for(d=0,h=n;d=0&&e=0;t--){d[t]=o[a];a=l[a]}}else d[f++]=d[0]}if(i){l[s]=u;c[s]=c[u]+1;o[s]=d[0];s++;h=s+n&s+n-1?h:0|Math.min(Math.log(s+n)/.6931471805599453+1,12)}u=e;g+=f;if(r127))){n=0;break}}if(2!==n)continue;if(!t){warn("findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined.");continue}const o=new Lexer(new Stream(e.peekBytes(75)),t);o._hexStringWarn=()=>{};let c=0;for(;;){const e=o.getObj();if(e===wa){n=0;break}if(e instanceof Cmd){const a=t[e.cmd];if(!a){n=0;break}if(a.variableArgs?c<=a.numArgs:c===a.numArgs)break;c=0}else c++}if(2===n)break}else n=0;if(-1===r){warn("findDefaultInlineStreamEnd: Reached the end of the stream without finding a valid EI marker");if(i){warn('... 
trying to recover by using the last "EI" occurrence.');e.skip(-(e.pos-i))}}let s=4;e.skip(-s);r=e.peekByte();e.skip(s);isWhiteSpace(r)||s--;return e.pos-s-a}findDCTDecodeInlineStreamEnd(e){const t=e.pos;let a,r,i=!1;for(;-1!==(a=e.getByte());)if(255===a){switch(e.getByte()){case 0:break;case 255:e.skip(-1);break;case 217:i=!0;break;case 192:case 193:case 194:case 195:case 197:case 198:case 199:case 201:case 202:case 203:case 205:case 206:case 207:case 196:case 204:case 218:case 219:case 220:case 221:case 222:case 223:case 224:case 225:case 226:case 227:case 228:case 229:case 230:case 231:case 232:case 233:case 234:case 235:case 236:case 237:case 238:case 239:case 254:r=e.getUint16();r>2?e.skip(r-2):e.skip(-2)}if(i)break}const n=e.pos-t;if(-1===a){warn("Inline DCTDecode image stream: EOI marker not found, searching for /EI/ instead.");e.skip(-n);return this.findDefaultInlineStreamEnd(e)}this.inlineStreamSkipEI(e);return n}findASCII85DecodeInlineStreamEnd(e){const t=e.pos;let a;for(;-1!==(a=e.getByte());)if(126===a){const t=e.pos;a=e.peekByte();for(;isWhiteSpace(a);){e.skip();a=e.peekByte()}if(62===a){e.skip();break}if(e.pos>t){const t=e.peekBytes(2);if(69===t[0]&&73===t[1])break}}const r=e.pos-t;if(-1===a){warn("Inline ASCII85Decode image stream: EOD marker not found, searching for /EI/ instead.");e.skip(-r);return this.findDefaultInlineStreamEnd(e)}this.inlineStreamSkipEI(e);return r}findASCIIHexDecodeInlineStreamEnd(e){const t=e.pos;let a;for(;-1!==(a=e.getByte())&&62!==a;);const r=e.pos-t;if(-1===a){warn("Inline ASCIIHexDecode image stream: EOD marker not found, searching for /EI/ instead.");e.skip(-r);return this.findDefaultInlineStreamEnd(e)}this.inlineStreamSkipEI(e);return r}inlineStreamSkipEI(e){let t,a=0;for(;-1!==(t=e.getByte());)if(0===a)a=69===t?1:0;else if(1===a)a=73===t?2:0;else if(2===a)break}makeInlineImage(e){const t=this.lexer,a=t.stream,r=Object.create(null);let i;for(;!isCmd(this.buf1,"ID")&&this.buf1!==wa;){if(!(this.buf1 instanceof Name))throw 
new FormatError("Dictionary key must be a name object");const t=this.buf1.name;this.shift();if(this.buf1===wa)break;r[t]=this.getObj(e)}-1!==t.beginInlineImagePos&&(i=a.pos-t.beginInlineImagePos);const n=this.xref.fetchIfRef(r.F||r.Filter);let s;if(n instanceof Name)s=n.name;else if(Array.isArray(n)){const e=this.xref.fetchIfRef(n[0]);e instanceof Name&&(s=e.name)}const o=a.pos;let c,l;switch(s){case"DCT":case"DCTDecode":c=this.findDCTDecodeInlineStreamEnd(a);break;case"A85":case"ASCII85Decode":c=this.findASCII85DecodeInlineStreamEnd(a);break;case"AHx":case"ASCIIHexDecode":c=this.findASCIIHexDecodeInlineStreamEnd(a);break;default:c=this.findDefaultInlineStreamEnd(a)}if(c<1e3&&i>0){const e=a.pos;a.pos=t.beginInlineImagePos;l=function getInlineImageCacheKey(e){const t=[],a=e.length;let r=0;for(;r=4){i-=4;if(this.seacAnalysisEnabled){e.seac=n.slice(i,i+4);return!1}}l=zr[c]}else if(c>=32&&c<=246){n[i]=c-139;i++}else if(c>=247&&c<=254){n[i]=c<251?(c-247<<8)+t[o]+108:-(c-251<<8)-t[o]-108;o++;i++}else if(255===c){n[i]=(t[o]<<24|t[o+1]<<16|t[o+2]<<8|t[o+3])/65536;o+=4;i++}else if(19===c||20===c){e.hints+=i>>1;if(0===e.hints){t.copyWithin(o-1,o,-1);o-=1;s-=1;continue}o+=e.hints+7>>3;i%=2;l=zr[c]}else{if(10===c||29===c){const t=10===c?a:r;if(!t){l=zr[c];warn("Missing subrsIndex for "+l.id);return!1}let s=32768;t.count<1240?s=107:t.count<33900&&(s=1131);const o=n[--i]+s;if(o<0||o>=t.count||isNaN(o)){l=zr[c];warn("Out of bounds subrIndex for "+l.id);return!1}e.stackSize=i;e.callDepth++;if(!this.parseCharString(e,t.get(o),a,r))return!1;e.callDepth--;i=e.stackSize;continue}if(11===c){e.stackSize=i;return!0}if(0===c&&o===t.length){t[o-1]=14;l=zr[14]}else{if(9===c){t.copyWithin(o-1,o,-1);o-=1;s-=1;continue}l=zr[c]}}if(l){if(l.stem){e.hints+=i>>1;if(3===c||23===c)e.hasVStems=!0;else if(e.hasVStems&&(1===c||18===c)){warn("CFF stem hints are in wrong order");t[o-1]=1===c?3:23}}if("min"in l&&!e.undefStack&&i0){w+="ÿÿ";y+="ÿÿ";x+="\0";S+="\0\0"}const 
v="\0\0"+string16(2*d)+string16(f.range)+string16(f.entry)+string16(f.rangeShift)+w+"\0\0"+y+x+S+k;let F="",T="";if(i>1){l+="\0\0\n"+string32(4+8*i+4+v.length);F="";for(n=0,s=r.length;ne||!o)&&(o=e);c=0){reverseValues(Sn,e,n);e=-1}}else e<0&&(e=n);e>=0&&reverseValues(Sn,e,c.length)}for(n=0,s=Sn.length;n"!==e||(Sn[n]="")}return createBidiText(Sn.join(""),r)}const An={style:"normal",weight:"normal"},Cn={style:"normal",weight:"bold"},vn={style:"italic",weight:"normal"},Fn={style:"italic",weight:"bold"},In=new Map([["Times-Roman",{local:["Times New Roman","Times-Roman","Times","Liberation Serif","Nimbus Roman","Nimbus Roman L","Tinos","Thorndale","TeX Gyre Termes","FreeSerif","Linux Libertine O","Libertinus Serif","DejaVu Serif","Bitstream Vera Serif","Ubuntu"],style:An,ultimate:"serif"}],["Times-Bold",{alias:"Times-Roman",style:Cn,ultimate:"serif"}],["Times-Italic",{alias:"Times-Roman",style:vn,ultimate:"serif"}],["Times-BoldItalic",{alias:"Times-Roman",style:Fn,ultimate:"serif"}],["Helvetica",{local:["Helvetica","Helvetica Neue","Arial","Arial Nova","Liberation Sans","Arimo","Nimbus Sans","Nimbus Sans L","A030","TeX Gyre Heros","FreeSans","DejaVu Sans","Albany","Bitstream Vera Sans","Arial Unicode MS","Microsoft Sans Serif","Apple Symbols","Cantarell"],path:"LiberationSans-Regular.ttf",style:An,ultimate:"sans-serif"}],["Helvetica-Bold",{alias:"Helvetica",path:"LiberationSans-Bold.ttf",style:Cn,ultimate:"sans-serif"}],["Helvetica-Oblique",{alias:"Helvetica",path:"LiberationSans-Italic.ttf",style:vn,ultimate:"sans-serif"}],["Helvetica-BoldOblique",{alias:"Helvetica",path:"LiberationSans-BoldItalic.ttf",style:Fn,ultimate:"sans-serif"}],["Courier",{local:["Courier","Courier New","Liberation Mono","Nimbus Mono","Nimbus Mono L","Cousine","Cumberland","TeX Gyre Cursor","FreeMono","Linux Libertine Mono O","Libertinus 
Mono"],style:An,ultimate:"monospace"}],["Courier-Bold",{alias:"Courier",style:Cn,ultimate:"monospace"}],["Courier-Oblique",{alias:"Courier",style:vn,ultimate:"monospace"}],["Courier-BoldOblique",{alias:"Courier",style:Fn,ultimate:"monospace"}],["ArialBlack",{local:["Arial Black"],style:{style:"normal",weight:"900"},fallback:"Helvetica-Bold"}],["ArialBlack-Bold",{alias:"ArialBlack"}],["ArialBlack-Italic",{alias:"ArialBlack",style:{style:"italic",weight:"900"},fallback:"Helvetica-BoldOblique"}],["ArialBlack-BoldItalic",{alias:"ArialBlack-Italic"}],["ArialNarrow",{local:["Arial Narrow","Liberation Sans Narrow","Helvetica Condensed","Nimbus Sans Narrow","TeX Gyre Heros Cn"],style:An,fallback:"Helvetica"}],["ArialNarrow-Bold",{alias:"ArialNarrow",style:Cn,fallback:"Helvetica-Bold"}],["ArialNarrow-Italic",{alias:"ArialNarrow",style:vn,fallback:"Helvetica-Oblique"}],["ArialNarrow-BoldItalic",{alias:"ArialNarrow",style:Fn,fallback:"Helvetica-BoldOblique"}],["Calibri",{local:["Calibri","Carlito"],style:An,fallback:"Helvetica"}],["Calibri-Bold",{alias:"Calibri",style:Cn,fallback:"Helvetica-Bold"}],["Calibri-Italic",{alias:"Calibri",style:vn,fallback:"Helvetica-Oblique"}],["Calibri-BoldItalic",{alias:"Calibri",style:Fn,fallback:"Helvetica-BoldOblique"}],["Wingdings",{local:["Wingdings","URW Dingbats"],style:An}],["Wingdings-Regular",{alias:"Wingdings"}],["Wingdings-Bold",{alias:"Wingdings"}]]),Tn=new Map([["Arial-Black","ArialBlack"]]);function getFamilyName(e){const t=new Set(["thin","extralight","ultralight","demilight","semilight","light","book","regular","normal","medium","demibold","semibold","bold","extrabold","ultrabold","black","heavy","extrablack","ultrablack","roman","italic","oblique","ultracondensed","extracondensed","condensed","semicondensed","normal","semiexpanded","expanded","extraexpanded","ultraexpanded","bolditalic"]);return e.split(/[- ,+]+/g).filter((e=>!t.has(e.toLowerCase()))).join(" ")}function 
generateFont({alias:e,local:t,path:a,fallback:r,style:i,ultimate:n},s,o,c=!0,l=!0,h=""){const u={style:null,ultimate:null};if(t){const e=h?` ${h}`:"";for(const a of t)s.push(`local(${a}${e})`)}if(e){const t=In.get(e),n=h||function getStyleToAppend(e){switch(e){case Cn:return"Bold";case vn:return"Italic";case Fn:return"Bold Italic";default:if("bold"===e?.weight)return"Bold";if("italic"===e?.style)return"Italic"}return""}(i);Object.assign(u,generateFont(t,s,o,c&&!r,l&&!a,n))}i&&(u.style=i);n&&(u.ultimate=n);if(c&&r){const e=In.get(r),{ultimate:t}=generateFont(e,s,o,c,l&&!a,h);u.ultimate||=t}l&&a&&o&&s.push(`url(${o}${a})`);return u}function getFontSubstitution(e,t,a,r,i,n){if(r.startsWith("InvalidPDFjsFont_"))return null;"TrueType"!==n&&"Type1"!==n||!/^[A-Z]{6}\+/.test(r)||(r=r.slice(7));const s=r=normalizeFontName(r);let o=e.get(s);if(o)return o;let c=In.get(r);if(!c)for(const[e,t]of Tn)if(r.startsWith(e)){r=`${t}${r.substring(e.length)}`;c=In.get(r);break}let l=!1;if(!c){c=In.get(i);l=!0}const h=`${t.getDocId()}_s${t.createFontId()}`;if(!c){if(!validateFontName(r)){warn(`Cannot substitute the font because of its name: ${r}`);e.set(s,null);return null}const t=/bold/gi.test(r),a=/oblique|italic/gi.test(r),i=t&&a&&Fn||t&&Cn||a&&vn||An;o={css:`"${getFamilyName(r)}",${h}`,guessFallback:!0,loadedName:h,baseFontName:r,src:`local(${r})`,style:i};e.set(s,o);return o}const u=[];l&&validateFontName(r)&&u.push(`local(${r})`);const{style:d,ultimate:f}=generateFont(c,u,a),g=null===f,p=g?"":`,${f}`;o={css:`"${getFamilyName(r)}",${h}${p}`,guessFallback:g,loadedName:h,baseFontName:r,src:u.join(","),style:d};e.set(s,o);return o}const On=3285377520,Mn=4294901760,Dn=65535;class MurmurHash3_64{constructor(e){this.h1=e?4294967295&e:On;this.h2=e?4294967295&e:On}update(e){let t,a;if("string"==typeof e){t=new Uint8Array(2*e.length);a=0;for(let r=0,i=e.length;r>>8;t[a++]=255&i}}}else{if(!ArrayBuffer.isView(e))throw new Error("Invalid data format, must be a string or 
TypedArray.");t=e.slice();a=t.byteLength}const r=a>>2,i=a-4*r,n=new Uint32Array(t.buffer,0,r);let s=0,o=0,c=this.h1,l=this.h2;const h=3432918353,u=461845907,d=11601,f=13715;for(let e=0;e.5*y.width){appendEOL();return!0}resetLastChars();flushTextContentItem();return!0}if(Math.abs(t)>y.width){appendEOL();return!0}e<=s*y.notASpace&&resetLastChars();if(e<=s*y.trackingSpaceMin)if(shouldAddWhitepsace()){resetLastChars();flushTextContentItem();pushWhitespace({height:Math.abs(e)})}else y.height+=e;else if(!addFakeSpaces(e,y.prevTransform,s))if(0===y.str.length){resetLastChars();pushWhitespace({height:Math.abs(e)})}else y.height+=e;Math.abs(t)>.25*y.width&&flushTextContentItem();return!0}const o=(a-i)/y.textAdvanceScale,l=r-n,h=Math.sign(y.width);if(oEvaluatorPreprocessor.MAX_INVALID_PATH_OPS)throw new FormatError(`Invalid ${e}`);warn(`Skipping ${e}`);null!==t&&(t.length=0);continue}}this.preprocessCommand(n,t);e.fn=n;e.args=t;return!0}if(a===wa)return!1;if(null!==a){null===t&&(t=[]);t.push(a);if(t.length>33)throw new FormatError("Too many arguments")}}}preprocessCommand(e,t){switch(0|e){case Be:this.stateManager.save();break;case Re:this.stateManager.restore();break;case Ne:this.stateManager.transform(t)}}}class DefaultAppearanceEvaluator extends EvaluatorPreprocessor{constructor(e){super(new StringStream(e))}parse(){const e={fn:0,args:[]},t={fontSize:0,fontName:"",fontColor:new Uint8ClampedArray(3)};try{for(;;){e.args.length=0;if(!this.read(e))break;if(0!==this.savedStatesDepth)continue;const{fn:a,args:r}=e;switch(0|a){case nt:const[e,a]=r;e instanceof Name&&(t.fontName=e.name);"number"==typeof a&&a>0&&(t.fontSize=a);break;case It:ColorSpaceUtils.rgb.getRgbItem(r,0,t.fontColor,0);break;case vt:ColorSpaceUtils.gray.getRgbItem(r,0,t.fontColor,0);break;case Ot:ColorSpaceUtils.cmyk.getRgbItem(r,0,t.fontColor,0)}}}catch(e){warn(`parseDefaultAppearance - ignoring errors: "${e}".`)}return t}}function parseDefaultAppearance(e){return new 
DefaultAppearanceEvaluator(e).parse()}class AppearanceStreamEvaluator extends EvaluatorPreprocessor{constructor(e,t,a,r){super(e);this.stream=e;this.evaluatorOptions=t;this.xref=a;this.globalColorSpaceCache=r;this.resources=e.dict?.get("Resources")}parse(){const e={fn:0,args:[]};let t={scaleFactor:1,fontSize:0,fontName:"",fontColor:new Uint8ClampedArray(3),fillColorSpace:ColorSpaceUtils.gray},a=!1;const r=[];try{for(;;){e.args.length=0;if(a||!this.read(e))break;const{fn:i,args:n}=e;switch(0|i){case Be:r.push({scaleFactor:t.scaleFactor,fontSize:t.fontSize,fontName:t.fontName,fontColor:t.fontColor.slice(),fillColorSpace:t.fillColorSpace});break;case Re:t=r.pop()||t;break;case ht:t.scaleFactor*=Math.hypot(n[0],n[1]);break;case nt:const[e,i]=n;e instanceof Name&&(t.fontName=e.name);"number"==typeof i&&i>0&&(t.fontSize=i*t.scaleFactor);break;case wt:t.fillColorSpace=ColorSpaceUtils.parse({cs:n[0],xref:this.xref,resources:this.resources,pdfFunctionFactory:this._pdfFunctionFactory,globalColorSpaceCache:this.globalColorSpaceCache,localColorSpaceCache:this._localColorSpaceCache});break;case kt:t.fillColorSpace.getRgbItem(n,0,t.fontColor,0);break;case It:ColorSpaceUtils.rgb.getRgbItem(n,0,t.fontColor,0);break;case vt:ColorSpaceUtils.gray.getRgbItem(n,0,t.fontColor,0);break;case Ot:ColorSpaceUtils.cmyk.getRgbItem(n,0,t.fontColor,0);break;case dt:case ft:case gt:case pt:a=!0}}}catch(e){warn(`parseAppearanceStream - ignoring errors: "${e}".`)}this.stream.reset();delete t.scaleFactor;delete t.fillColorSpace;return t}get _localColorSpaceCache(){return shadow(this,"_localColorSpaceCache",new LocalColorSpaceCache)}get _pdfFunctionFactory(){return shadow(this,"_pdfFunctionFactory",new PDFFunctionFactory({xref:this.xref,isEvalSupported:this.evaluatorOptions.isEvalSupported}))}}function getPdfColor(e,t){if(e[0]===e[1]&&e[1]===e[2]){return`${numberToString(e[0]/255)} ${t?"g":"G"}`}return Array.from(e,(e=>numberToString(e/255))).join(" ")+" "+(t?"rg":"RG")}class 
FakeUnicodeFont{constructor(e,t){this.xref=e;this.widths=null;this.firstChar=1/0;this.lastChar=-1/0;this.fontFamily=t;const a=new OffscreenCanvas(1,1);this.ctxMeasure=a.getContext("2d",{willReadFrequently:!0});FakeUnicodeFont._fontNameId||(FakeUnicodeFont._fontNameId=1);this.fontName=Name.get(`InvalidPDFjsFont_${t}_${FakeUnicodeFont._fontNameId++}`)}get fontDescriptorRef(){if(!FakeUnicodeFont._fontDescriptorRef){const e=new Dict(this.xref);e.set("Type",Name.get("FontDescriptor"));e.set("FontName",this.fontName);e.set("FontFamily","MyriadPro Regular");e.set("FontBBox",[0,0,0,0]);e.set("FontStretch",Name.get("Normal"));e.set("FontWeight",400);e.set("ItalicAngle",0);FakeUnicodeFont._fontDescriptorRef=this.xref.getNewPersistentRef(e)}return FakeUnicodeFont._fontDescriptorRef}get descendantFontRef(){const e=new Dict(this.xref);e.set("BaseFont",this.fontName);e.set("Type",Name.get("Font"));e.set("Subtype",Name.get("CIDFontType0"));e.set("CIDToGIDMap",Name.get("Identity"));e.set("FirstChar",this.firstChar);e.set("LastChar",this.lastChar);e.set("FontDescriptor",this.fontDescriptorRef);e.set("DW",1e3);const t=[],a=[...this.widths.entries()].sort();let r=null,i=null;for(const[e,n]of a)if(r)if(e===r+i.length)i.push(n);else{t.push(r,i);r=e;i=[n]}else{r=e;i=[n]}r&&t.push(r,i);e.set("W",t);const n=new Dict(this.xref);n.set("Ordering","Identity");n.set("Registry","Adobe");n.set("Supplement",0);e.set("CIDSystemInfo",n);return this.xref.getNewPersistentRef(e)}get baseFontRef(){const e=new Dict(this.xref);e.set("BaseFont",this.fontName);e.set("Type",Name.get("Font"));e.set("Subtype",Name.get("Type0"));e.set("Encoding",Name.get("Identity-H"));e.set("DescendantFonts",[this.descendantFontRef]);e.set("ToUnicode",Name.get("Identity-H"));return this.xref.getNewPersistentRef(e)}get resources(){const e=new Dict(this.xref),t=new Dict(this.xref);t.set(this.fontName.name,this.baseFontRef);e.set("Font",t);return e}_createContext(){this.widths=new Map;this.ctxMeasure.font=`1000px 
${this.fontFamily}`;return this.ctxMeasure}createFontResources(e){const t=this._createContext();for(const a of e.split(/\r\n?|\n/))for(const e of a.split("")){const a=e.charCodeAt(0);if(this.widths.has(a))continue;const r=t.measureText(e),i=Math.ceil(r.width);this.widths.set(a,i);this.firstChar=Math.min(a,this.firstChar);this.lastChar=Math.max(a,this.lastChar)}return this.resources}static getFirstPositionInfo(e,t,i){const[n,s,o,c]=e;let l=o-n,h=c-s;t%180!=0&&([l,h]=[h,l]);const u=a*i;return{coords:[0,h+r*i-u],bbox:[0,0,l,h],matrix:0!==t?getRotationMatrix(t,h,u):void 0}}createAppearance(e,t,i,n,s,o){const c=this._createContext(),l=[];let h=-1/0;for(const t of e.split(/\r\n?|\n/)){l.push(t);const e=c.measureText(t).width;h=Math.max(h,e);for(const e of codePointIter(t)){const t=String.fromCodePoint(e);let a=this.widths.get(e);if(void 0===a){const r=c.measureText(t);a=Math.ceil(r.width);this.widths.set(e,a);this.firstChar=Math.min(e,this.firstChar);this.lastChar=Math.max(e,this.lastChar)}}}h*=n/1e3;const[u,d,f,g]=t;let p=f-u,m=g-d;i%180!=0&&([p,m]=[m,p]);let b=1;h>p&&(b=p/h);let y=1;const w=a*n,x=r*n,S=w*l.length;S>m&&(y=m/S);const k=n*Math.min(b,y),C=["q",`0 0 ${numberToString(p)} ${numberToString(m)} re W n`,"BT",`1 0 0 1 0 ${numberToString(m+x)} Tm 0 Tc ${getPdfColor(s,!0)}`,`/${this.fontName.name} ${numberToString(k)} Tf`],{resources:v}=this;if(1!==(o="number"==typeof o&&o>=0&&o<=1?o:1)){C.push("/R0 gs");const e=new Dict(this.xref),t=new Dict(this.xref);t.set("ca",o);t.set("CA",o);t.set("Type",Name.get("ExtGState"));e.set("R0",t);v.set("ExtGState",e)}const F=numberToString(w);for(const e of l)C.push(`0 -${F} Td <${stringToUTF16HexString(e)}> Tj`);C.push("ET","Q");const T=C.join("\n"),O=new Dict(this.xref);O.set("Subtype",Name.get("Form"));O.set("Type",Name.get("XObject"));O.set("BBox",[0,0,p,m]);O.set("Length",T.length);O.set("Resources",v);if(i){const e=getRotationMatrix(i,p,m);O.set("Matrix",e)}const M=new StringStream(T);M.dict=O;return M}}const 
Pn=["m/d","m/d/yy","mm/dd/yy","mm/yy","d-mmm","d-mmm-yy","dd-mmm-yy","yy-mm-dd","mmm-yy","mmmm-yy","mmm d, yyyy","mmmm d, yyyy","m/d/yy h:MM tt","m/d/yy HH:MM"],Ln=["HH:MM","h:MM tt","HH:MM:ss","h:MM:ss tt"];class NameOrNumberTree{constructor(e,t,a){this.root=e;this.xref=t;this._type=a}getAll(){const e=new Map;if(!this.root)return e;const t=this.xref,a=new RefSet;a.put(this.root);const r=[this.root];for(;r.length>0;){const i=t.fetchIfRef(r.shift());if(!(i instanceof Dict))continue;if(i.has("Kids")){const e=i.get("Kids");if(!Array.isArray(e))continue;for(const t of e){if(a.has(t))throw new FormatError(`Duplicate entry in "${this._type}" tree.`);r.push(t);a.put(t)}continue}const n=i.get(this._type);if(Array.isArray(n))for(let a=0,r=n.length;a