# ax_document/AI로 일하는 방법/inspect_ai_work_source.py

from __future__ import annotations

from collections import Counter
from pathlib import Path

from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE


def _safe_shape_type(shape):
    """Return ``shape.shape_type``, or ``None`` if it cannot be determined.

    python-pptx can raise ``NotImplementedError`` from ``shape_type`` for
    shapes it does not know how to classify. For an inspection script it is
    more useful to count such shapes under ``None`` than to abort the report.
    """
    try:
        return shape.shape_type
    except NotImplementedError:
        return None


def main() -> None:
    """Print a per-slide summary of ``resources/inputs/ai_work_source.pptx``.

    The deck is looked up relative to this script's directory. For every
    slide one summary line is printed (shape count, number of shapes with
    non-blank text, pictures, tables, and a count per shape type), followed
    by up to 8 text previews, each cut to 160 characters.

    Raises:
        FileNotFoundError: If the source presentation does not exist.
    """
    base_dir = Path(__file__).resolve().parent
    src = base_dir / "resources" / "inputs" / "ai_work_source.pptx"
    if not src.exists():
        raise FileNotFoundError(src)

    prs = Presentation(str(src))
    print(f"file={src} slides={len(prs.slides)}")

    for idx, slide in enumerate(prs.slides, start=1):
        types: Counter = Counter()
        picture_shapes = 0
        tables = 0
        # Stripped, non-blank text of each text-bearing shape, in shape order.
        # Collected in the same pass as the counters so every shape's text is
        # read only once.
        texts: list[str] = []

        for sh in slide.shapes:
            shape_type = _safe_shape_type(sh)
            types[shape_type] += 1
            if shape_type == MSO_SHAPE_TYPE.PICTURE:
                picture_shapes += 1
            # Not every shape class defines has_table / has_text_frame,
            # hence getattr with a False default.
            if getattr(sh, "has_table", False):
                tables += 1
            if getattr(sh, "has_text_frame", False):
                text = sh.text_frame.text.strip()
                if text:
                    texts.append(text)

        print(
            f"Slide {idx:02d}: shapes={len(slide.shapes)} "
            f"textShapes={len(texts)} pictures={picture_shapes} tables={tables} "
            f"typeCounts={dict(types)}"
        )

        # Short preview of the actual text content (if any): newlines are
        # flattened so each shape stays on a single output line.
        for text in texts[:8]:
            preview = text.replace("\n", " / ")
            print(f"  - {preview[:160]}")

# Run the inspection only when this file is executed as a script, so that
# importing the module does not trigger any file access or output.
if __name__ == "__main__":
    main()