"""Print a per-slide inventory of shapes and text for a fixed .pptx input."""

from __future__ import annotations

from collections import Counter
from pathlib import Path

from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE

# Caps on the text preview printed under each slide summary.
MAX_PREVIEWS_PER_SLIDE = 8
MAX_PREVIEW_CHARS = 160


def _shape_type(shape) -> MSO_SHAPE_TYPE | None:
    """Return ``shape.shape_type``, or ``None`` if python-pptx cannot classify it."""
    try:
        return shape.shape_type
    except NotImplementedError:
        # python-pptx can raise this for shapes whose type it does not
        # recognise; one odd shape should not abort the whole report.
        return None


def _shape_text(shape) -> str:
    """Return the shape's stripped text, or ``""`` when it has no text frame."""
    if not getattr(shape, "has_text_frame", False):
        return ""
    return shape.text_frame.text.strip()


def main() -> None:
    """Summarise every slide of ``resources/inputs/ai_work_source.pptx``.

    The file is resolved relative to this script's directory. For each slide
    one summary line is printed (shape count, non-empty text shapes, pictures,
    tables and a per-type tally), followed by up to ``MAX_PREVIEWS_PER_SLIDE``
    text previews truncated to ``MAX_PREVIEW_CHARS`` characters.

    Raises:
        FileNotFoundError: If the source presentation does not exist.
    """
    base_dir = Path(__file__).resolve().parent
    src = base_dir / "resources" / "inputs" / "ai_work_source.pptx"
    if not src.exists():
        raise FileNotFoundError(src)

    prs = Presentation(str(src))
    print(f"file={src} slides={len(prs.slides)}")

    for idx, slide in enumerate(prs.slides, start=1):
        types: Counter = Counter()
        picture_shapes = 0
        tables = 0
        texts: list[str] = []

        # Single pass: gather every statistic and the preview text together.
        for sh in slide.shapes:
            kind = _shape_type(sh)
            types[kind] += 1
            if kind == MSO_SHAPE_TYPE.PICTURE:
                picture_shapes += 1
            if getattr(sh, "has_table", False):
                tables += 1
            text = _shape_text(sh)
            if text:
                texts.append(text)

        # Every shape with non-empty text is both counted and previewed, so
        # the count is simply the number of collected texts.
        text_shapes = len(texts)

        print(
            f"Slide {idx:02d}: shapes={len(slide.shapes)} "
            f"textShapes={text_shapes} pictures={picture_shapes} tables={tables} "
            f"typeCounts={dict(types)}"
        )

        # Print a short preview of actual text content (if any); paragraph
        # breaks are flattened so each shape stays on one output line.
        for text in texts[:MAX_PREVIEWS_PER_SLIDE]:
            flattened = text.replace("\n", " / ")
            print(f" - {flattened[:MAX_PREVIEW_CHARS]}")


if __name__ == "__main__":
    main()