54 lines
1.6 KiB
Python
54 lines
1.6 KiB
Python
from __future__ import annotations
|
|
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
|
|
from pptx import Presentation
|
|
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
|
|
|
|
|
def main() -> None:
    """Print a per-slide shape inventory of the source presentation.

    Opens ``resources/inputs/ai_work_source.pptx`` (resolved relative to the
    directory containing this file) and prints, for every slide:

    * the total number of shapes,
    * how many shapes carry non-blank text, are pictures, or hold a table,
    * a count of shapes per ``shape_type`` value,
    * up to eight text previews, each cut to 160 characters, with line
      breaks shown as `` / ``.

    Raises:
        FileNotFoundError: If the presentation is not at the expected path.
    """
    base_dir = Path(__file__).resolve().parent
    src = base_dir / "resources" / "inputs" / "ai_work_source.pptx"
    if not src.exists():
        raise FileNotFoundError(src)

    prs = Presentation(str(src))
    print(f"file={src} slides={len(prs.slides)}")

    for idx, slide in enumerate(prs.slides, start=1):
        types = Counter()
        picture_shapes = 0
        tables = 0
        previews: list[str] = []

        # Single pass: gather counts and text previews together so each
        # shape's text is extracted only once.
        for sh in slide.shapes:
            try:
                shape_type = sh.shape_type
            except NotImplementedError:
                # python-pptx raises this for shapes it cannot classify;
                # count them under None rather than aborting the report.
                shape_type = None
            types[shape_type] += 1

            if shape_type == MSO_SHAPE_TYPE.PICTURE:
                picture_shapes += 1
            if getattr(sh, "has_table", False):
                tables += 1
            if getattr(sh, "has_text_frame", False):
                text = sh.text_frame.text.strip()
                if text:
                    previews.append(text.replace("\n", " / "))

        # Every shape with non-blank text contributed exactly one preview.
        text_shapes = len(previews)

        print(
            f"Slide {idx:02d}: shapes={len(slide.shapes)} "
            f"textShapes={text_shapes} pictures={picture_shapes} tables={tables} "
            f"typeCounts={dict(types)}"
        )

        # Print a short preview of actual text content (if any).
        for t in previews[:8]:
            print(f" - {t[:160]}")
|
|
|
|
|
|
# Run the inspection only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|