// ai_platform/lib/parse-checklist-from-minutes.js

/**
 * 회의록 generated_minutes(Markdown)에서 체크리스트·액션 섹션 추출
 * LLM 출력은 ##/### 혼용, 번호 접두, 굵게 표기 등이 섞이므로 규칙을 넓게 둔다.
 */

// Legacy exact-match patterns for checklist section headings.
// Each pattern only matches a level-2 (`## `) heading with no numbering or bold
// markers; matching is case-insensitive. Used as the fallback in
// extractChecklistSectionBody when the flexible predicate finds nothing.
const CHECKLIST_HEADINGS = [
  /^##\s+후속\s*확인\s*체크리스트\s*$/i, // "follow-up confirmation checklist"
  /^##\s+체크리스트\s*\([^)]*\)\s*$/i, // "checklist" followed by a parenthesized note
  /^##\s+체크리스트\s*$/i, // "checklist"
  /^##\s+후속\s*확인\s*$/i, // "follow-up confirmation"
];

// Legacy exact-match patterns for action-item section headings (level-2 only,
// case-insensitive). Used as the fallback in extractActionSectionBody.
const ACTION_HEADINGS = [
  /^##\s+Action\s+Items\s*$/i,
  /^##\s+Action\s+Item\s*$/i,
  /^##\s+액션\s*아이템\s*$/i, // "action item(s)" written in Korean
  /^##\s+액션\s*$/i, // "action" written in Korean
];

/**
 * Removes Markdown bold markers (`**text**` and `__text__`) and trims the result.
 * Only balanced pairs whose inner text contains no marker character are unwrapped;
 * unbalanced markers are left in place.
 * @param {string|null|undefined} s
 * @returns {string} the cleaned text, or "" for any falsy input
 */
function stripMd(s) {
  if (!s) {
    return "";
  }
  const withoutAsteriskBold = s.replace(/\*\*([^*]+)\*\*/g, "$1");
  const withoutUnderscoreBold = withoutAsteriskBold.replace(/__([^_]+)__/g, "$1");
  return withoutUnderscoreBold.trim();
}

/**
 * Reduces a heading line to plain comparison text: the leading `#` markers,
 * `**` bold markers and a leading numbering prefix (`1.` / `1)`) are removed,
 * whitespace runs collapse to a single space, and the result is lower-cased.
 *
 * Bold markers are removed BEFORE the numbering prefix so that a heading whose
 * number sits inside the bold span (`## **1. Title**`) is normalized the same
 * way as `## 1. **Title**`.
 * @param {string} line
 * @returns {string}
 */
function normalizeHeadingText(line) {
  return line
    .trim()
    .replace(/^#{1,6}\s+/, "")
    .replace(/\*\*/g, "")
    // Re-trim: removing a leading `**` may expose whitespace before the number.
    .trim()
    .replace(/^\d+[.)]\s*/, "")
    .replace(/\s+/g, " ")
    .trim()
    .toLowerCase();
}

/**
 * Decides whether a line reads like a checklist / follow-up section title.
 * The line is normalized with normalizeHeadingText before keyword matching.
 * A title that is exactly "액션 아이템" or "action items" is rejected, while a
 * combined title mentioning both action items and checklist is accepted.
 * @param {string} trimmed - a single line (may be a Markdown heading)
 * @returns {boolean}
 */
function isChecklistHeadingLine(trimmed) {
  const heading = normalizeHeadingText(trimmed);
  if (heading.length === 0) {
    return false;
  }
  const has = (needle) => heading.includes(needle);
  const mentionsChecklist = /체크리스트/.test(heading);

  // Combined "action items + checklist" titles count as checklist sections.
  if (mentionsChecklist && /액션\s*아이템/.test(heading)) {
    return true;
  }

  // A title that is nothing but "action items" is never a checklist.
  const isPureActionTitle =
    /^액션\s*아이템\s*$/.test(heading) || /^action\s+items\s*$/.test(heading);
  if (isPureActionTitle) {
    return false;
  }

  return (
    mentionsChecklist ||
    (has("후속") && (has("확인") || has("체크"))) ||
    (has("follow") && has("check")) ||
    has("follow-up") ||
    has("follow up")
  );
}

/**
 * Decides whether a line reads like an action-items section title.
 * The line is normalized with normalizeHeadingText, then checked against exact
 * titles first and keyword combinations second (Korean and English).
 * @param {string} trimmed - a single line (may be a Markdown heading)
 * @returns {boolean}
 */
function isActionHeadingLine(trimmed) {
  const heading = normalizeHeadingText(trimmed);
  if (!heading) {
    return false;
  }

  const exactTitles = [/^액션\s*아이템\s*$/, /^action\s+items\s*$/i, /^action\s+item\s*$/i];
  if (exactTitles.some((pattern) => pattern.test(heading))) {
    return true;
  }

  const containsAny = (words) => words.some((word) => heading.includes(word));
  const koreanHit = heading.includes("액션") && containsAny(["아이템", "항목"]);
  const englishHit = heading.includes("action") && containsAny(["item", "items"]);
  return koreanHit || englishHit;
}

/**
 * Returns the body of the first Markdown heading accepted by `predicate` whose
 * section is non-empty. A section runs from the line after the heading up to
 * (not including) the next heading of the same or a shallower level; deeper
 * sub-headings stay inside the body. Matching headings with an empty body are
 * skipped and the search continues.
 * @param {string} text
 * @param {(trimmedLine: string) => boolean} predicate - called with each trimmed heading line
 * @returns {string|null} the trimmed section body, or null when none is found
 */
function extractSectionByHeadingPredicate(text, predicate) {
  const allLines = text.split(/\r?\n/);

  // Heading depth (1-6) of a line, or 0 when the line is not a heading.
  const headingLevelOf = (line) => {
    const marker = line.trim().match(/^(#{1,6})\s+/);
    return marker ? marker[1].length : 0;
  };

  for (const [index, rawLine] of allLines.entries()) {
    const candidate = rawLine.trim();
    const headingMatch = /^(#{1,6})(\s+.+)$/.exec(candidate);
    if (headingMatch === null || !predicate(candidate)) {
      continue;
    }
    const depth = headingMatch[1].length;
    const following = allLines.slice(index + 1);
    const stopAt = following.findIndex((line) => {
      const level = headingLevelOf(line);
      return level > 0 && level <= depth;
    });
    const sectionLines = stopAt === -1 ? following : following.slice(0, stopAt);
    const sectionText = sectionLines.join("\n").trim();
    if (sectionText.length > 0) {
      return sectionText;
    }
  }
  return null;
}

/**
 * Legacy extractor: returns the text between a heading line matched by one of
 * `headingMatchers` and the next level-2 (`## `) heading. Only `## ` ends a
 * section here; deeper headings such as `### ` remain part of the body.
 *
 * A matching heading whose body is empty is skipped and the search continues
 * with later headings, consistent with extractSectionByHeadingPredicate.
 * @param {string} text
 * @param {RegExp[]} headingMatchers - tested against each trimmed line
 * @returns {string|null} the trimmed section body, or null when none is found
 */
function extractSectionAfterHeading(text, headingMatchers) {
  const lines = text.split(/\r?\n/);
  for (let i = 0; i < lines.length; i++) {
    const heading = lines[i].trim();
    if (!headingMatchers.some((re) => re.test(heading))) continue;

    const body = [];
    for (let j = i + 1; j < lines.length; j++) {
      if (/^##\s+/.test(lines[j].trim())) break;
      body.push(lines[j]);
    }
    const joined = body.join("\n").trim();
    // An empty section is not a result; keep looking for a later match.
    if (joined.length) return joined;
  }
  return null;
}

/**
 * @typedef {{ title: string, detail: string, assignee: string|null, due_note: string|null, completed: boolean }} ParsedItem
 */

/**
 * Parses the body of a list section into items. Recognized item lines, tried
 * in this order on each trimmed, non-empty line:
 *   1. bullet checkbox      `- [ ] title` / `* [x] title` / `• [✓] title`
 *   2. bare checkbox        `[ ] title` / `[x] title`
 *   3. glyph checkbox       `☐ title` / `☑ title` / `✓ title` / `✔ title`
 *   4. plain bullet         `- title` / `* title` / `• title`
 *   5. ordered item         `1. title` / `1) title`
 * Any other line is appended (newline-separated) to the detail of the most
 * recent item; such lines appearing before the first item are dropped.
 *
 * Ordered items accept both `.` and `)` delimiters, matching what
 * parseNumberedActionBlocks accepts.
 * @param {string} body
 * @returns {ParsedItem[]} items with a non-empty title, in document order
 */
function parseBulletItems(body) {
  if (!body || !body.trim()) return [];
  /** @type {ParsedItem[]} */
  const items = [];
  /** @type {ParsedItem|null} */
  let cur = null;

  // Builds an item with empty detail/assignee/due fields from a raw title.
  const makeItem = (rawTitle, completed) => ({
    title: stripMd(rawTitle.trim()),
    detail: "",
    assignee: null,
    due_note: null,
    completed,
  });
  // Moves the pending plain/ordered item (if any) into the result list.
  const flush = () => {
    if (cur) {
      items.push(cur);
      cur = null;
    }
  };

  for (const raw of body.split(/\r?\n/)) {
    const t = raw.trim();
    if (!t) continue;

    // Checkbox forms carry their own completion state.
    let m =
      t.match(/^\s*[-*•]\s+\[([ xX✓])\]\s*(.+)$/) ||
      t.match(/^\s*\[\s*([ xX✓])\s*\]\s+(.+)$/);
    if (m) {
      flush();
      items.push(makeItem(m[2], /[xX✓]/.test(m[1])));
      continue;
    }

    m = t.match(/^\s*[☐☑✓✔]\s*(.+)$/);
    if (m) {
      flush();
      // Every glyph except the empty box marks the item as done.
      items.push(makeItem(m[1], /^[☑✓✔]/.test(t)));
      continue;
    }

    // Plain bullets and ordered items are never completed.
    m = t.match(/^\s*[-*•]\s+(.+)$/) || t.match(/^\s*\d+[.)]\s+(.+)$/);
    if (m) {
      flush();
      cur = makeItem(m[1], false);
      continue;
    }

    // Continuation text: attach to the pending item, else to the last stored one.
    const target = cur || items[items.length - 1];
    if (target) {
      target.detail += (target.detail ? "\n" : "") + t;
    }
  }
  flush();
  return items.filter((x) => x.title.length > 0);
}

/**
 * Splits the detail lines found under one action item into structured fields.
 * Labeled lines are recognized by their Korean labels:
 *   - `담당:` (assignee)  → `assignee`
 *   - `기한:` (deadline)  → `due_note`
 *   - `할 일:` (to-do)    → its value is kept as a detail line
 * All other non-empty lines are kept verbatim (trimmed) as detail lines.
 * A labeled line with no value after the colon is dropped.
 *
 * Bold markers are removed before label detection so that `**담당**: X`,
 * `**담당:** X` and `담당: X` are all recognized and yield the same value.
 * @param {string|null|undefined} detailText
 * @returns {{ assignee: string|null, due_note: string|null, detail: string }}
 */
function refineActionLines(detailText) {
  let assignee = null;
  let due_note = null;
  const rest = [];

  // Cleans a captured field value: stray edge asterisks first, then bold pairs.
  const cleanValue = (value) => stripMd(value.replace(/^\*+|\*+$/g, "").trim());

  for (const line of (detailText || "").split(/\r?\n/)) {
    const r = line.trim();
    if (!r) continue;
    // Label probe: drop a leading `* ` bullet and every `**` marker so the
    // label and its colon are adjacent regardless of where the bold span ends.
    const t = r.replace(/^\*\s+/, "").replace(/\*\*/g, "").trim();

    if (/담당\s*:/i.test(t)) {
      const m = t.match(/담당\s*:\s*(.+)$/i);
      if (m) assignee = cleanValue(m[1]);
      continue;
    }
    if (/기한\s*:/i.test(t)) {
      const m = t.match(/기한\s*:\s*(.+)$/i);
      if (m) due_note = cleanValue(m[1]);
      continue;
    }
    if (/할\s*일\s*:/i.test(t)) {
      const m = t.match(/할\s*일\s*:\s*(.+)$/i);
      if (m) rest.push(stripMd(m[1]));
      continue;
    }
    rest.push(r);
  }
  return { assignee, due_note, detail: rest.join("\n").trim() };
}

/**
 * Parses an action-items body written as numbered blocks. A line of the form
 * `1. title` or `1) title` opens a block; the non-empty lines that follow, up
 * to the next numbered line, form its detail and are passed through
 * refineActionLines to pull out assignee / due note. Lines that appear before
 * the first numbered line are ignored.
 * @param {string} body
 * @returns {ParsedItem[]} items with a non-empty title; `completed` is always false
 */
function parseNumberedActionBlocks(body) {
  if (!body || !body.trim()) {
    return [];
  }

  /** @type {ParsedItem[]} */
  const parsed = [];
  /** @type {{ title: string, detailLines: string[] }|null} */
  let open = null;

  // Finalizes the currently open block (if any) into a ParsedItem.
  const closeOpenBlock = () => {
    if (open === null) {
      return;
    }
    const fields = refineActionLines(open.detailLines.join("\n").trim());
    parsed.push({
      title: open.title,
      detail: fields.detail,
      assignee: fields.assignee,
      due_note: fields.due_note,
      completed: false,
    });
    open = null;
  };

  for (const rawLine of body.split(/\r?\n/)) {
    const stripped = rawLine.trim();
    const header = stripped.match(/^(\d+)[.)]\s+(.+)$/);
    if (header) {
      closeOpenBlock();
      open = { title: stripMd(header[2].trim()), detailLines: [] };
      continue;
    }
    // A numbered-looking line that is not a valid block header still ends the
    // current block and is itself discarded.
    if (/^\d+[.)]\s+/.test(stripped)) {
      closeOpenBlock();
      continue;
    }
    if (open !== null && stripped) {
      open.detailLines.push(rawLine);
    }
  }
  closeOpenBlock();

  return parsed.filter((item) => item.title.length > 0);
}

/**
 * Collects work items from the minutes using the rule-based parsers only:
 * action items first, then checklist items. Items are de-duplicated by their
 * trimmed, lower-cased title; the first occurrence wins and items with an
 * empty title are dropped.
 * @param {string} markdown
 * @returns {ParsedItem[]}
 */
function parseAllRuleBasedWorkItems(markdown) {
  const source = (markdown || "").trim();
  if (source === "") {
    return [];
  }

  const candidates = parseItemsFromMinutes(source, "actions").concat(
    parseItemsFromMinutes(source, "checklist")
  );

  const usedKeys = new Set();
  return candidates.filter((item) => {
    const key = (item.title || "").trim().toLowerCase();
    if (key === "" || usedKeys.has(key)) {
      return false;
    }
    usedKeys.add(key);
    return true;
  });
}

/**
 * Finds the checklist section body: the flexible heading predicate is tried
 * first, and the legacy exact-match heading patterns are the fallback.
 * @param {string} text
 * @returns {string|null}
 */
function extractChecklistSectionBody(text) {
  return (
    extractSectionByHeadingPredicate(text, isChecklistHeadingLine) ||
    extractSectionAfterHeading(text, CHECKLIST_HEADINGS)
  );
}

/**
 * Finds the action-items section body: the flexible heading predicate is tried
 * first, and the legacy exact-match heading patterns are the fallback.
 * @param {string} text
 * @returns {string|null}
 */
function extractActionSectionBody(text) {
  return (
    extractSectionByHeadingPredicate(text, isActionHeadingLine) ||
    extractSectionAfterHeading(text, ACTION_HEADINGS)
  );
}

/**
 * Parses items from generated meeting minutes.
 * In "actions" mode the action-items section is parsed as numbered blocks,
 * falling back to bullet parsing when no numbered block is found. In any other
 * mode the checklist section is parsed as bullets.
 * @param {string} generatedMinutes
 * @param {'checklist'|'actions'} mode
 * @returns {ParsedItem[]} an empty array when the input or the section is missing
 */
function parseItemsFromMinutes(generatedMinutes, mode = "checklist") {
  const minutes = (generatedMinutes || "").trim();
  if (minutes.length === 0) {
    return [];
  }

  const wantsActions = mode === "actions";
  const sectionBody = wantsActions
    ? extractActionSectionBody(minutes)
    : extractChecklistSectionBody(minutes);
  if (!sectionBody) {
    return [];
  }

  if (wantsActions) {
    const blocks = parseNumberedActionBlocks(sectionBody);
    if (blocks.length > 0) {
      return blocks;
    }
  }
  return parseBulletItems(sectionBody);
}

// Public API (CommonJS). The helpers stripMd, normalizeHeadingText and
// refineActionLines are intentionally not listed here and stay module-private.
module.exports = {
  // Section extraction
  extractSectionAfterHeading,
  extractSectionByHeadingPredicate,
  extractChecklistSectionBody,
  extractActionSectionBody,
  // Item parsing
  parseBulletItems,
  parseNumberedActionBlocks,
  parseItemsFromMinutes,
  parseAllRuleBasedWorkItems,
  // Heading pattern tables and heading classifiers
  CHECKLIST_HEADINGS,
  ACTION_HEADINGS,
  isChecklistHeadingLine,
  isActionHeadingLine,
};