"""Soft-maintain raw user-thought entries into mdbase."""

import re
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path

from common import find_ustht, read_define_ini, write_define_ini, is_processed, validate_dim_name

HELP = """Usage: python sortin.py [--dry] [--help]

Soft maintenance: parse unprocessed #raw/*.md files, append entries to
matching mdbase dimensions, mark raw files as processed, and update LAST_SORTIN.

Options:
  --dry     Preview changes without writing
  --help    Show this help text
"""

# One raw entry line: "- [HH:MM] free text".
_ENTRY_RE = re.compile(r"^- \[(\d{2}:\d{2})\] (.*)$")

# Date prefix expected at the start of a raw file's stem (yyyy-mm-dd).
_DATE_RE = re.compile(r"\d{4}-\d{2}-\d{2}")

# Separator between an entry's text and its optional dimension hint.
_DIM_TAG = " | suggested-dim:"

# Dimension used when an entry has no hint or the hint is rejected.
_DEFAULT_DIM = "general"


def parse_raw_file(filepath: Path) -> list:
    """Parse raw entries from one file.

    Every line that matches ``- [HH:MM] text`` (after stripping whitespace)
    becomes one entry; all other lines are ignored.  A trailing
    `` | suggested-dim:<name>`` selects the target dimension; when the hint is
    absent or rejected by ``validate_dim_name`` the entry goes to "general".

    The entry date is taken from the first three dash-separated parts of the
    file stem.  If those parts do not form a ``yyyy-mm-dd`` string, today's
    date is used instead so that a non-date filename never ends up as a
    ``## <date>`` heading.

    Returns:
        A list of dicts with the keys ``time``, ``text``, ``dimension`` and
        ``date``, in file order.
    """
    date = "-".join(filepath.stem.split("-", 3)[:3])
    if not _DATE_RE.fullmatch(date):
        date = datetime.now().strftime("%Y-%m-%d")

    entries = []
    for raw_line in filepath.read_text(encoding="utf-8").splitlines():
        match = _ENTRY_RE.match(raw_line.strip())
        if match is None:
            continue
        stamp, content = match.groups()
        text, dim = content, _DEFAULT_DIM
        if _DIM_TAG in content:
            # rsplit: only the last tag is the hint, earlier ones stay in the text.
            text, dim = content.rsplit(_DIM_TAG, 1)
            dim = dim.strip()
            if not validate_dim_name(dim):
                dim = _DEFAULT_DIM
        entries.append({"time": stamp, "text": text.strip(), "dimension": dim, "date": date})
    return entries


def dim_path(mdbase: Path, dim: str) -> Path:
    """Return the target file path for a dimension.

    "backlog" lives directly in ``mdbase``; every other dimension is stored as
    ``mdbase/details/<dim>.md``.
    """
    if dim == "backlog":
        return mdbase / "backlog.md"
    return mdbase / "details" / f"{dim}.md"


def count_entries(path: Path) -> int:
    """Count the lines in ``path`` that start with ``- `` after stripping.

    Returns 0 when the file does not exist.
    """
    if not path.exists():
        return 0
    file_lines = path.read_text(encoding="utf-8").splitlines()
    return sum(1 for line in file_lines if line.strip().startswith("- "))


def append_entries(path: Path, entries) -> None:
    """Append entries grouped by date to one dimension file.

    The file (and its parent directory) is created with a title header when it
    does not exist yet.  For each date, in sorted order, the entries are added
    as ``- text`` bullets at the end of the existing ``## <date>`` section, or
    in a new section appended to the end of the file when no such section
    exists.

    Args:
        path: Dimension file to update.
        entries: Iterable of entry dicts; the ``date`` and ``text`` keys are read.
    """
    by_date = defaultdict(list)
    for entry in entries:
        by_date[entry["date"]].append(entry)

    path.parent.mkdir(parents=True, exist_ok=True)
    if not path.exists():
        title = path.stem.replace("-", " ").title()
        path.write_text(f"# {title}\n\n> Project memory for `{path.stem}`.\n\n", encoding="utf-8")

    content = path.read_text(encoding="utf-8").rstrip()
    for date, date_entries in sorted(by_date.items()):
        lines = [f"- {entry['text']}" for entry in date_entries]
        heading = f"## {date}"
        content_lines = content.splitlines()

        # Look for the heading as a whole line.  A plain substring test would
        # also hit text such as "## 2024-01-05 (draft)" or a bullet quoting the
        # heading, and the exact-line lookup would then find nothing.
        heading_idx = next(
            (i for i, line in enumerate(content_lines) if line.strip() == heading),
            None,
        )
        if heading_idx is None:
            block = "\n".join(lines)
            content = f"{content}\n\n{heading}\n\n{block}".rstrip()
            continue

        # The section ends at the next "## " heading, or at end of file.
        insert_idx = next(
            (
                i
                for i in range(heading_idx + 1, len(content_lines))
                if content_lines[i].startswith("## ")
            ),
            len(content_lines),
        )
        before = content_lines[:insert_idx]
        after = content_lines[insert_idx:]
        if before and before[-1].strip():
            before.append("")
        before.extend(lines)
        if after:
            # Keep one blank line between the new bullets and the next section.
            before.append("")
            before.extend(after)
        content = "\n".join(before).rstrip()

    path.write_text(content + "\n", encoding="utf-8")


def mark_processed(filepath: Path) -> None:
    """Insert the processed marker at the top of a raw file.

    As written, the file is rewritten with a leading newline unless its first
    line is already blank.
    """
    # NOTE(review): the marker here is an empty first line -- confirm that this
    # is what `is_processed` in `common` checks for.
    content = filepath.read_text(encoding="utf-8")
    first_line = content.split("\n", 1)[0]
    if first_line.strip() != "":
        filepath.write_text("\n" + content, encoding="utf-8")


def update_index(mdbase: Path) -> None:
    """Rebuild mdbase/README.ai.md with dimension counts.

    The index lists ``backlog.md`` (when present) followed by every ``*.md``
    file found recursively under ``mdbase/details``, sorted by relative path,
    each with its bullet count from ``count_entries``.
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    details = mdbase / "details"

    dims = []
    if details.exists():
        # Dimension name = path relative to details/, without the .md suffix.
        dims = sorted(
            p.relative_to(details).with_suffix("").as_posix()
            for p in details.rglob("*.md")
        )

    rows = ["| File | Dimension | Entries |", "|------|-----------|---------|"]
    backlog = mdbase / "backlog.md"
    if backlog.exists():
        rows.append(f"| [backlog.md](backlog.md) | backlog | {count_entries(backlog)} |")
    for dim in dims:
        path = details / f"{dim}.md"
        rows.append(f"| [details/{dim}.md](details/{dim}.md) | {dim} | {count_entries(path)} |")

    content = "\n".join([
        "# user-thoughts mdbase Index",
        "",
        "This directory stores user-provided project decisions, constraints, preferences, and plans.",
        "",
        f"Last updated: {now}",
        "",
        "## Maintenance Rules",
        "",
        "- Preserve user wording and constraints.",
        "- Append entries by date under `## yyyy-mm-dd` headings.",
        "- Prefer existing dimensions before creating new ones.",
        "- Mark deprecated content instead of silently deleting history.",
        "",
        "## Document Index",
        "",
        *rows,
        "",
    ])
    (mdbase / "README.ai.md").write_text(content, encoding="utf-8")


def main():
    """Run soft maintenance from the command line.

    Parses every raw file that ``is_processed`` does not report as processed,
    prints a per-dimension summary, and -- unless ``--dry`` is given -- appends
    the entries to their dimension files, marks the raw files, stores
    ``LAST_SORTIN`` in the config and rebuilds the index.
    """
    if "--help" in sys.argv or "-h" in sys.argv:
        print(HELP)
        sys.exit(0)
    dry = "--dry" in sys.argv

    ustht = find_ustht()
    if ustht is None:
        print("Error: .ustht/ was not found. Run /ustht init first.")
        sys.exit(1)

    cfg = read_define_ini(ustht)
    if cfg.get("SKILL_STATUS") == "off":
        print("SKILL is off; write ignored. Run /ustht skill on to enable it.")
        sys.exit(0)

    raw_dir = ustht / "raw"
    if not raw_dir.exists():
        print("No unprocessed records.")
        return

    raw_files = [f for f in sorted(raw_dir.glob("*.md")) if not is_processed(f)]
    if not raw_files:
        print("No unprocessed records. All raw files are marked processed.")
        return

    all_entries = []
    entries_by_file = {}
    for f in raw_files:
        entries = parse_raw_file(f)
        entries_by_file[f] = entries
        all_entries.extend(entries)

    if not all_entries:
        print("No valid entries found in raw files.")
        return

    grouped = defaultdict(list)
    for entry in all_entries:
        grouped[entry["dimension"]].append(entry)

    # The summary is printed before anything is written so that the
    # "[new dimension]" label reflects the state on disk prior to this run.
    print("Preview mode:" if dry else f"Soft maintenance complete. Processed {len(all_entries)} thoughts:")
    mdbase = ustht / "mdbase"
    for dim, entries in sorted(grouped.items()):
        target = dim_path(mdbase, dim)
        label = f"{dim}.md" if target.exists() else f"{dim}.md [new dimension]"
        sample = entries[0]["text"][:60]
        print(f" -> {label}: +{len(entries)} ({sample})")

    if dry:
        print(f" {len(all_entries)} total entries; no files were changed.")
        return

    for dim, entries in grouped.items():
        append_entries(dim_path(mdbase, dim), entries)

    # Only files that yielded at least one entry are marked; a raw file with
    # no valid entries stays unmarked.
    for f in raw_files:
        if entries_by_file.get(f):
            mark_processed(f)

    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    cfg["LAST_SORTIN"] = now
    write_define_ini(ustht, cfg)
    update_index(mdbase)
    print(f" LAST_SORTIN updated to {now}")


if __name__ == "__main__":
    main()