🐛 fix(thirdparty): prune nested project snapshots

This commit is contained in:
csh 2026-04-01 14:25:25 +08:00
parent a2a697e97b
commit da85d4efbb
4 changed files with 45 additions and 35 deletions

View File

@ -25,7 +25,6 @@ for entry in data["sources"]:
entry["sync_mode"], entry["sync_mode"],
entry["source_list"], entry["source_list"],
entry.get("skills_subdir", ""), entry.get("skills_subdir", ""),
",".join(entry.get("exclude_skill_dirs", [])),
entry.get("output_name", entry["id"]), entry.get("output_name", entry["id"]),
entry.get("platform_config", ""), entry.get("platform_config", ""),
entry.get("template_root", ""), entry.get("template_root", ""),
@ -108,23 +107,6 @@ tracked_skill_exists() {
return 1 return 1
} }
# Report whether a skill directory name appears in a comma-separated exclude list.
#
# Arguments:
#   $1 - skill directory name to look up
#   $2 - comma-separated list of excluded names (may be empty)
# Returns:
#   0 when $1 is exactly equal to one of the list entries,
#   1 otherwise (including when the list is empty).
is_excluded_skill_dir() {
  local name="$1"
  local exclude_csv="$2"
  # Scratch variables are declared local so repeated calls do not leak
  # (or clobber) same-named globals in the calling script.
  local -a excluded_names=()
  local excluded_name

  [ -n "$exclude_csv" ] || return 1

  # Scope the comma separator to this function; `read -a` splits the list
  # into array elements without any glob expansion.
  local IFS=','
  read -r -a excluded_names <<< "$exclude_csv"

  for excluded_name in "${excluded_names[@]}"; do
    if [ "$name" = "$excluded_name" ]; then
      return 0
    fi
  done
  return 1
}
cd "$REPO_DIR" cd "$REPO_DIR"
git config user.name "$COMMIT_AUTHOR_NAME" git config user.name "$COMMIT_AUTHOR_NAME"
@ -149,7 +131,7 @@ if ! emit_sources_tsv > "$sources_file"; then
exit 1 exit 1
fi fi
while IFS=$'\x1f' read -r source_id snapshot_dir sync_mode source_list skills_subdir exclude_skill_dirs output_name platform_config template_root data_dir scripts_dir; do while IFS=$'\x1f' read -r source_id snapshot_dir sync_mode source_list skills_subdir output_name platform_config template_root data_dir scripts_dir; do
[ -n "$source_id" ] || continue [ -n "$source_id" ] || continue
if [ -f "$source_list" ]; then if [ -f "$source_list" ]; then
@ -161,7 +143,7 @@ while IFS=$'\x1f' read -r source_id snapshot_dir sync_mode source_list skills_su
done < "$sources_file" done < "$sources_file"
declare -A owners=() declare -A owners=()
while IFS=$'\x1f' read -r source_id snapshot_dir sync_mode source_list skills_subdir exclude_skill_dirs output_name platform_config template_root data_dir scripts_dir; do while IFS=$'\x1f' read -r source_id snapshot_dir sync_mode source_list skills_subdir output_name platform_config template_root data_dir scripts_dir; do
[ -n "$source_id" ] || continue [ -n "$source_id" ] || continue
git archive --format=tar "origin/${THIRDPARTY_BRANCH}" "$snapshot_dir" | tar -xf - -C "$tmp_dir" git archive --format=tar "origin/${THIRDPARTY_BRANCH}" "$snapshot_dir" | tar -xf - -C "$tmp_dir"
@ -179,9 +161,6 @@ while IFS=$'\x1f' read -r source_id snapshot_dir sync_mode source_list skills_su
for dir in "$source_skills_dir"/*; do for dir in "$source_skills_dir"/*; do
[ -d "$dir" ] || continue [ -d "$dir" ] || continue
name="$(basename "$dir")" name="$(basename "$dir")"
if is_excluded_skill_dir "$name" "$exclude_skill_dirs"; then
continue
fi
if [ -n "${owners[$name]:-}" ] && [ "${owners[$name]}" != "$source_id" ]; then if [ -n "${owners[$name]:-}" ] && [ "${owners[$name]}" != "$source_id" ]; then
echo "ERROR: duplicate third-party skill name: $name" >&2 echo "ERROR: duplicate third-party skill name: $name" >&2
exit 1 exit 1

View File

@ -8,7 +8,7 @@
"sync_mode": "copy_skill_dirs", "sync_mode": "copy_skill_dirs",
"source_list": "codex/skills/.sources/superpowers.list", "source_list": "codex/skills/.sources/superpowers.list",
"skills_subdir": "skills", "skills_subdir": "skills",
"exclude_skill_dirs": ["ui-ux-pro-max"] "remove_paths": ["skills/ui-ux-pro-max"]
}, },
{ {
"id": "ui-ux-pro-max", "id": "ui-ux-pro-max",

View File

@ -88,12 +88,38 @@ for entry in data["sources"]:
entry.get("upstream_ref", "main"), entry.get("upstream_ref", "main"),
entry["snapshot_dir"], entry["snapshot_dir"],
entry["sync_mode"], entry["sync_mode"],
"\x1e".join(entry.get("remove_paths", [])),
] ]
) )
) )
PY PY
} }
# Print the value of the first "- <key>: <value>" bullet found in a metadata file.
#
# Arguments:
#   $1 - metadata key, matched literally after the leading "- "
#   $2 - path of the file to scan (e.g. a snapshot's SOURCE.md)
# Output:
#   The text following "- <key>:" with leading whitespace stripped, for the
#   first matching line only. Prints nothing when the file does not exist or
#   no line matches.
read_source_metadata_value() {
  local key="$1"
  local metadata_file="$2"

  # A missing file is not an error: callers compare against an empty value.
  [ -f "$metadata_file" ] || return 0

  # Strip the "- <key>:" prefix and print only the lines where it matched.
  local strip_prefix="s/^- ${key}:[[:space:]]*//p"
  sed -n "$strip_prefix" "$metadata_file" | head -n 1
}
# Delete manifest-listed paths from a freshly extracted snapshot directory.
#
# Arguments:
#   $1 - snapshot directory the paths are relative to
#   $2 - paths to remove, joined with the \x1e (record separator) character;
#        may be empty, and empty entries are skipped
# Returns:
#   0 on success or when there is nothing to remove;
#   1 (with a message on stderr) when a removal could land outside the
#   snapshot directory.
remove_snapshot_paths() {
  local snapshot_dir="$1"
  local remove_paths="$2"
  # Scratch variables are local so they do not leak into the calling script.
  local -a paths=()
  local path

  [ -n "$remove_paths" ] || return 0

  # With an empty snapshot dir the target would become "/<path>".
  if [ -z "$snapshot_dir" ]; then
    echo "ERROR: refusing to remove paths without a snapshot directory" >&2
    return 1
  fi

  local IFS=$'\x1e'
  read -r -a paths <<< "$remove_paths"

  for path in "${paths[@]}"; do
    [ -n "$path" ] || continue
    # A ".." component would let rm -rf climb out of the snapshot directory.
    case "/$path/" in
      */../*)
        echo "ERROR: refusing to remove path outside snapshot: $path" >&2
        return 1
        ;;
    esac
    # "--" keeps an operand that starts with "-" from being parsed as an option.
    rm -rf -- "$snapshot_dir/$path"
  done
}
cd "$REPO_DIR" cd "$REPO_DIR"
git config user.name "$COMMIT_AUTHOR_NAME" git config user.name "$COMMIT_AUTHOR_NAME"
@ -124,8 +150,9 @@ if ! emit_sources_tsv > "$sources_file"; then
fi fi
changed=0 changed=0
while IFS=$'\x1f' read -r source_id upstream_repo upstream_ref snapshot_dir sync_mode; do while IFS=$'\x1f' read -r source_id upstream_repo upstream_ref snapshot_dir sync_mode remove_paths; do
[ -n "$source_id" ] || continue [ -n "$source_id" ] || continue
remove_paths_md="${remove_paths//$'\x1e'/,}"
gh_repo="" gh_repo=""
if gh_repo="$(github_owner_repo "$upstream_repo" 2>/dev/null)"; then if gh_repo="$(github_owner_repo "$upstream_repo" 2>/dev/null)"; then
@ -138,12 +165,10 @@ while IFS=$'\x1f' read -r source_id upstream_repo upstream_ref snapshot_dir sync
exit 1 exit 1
fi fi
current_sha="" current_sha="$(read_source_metadata_value "Ref" "$snapshot_dir/SOURCE.md")"
if [ -f "$snapshot_dir/SOURCE.md" ]; then current_remove_paths="$(read_source_metadata_value "Remove-Paths" "$snapshot_dir/SOURCE.md")"
current_sha="$(sed -n 's/^- Ref:[[:space:]]*//p' "$snapshot_dir/SOURCE.md" | head -n 1)"
fi
if [ "$latest_sha" = "$current_sha" ]; then if [ "$latest_sha" = "$current_sha" ] && [ "$remove_paths_md" = "$current_remove_paths" ]; then
echo "Third-party snapshot is up to date for ${source_id}: $latest_sha" echo "Third-party snapshot is up to date for ${source_id}: $latest_sha"
continue continue
fi fi
@ -169,12 +194,15 @@ while IFS=$'\x1f' read -r source_id upstream_repo upstream_ref snapshot_dir sync
git -C "$upstream_dir" archive --format=tar HEAD | tar -xf - -C "$snapshot_dir" git -C "$upstream_dir" archive --format=tar HEAD | tar -xf - -C "$snapshot_dir"
fi fi
remove_snapshot_paths "$snapshot_dir" "$remove_paths"
snapshot_date="$(date -u +%Y-%m-%d)" snapshot_date="$(date -u +%Y-%m-%d)"
cat > "$snapshot_dir/SOURCE.md" <<EOF cat > "$snapshot_dir/SOURCE.md" <<EOF
# Source # Source
- Repo: ${upstream_repo%".git"} - Repo: ${upstream_repo%".git"}
- Ref: $latest_sha - Ref: $latest_sha
- Remove-Paths: $remove_paths_md
- Snapshot: $snapshot_date - Snapshot: $snapshot_date
- Sync-Mode: $sync_mode - Sync-Mode: $sync_mode
- Notes: vendored into playbook branch $TARGET_BRANCH - Notes: vendored into playbook branch $TARGET_BRANCH

View File

@ -33,10 +33,10 @@ class ThirdpartySkillsPipelineTests(unittest.TestCase):
self.assertEqual(ui_skill["sync_mode"], "render_codex_skill") self.assertEqual(ui_skill["sync_mode"], "render_codex_skill")
self.assertEqual(ui_skill["snapshot_dir"], "ui-ux-pro-max") self.assertEqual(ui_skill["snapshot_dir"], "ui-ux-pro-max")
def test_superpowers_manifest_excludes_ui_ux_pro_max_from_copy_mode(self): def test_superpowers_manifest_prunes_non_superpowers_paths(self):
data = load_manifest() data = load_manifest()
superpowers = next(item for item in data["sources"] if item["id"] == "superpowers") superpowers = next(item for item in data["sources"] if item["id"] == "superpowers")
self.assertEqual(superpowers["exclude_skill_dirs"], ["ui-ux-pro-max"]) self.assertEqual(superpowers["remove_paths"], ["skills/ui-ux-pro-max"])
def test_workflow_uses_generic_scripts_and_single_serial_job(self): def test_workflow_uses_generic_scripts_and_single_serial_job(self):
text = WORKFLOW.read_text(encoding="utf-8") text = WORKFLOW.read_text(encoding="utf-8")
@ -90,6 +90,9 @@ class ThirdpartySkillsPipelineTests(unittest.TestCase):
self.assertIn('manifest_copy="$tmp_dir/thirdparty_skills.json"', text) self.assertIn('manifest_copy="$tmp_dir/thirdparty_skills.json"', text)
self.assertIn('cp "$MANIFEST_PATH" "$manifest_copy"', text) self.assertIn('cp "$MANIFEST_PATH" "$manifest_copy"', text)
self.assertIn('MANIFEST_PATH="$manifest_copy"', text) self.assertIn('MANIFEST_PATH="$manifest_copy"', text)
self.assertIn("remove_paths", text)
self.assertIn('remove_snapshot_paths "$snapshot_dir" "$remove_paths"', text)
self.assertIn("- Remove-Paths:", text)
self.assertIn('if ! emit_sources_tsv > "$sources_file"; then', text) self.assertIn('if ! emit_sources_tsv > "$sources_file"; then', text)
self.assertNotIn("done < <(emit_sources_tsv)", text) self.assertNotIn("done < <(emit_sources_tsv)", text)
self.assertLess( self.assertLess(
@ -97,13 +100,13 @@ class ThirdpartySkillsPipelineTests(unittest.TestCase):
text.index('git checkout -B "$TARGET_BRANCH" "origin/$TARGET_BRANCH"'), text.index('git checkout -B "$TARGET_BRANCH" "origin/$TARGET_BRANCH"'),
) )
def test_sync_script_uses_non_whitespace_separator_for_optional_fields(self): def test_sync_script_assumes_thirdparty_snapshot_is_already_clean(self):
text = SYNC_SCRIPT.read_text(encoding="utf-8") text = SYNC_SCRIPT.read_text(encoding="utf-8")
self.assertIn('"\\x1f".join(', text) self.assertIn('"\\x1f".join(', text)
self.assertIn("while IFS=$'\\x1f' read -r", text) self.assertIn("while IFS=$'\\x1f' read -r", text)
self.assertNotIn("while IFS=$'\\t' read -r", text) self.assertNotIn("while IFS=$'\\t' read -r", text)
self.assertIn("exclude_skill_dirs", text) self.assertNotIn("exclude_skill_dirs", text)
self.assertIn('if is_excluded_skill_dir "$name" "$exclude_skill_dirs"; then', text) self.assertNotIn("is_excluded_skill_dir", text)
if __name__ == "__main__": if __name__ == "__main__":