Files
josh-sync/lib/onboard.sh
Slim B 105216a27e Add onboard and migrate-pr commands (v1.1.0)
New commands for safely onboarding existing subrepos into the monorepo
without losing open PRs:

- josh-sync onboard <target>: interactive, resumable 5-step flow
  (import → wait for merge → reset to new repo)
- josh-sync migrate-pr <target> [PR#...] [--all]: migrate PRs from
  archived repo to new repo via patch application

Also refactors create_pr() to wrap create_pr_number(), eliminating
duplicated curl/jq logic.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 12:41:44 +03:00

426 lines
16 KiB
Bash

#!/usr/bin/env bash
# lib/onboard.sh — Onboard orchestration and PR migration
#
# Provides:
# onboard_flow() — Interactive: import → wait for merge → reset to new repo
# migrate_one_pr() — Migrate a single PR from archived repo to new repo
#
# Onboard state is stored on the josh-sync-state branch at <target>/onboard.json.
# Steps: start → importing → waiting-for-merge → resetting → complete
#
# Requires: lib/core.sh, lib/config.sh, lib/auth.sh, lib/state.sh, lib/sync.sh sourced
# Expects: JOSH_SYNC_TARGET_NAME, BOT_NAME, BOT_EMAIL, SUBREPO_API, SUBREPO_TOKEN, etc.
# ─── Onboard State Helpers ────────────────────────────────────────
# Same approach as read_state()/write_state() in lib/state.sh.

# Print the onboard state JSON for a target on stdout.
# Arguments: $1 - target name (optional; falls back to $JOSH_SYNC_TARGET_NAME)
# Globals:   STATE_BRANCH (read)
# Outputs:   <target>/onboard.json as stored on origin/$STATE_BRANCH, or the
#            literal '{}' when `git show` cannot produce that file.
read_onboard_state() {
  local name="${1:-$JOSH_SYNC_TARGET_NAME}"
  local blob="origin/${STATE_BRANCH}:${name}/onboard.json"

  # Refresh the remote-tracking ref; a failed fetch is deliberately tolerated.
  git fetch origin "$STATE_BRANCH" 2>/dev/null || true

  if ! git show "$blob" 2>/dev/null; then
    echo '{}'
  fi
}
# Persist onboard state for a target to <target>/onboard.json on the state branch.
#
# Arguments: $1 - target name (empty → $JOSH_SYNC_TARGET_NAME)
#            $2 - state as a JSON document
# Globals:   STATE_BRANCH, BOT_NAME, BOT_EMAIL (read)
# Returns:   1 without touching git when $2 is empty or not valid JSON, or when
#            the temporary worktree cannot be prepared; otherwise the status of
#            the final worktree cleanup. A failed push is only logged as a warning.
write_onboard_state() {
  local target_name="${1:-$JOSH_SYNC_TARGET_NAME}"
  local state_json="$2"
  local key="${target_name}/onboard"
  local pretty tmp_dir
  local worktree_ok=true

  # Validate (and pretty-print) BEFORE touching the state branch: committing an
  # empty or truncated onboard.json would wipe the resume checkpoint.
  if ! pretty=$(printf '%s\n' "$state_json" | jq '.') || [ -z "$pretty" ]; then
    log "ERROR" "Refusing to write empty/invalid onboard state for ${target_name}"
    return 1
  fi

  if ! tmp_dir=$(mktemp -d); then
    log "ERROR" "Failed to create temp directory for onboard state"
    return 1
  fi

  if git rev-parse "origin/${STATE_BRANCH}" >/dev/null 2>&1; then
    # State branch exists: check out its tip in a throwaway worktree.
    git worktree add "$tmp_dir" "origin/${STATE_BRANCH}" 2>/dev/null || worktree_ok=false
  else
    # First write ever: start an orphan branch with an empty tree.
    if git worktree add --detach "$tmp_dir" 2>/dev/null; then
      (cd "$tmp_dir" && git checkout --orphan "$STATE_BRANCH" && { git rm -rf . 2>/dev/null || true; }) \
        || worktree_ok=false
    else
      worktree_ok=false
    fi
  fi

  if [ "$worktree_ok" != true ]; then
    log "ERROR" "Failed to prepare worktree for ${STATE_BRANCH} — onboard state not saved"
    git worktree remove --force "$tmp_dir" 2>/dev/null || rm -rf -- "$tmp_dir"
    return 1
  fi

  mkdir -p "$(dirname "${tmp_dir}/${key}.json")"
  printf '%s\n' "$pretty" > "${tmp_dir}/${key}.json"

  (
    cd "$tmp_dir" || exit 1
    git add -A
    # Commit and push only when the state actually changed.
    if ! git diff --cached --quiet 2>/dev/null; then
      git -c user.name="$BOT_NAME" -c user.email="$BOT_EMAIL" \
        commit -m "onboard: update ${target_name}"
      # Best effort by design: a failed push must not abort the flow.
      git push origin "HEAD:${STATE_BRANCH}" || log "WARN" "Failed to push onboard state"
    fi
  )

  git worktree remove "$tmp_dir" 2>/dev/null || rm -rf -- "$tmp_dir"
}
# ─── Derive Archived API URL ─────────────────────────────────────
# Given a clone URL, print the Gitea API URL of that repository:
#   https://<host>/api/v1/repos/<owner>/<repo>
#
# Accepted forms (a trailing "/" and/or ".git" is ignored):
#   git@host:org/repo.git               scp-like, any user name
#   ssh://[user@]host[:port]/org/repo   the SSH port is dropped — it is not the API port
#   http(s)://[user[:pw]@]host[:port]/org/repo
#                                       credentials are dropped, the port is kept
# The result always uses the https scheme.
#
# Arguments: $1 - clone URL
# Outputs:   API URL on stdout
_archived_api_from_url() {
  local url="$1"
  local rest authority host repo_path

  url="${url%/}"
  url="${url%.git}"

  case "$url" in
    ssh://*)
      rest="${url#ssh://}"
      authority="${rest%%/*}"
      repo_path="${rest#*/}"
      authority="${authority##*@}" # drop "user@"
      case "$authority" in
        \[*) host="${authority%%\]*}]" ;; # bracketed IPv6 literal: keep "[...]", drop ":port"
        *) host="${authority%%:*}" ;;     # drop ":port"
      esac
      ;;
    http://* | https://*)
      rest="${url#*://}"
      authority="${rest%%/*}"
      repo_path="${rest#*/}"
      # Never carry embedded credentials into a URL that gets logged and stored.
      host="${authority##*@}"
      ;;
    *)
      # scp-like "[user@]host:path". Only treat text before the first "@" as a
      # user name when it cannot be part of the host/path.
      rest="$url"
      if [[ "$url" == *@* && "${url%%@*}" != *[:/]* ]]; then
        rest="${url#*@}"
      fi
      host="${rest%%[:/]*}"
      repo_path="${rest#*[:/]}"
      ;;
  esac

  # "git@host:/org/repo" style absolute paths would otherwise yield "repos//org/repo".
  repo_path="${repo_path#/}"

  echo "https://${host}/api/v1/repos/${repo_path}"
}
# ─── Onboard Flow ────────────────────────────────────────────────
# Interactive orchestrator with checkpoint/resume.
# Usage: onboard_flow <target_json> [restart]
#
# Arguments:
#   $1 - target config JSON; `.branches` maps monorepo branch → subrepo branch
#   $2 - "true" to ignore saved progress and begin again at "start" (default: false)
# Globals read:    JOSH_SYNC_TARGET_NAME, SUBREPO_URL, SUBREPO_AUTH, SUBREPO_TOKEN,
#                  MONOREPO_API, GITEA_TOKEN
# Globals written: SYNC_BRANCH_MONO, SYNC_BRANCH_SUBREPO (exported for each branch)
# I/O:             prompts and progress go to stderr; answers are read from stdin.
#
# The current step is loaded with read_onboard_state() and saved with
# write_onboard_state() after every step (and after every branch inside steps 2
# and 4), so a re-run resumes where the previous one stopped. A finished step
# falls through to the next one within the same invocation.
onboard_flow() {
  local target_json="$1"
  local restart="${2:-false}"
  local target_name="$JOSH_SYNC_TARGET_NAME"
  local onboard_state current_step
  # Scratch/loop variables. `branch` in particular must be local, otherwise the
  # for-loops below overwrite a variable of the same name in the caller's shell.
  local branch branches mapped result
  local archived_url archived_auth archived_api confirm
  local import_prs prs pr_number
  local pr_count all_merged pr_json merged
  local already_reset

  # Load existing onboard state (or empty)
  onboard_state=$(read_onboard_state "$target_name")
  current_step=$(echo "$onboard_state" | jq -r '.step // "start"')

  if [ "$restart" = true ]; then
    log "INFO" "Restarting onboard from scratch"
    current_step="start"
    onboard_state='{}'
  fi

  # An unrecognised step would skip every block below and "succeed" without
  # doing anything — fail loudly instead.
  case "$current_step" in
    start | importing | waiting-for-merge | resetting | complete) ;;
    *) die "Unknown onboard step '${current_step}' in saved state for ${target_name}" ;;
  esac

  log "INFO" "Onboard step: ${current_step}"

  # ── Step 1: Prerequisites + archived repo info ──
  if [ "$current_step" = "start" ]; then
    echo "" >&2
    echo "=== Onboarding ${target_name} ===" >&2
    echo "" >&2
    echo "Before proceeding, you should have:" >&2
    echo " 1. Renamed the existing subrepo (e.g., storefront → storefront-archived)" >&2
    echo " 2. Created a new EMPTY repo at the original URL" >&2
    echo "" >&2

    # Verify the new (empty) subrepo is reachable (no HEAD ref — works on empty repos)
    if git ls-remote "$(subrepo_auth_url)" >/dev/null 2>&1; then
      # shellcheck disable=SC2001 # sed is clearer for URL pattern replacement
      log "INFO" "New subrepo is reachable at $(echo "$SUBREPO_URL" | sed 's|://[^@]*@|://***@|')"
    else
      log "WARN" "New subrepo is not reachable — make sure you created the new empty repo"
    fi

    echo "Enter the archived repo URL (e.g., git@host:org/repo-archived.git):" >&2
    read -r archived_url
    [ -n "$archived_url" ] || die "Archived URL is required"

    # Determine auth type for archived repo (same as current subrepo)
    archived_auth="${SUBREPO_AUTH:-https}"

    # Derive API URL
    archived_api=$(_archived_api_from_url "$archived_url")

    # Verify archived repo is reachable via API; let the user override a failed probe.
    if curl -sf -H "Authorization: token ${SUBREPO_TOKEN}" \
      "${archived_api}" >/dev/null 2>&1; then
      log "INFO" "Archived repo reachable: ${archived_api}"
    else
      log "WARN" "Cannot reach archived repo API — check URL and token"
      echo "Continue anyway? (y/N):" >&2
      read -r confirm
      [ "$confirm" = "y" ] || [ "$confirm" = "Y" ] || die "Aborted"
    fi

    # Save state
    onboard_state=$(jq -n \
      --arg step "importing" \
      --arg archived_api "$archived_api" \
      --arg archived_url "$archived_url" \
      --arg archived_auth "$archived_auth" \
      --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '{step:$step, archived_api:$archived_api, archived_url:$archived_url,
        archived_auth:$archived_auth, import_prs:{}, reset_branches:[],
        migrated_prs:[], timestamp:$ts}')
    write_onboard_state "$target_name" "$onboard_state"
    current_step="importing"
  fi

  # ── Step 2: Import (reuses initial_import()) ──
  if [ "$current_step" = "importing" ]; then
    echo "" >&2
    log "INFO" "Step 2: Importing subrepo content into monorepo..."

    branches=$(echo "$target_json" | jq -r '.branches | keys[]')

    # Load existing import_prs from state (resume support)
    import_prs=$(echo "$onboard_state" | jq -r '.import_prs // {}')

    for branch in $branches; do
      mapped=$(echo "$target_json" | jq -r --arg b "$branch" '.branches[$b] // empty')
      [ -z "$mapped" ] && continue

      # Skip branches that already have an import PR recorded
      if echo "$import_prs" | jq -e --arg b "$branch" 'has($b)' >/dev/null 2>&1; then
        log "INFO" "Import PR already recorded for ${branch} — skipping"
        continue
      fi

      export SYNC_BRANCH_MONO="$branch"
      export SYNC_BRANCH_SUBREPO="$mapped"
      log "INFO" "Importing branch: ${branch} (subrepo: ${mapped})"

      result=$(initial_import)
      log "INFO" "Import result for ${branch}: ${result}"

      if [ "$result" = "pr-created" ]; then
        # Find the import PR number via API. The title is matched as a literal
        # substring: target names may contain regex metacharacters (".", "+", …)
        # which would mis-match or make a regex test() fail.
        prs=$(list_open_prs "$MONOREPO_API" "$GITEA_TOKEN")
        pr_number=$(echo "$prs" | jq -r --arg t "$target_name" --arg b "$branch" \
          '[.[] | select(.title | contains("[Import] " + $t + ":")) | select(.base.ref == $b)] | .[0].number // empty')
        if [ -n "$pr_number" ]; then
          import_prs=$(echo "$import_prs" | jq --arg b "$branch" --arg n "$pr_number" '. + {($b): ($n | tonumber)}')
          log "INFO" "Import PR for ${branch}: #${pr_number}"
        else
          log "WARN" "Could not find import PR number for ${branch} — check monorepo PRs"
        fi
      fi

      # Save progress after each branch (resume support)
      onboard_state=$(echo "$onboard_state" | jq --argjson prs "$import_prs" '.import_prs = $prs')
      write_onboard_state "$target_name" "$onboard_state"
    done

    # Update state
    onboard_state=$(echo "$onboard_state" | jq \
      --arg step "waiting-for-merge" \
      --argjson prs "$import_prs" \
      '.step = $step | .import_prs = $prs')
    write_onboard_state "$target_name" "$onboard_state"
    current_step="waiting-for-merge"
  fi

  # ── Step 3: Wait for merge ──
  if [ "$current_step" = "waiting-for-merge" ]; then
    echo "" >&2
    log "INFO" "Step 3: Waiting for import PR(s) to be merged..."

    import_prs=$(echo "$onboard_state" | jq -r '.import_prs')
    pr_count=$(echo "$import_prs" | jq 'length')

    if [ "$pr_count" -eq 0 ]; then
      log "WARN" "No import PRs recorded — skipping merge check"
    else
      echo "" >&2
      echo "Import PRs to merge:" >&2
      echo "$import_prs" | jq -r 'to_entries[] | " \(.key): PR #\(.value)"' >&2
      echo "" >&2
      echo "Merge the import PR(s) on the monorepo, then press Enter..." >&2
      read -r

      # Verify each PR is merged; report every unmerged PR before giving up.
      all_merged=true
      for branch in $(echo "$import_prs" | jq -r 'keys[]'); do
        pr_number=$(echo "$import_prs" | jq -r --arg b "$branch" '.[$b]')
        pr_json=$(get_pr "$MONOREPO_API" "$GITEA_TOKEN" "$pr_number")
        merged=$(echo "$pr_json" | jq -r '.merged // false')
        if [ "$merged" = "true" ]; then
          log "INFO" "PR #${pr_number} (${branch}): merged"
        else
          log "ERROR" "PR #${pr_number} (${branch}): NOT merged — merge it first"
          all_merged=false
        fi
      done

      # The step stays "waiting-for-merge" in saved state, so a re-run lands here again.
      if [ "$all_merged" = false ]; then
        die "Not all import PRs are merged. Re-run 'josh-sync onboard ${target_name}' after merging."
      fi
    fi

    # Update state
    onboard_state=$(echo "$onboard_state" | jq '.step = "resetting"')
    write_onboard_state "$target_name" "$onboard_state"
    current_step="resetting"
  fi

  # ── Step 4: Reset (pushes josh-filtered history to new repo) ──
  if [ "$current_step" = "resetting" ]; then
    echo "" >&2
    log "INFO" "Step 4: Pushing josh-filtered history to new subrepo..."

    branches=$(echo "$target_json" | jq -r '.branches | keys[]')
    already_reset=$(echo "$onboard_state" | jq -r '.reset_branches // []')

    for branch in $branches; do
      # Skip branches already reset (resume support)
      if echo "$already_reset" | jq -e --arg b "$branch" 'index($b) != null' >/dev/null 2>&1; then
        log "INFO" "Branch ${branch} already reset — skipping"
        continue
      fi

      mapped=$(echo "$target_json" | jq -r --arg b "$branch" '.branches[$b] // empty')
      [ -z "$mapped" ] && continue

      export SYNC_BRANCH_MONO="$branch"
      export SYNC_BRANCH_SUBREPO="$mapped"

      # NOTE(review): the branch is recorded as reset whatever subrepo_reset
      # prints — confirm that it aborts on failure rather than reporting it here.
      result=$(subrepo_reset)
      log "INFO" "Reset result for ${branch}: ${result}"

      # Track progress
      onboard_state=$(echo "$onboard_state" | jq --arg b "$branch" \
        '.reset_branches += [$b]')
      write_onboard_state "$target_name" "$onboard_state"
    done

    # Update state
    onboard_state=$(echo "$onboard_state" | jq \
      --arg step "complete" \
      --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      '.step = $step | .timestamp = $ts')
    write_onboard_state "$target_name" "$onboard_state"
    current_step="complete"
  fi

  # ── Step 5: Done ──
  if [ "$current_step" = "complete" ]; then
    echo "" >&2
    echo "=== Onboarding complete! ===" >&2
    echo "" >&2
    echo "The new subrepo now has josh-filtered history." >&2
    echo "Developers should re-clone or reset their local copies:" >&2
    echo " git fetch origin && git reset --hard origin/main" >&2
    echo "" >&2
    echo "To migrate open PRs from the archived repo:" >&2
    echo " josh-sync migrate-pr ${target_name} # interactive picker" >&2
    echo " josh-sync migrate-pr ${target_name} --all # migrate all" >&2
    echo " josh-sync migrate-pr ${target_name} 5 8 12 # specific PRs" >&2
  fi
}
# ─── Migrate One PR ──────────────────────────────────────────────
# Applies a PR's diff from the archived repo to the new subrepo.
# Usage: migrate_one_pr <pr_number>
#
# Expects: JOSH_SYNC_TARGET_NAME, SUBREPO_API, SUBREPO_TOKEN, BOT_NAME, BOT_EMAIL loaded
#
# Flow: read archived_api from onboard state → fetch PR metadata and diff from
# the archived repo → clone the new subrepo at the PR's base branch → apply the
# diff on a branch named after the PR's head → push → open a PR → record the
# old/new PR numbers in onboard state.
#
# Returns: 0 when the PR was migrated or had already been migrated;
#          1 when this PR cannot be migrated automatically (empty diff, head
#            equals base, patch does not apply, commit or PR creation failed).
#          Calls die() on invalid input, missing onboard state, and fetch/
#          clone/push failures.
migrate_one_pr() {
  local pr_number="$1"
  local target_name="$JOSH_SYNC_TARGET_NAME"

  # The number is handed to jq via --argjson and compared numerically, so it
  # must be a plain non-negative integer.
  case "$pr_number" in
    '' | *[!0-9]*) die "Invalid PR number: '${pr_number}'" ;;
  esac
  pr_number=$((10#$pr_number)) # "007" → 7; leading zeros are not valid JSON

  # Read archived repo info from onboard state
  local onboard_state archived_api
  onboard_state=$(read_onboard_state "$target_name")
  archived_api=$(echo "$onboard_state" | jq -r '.archived_api')
  if [ -z "$archived_api" ] || [ "$archived_api" = "null" ]; then
    die "No archived repo info found. Run 'josh-sync onboard ${target_name}' first."
  fi

  # Check if this PR was already migrated
  local already_migrated
  already_migrated=$(echo "$onboard_state" | jq -r \
    --argjson num "$pr_number" '.migrated_prs // [] | map(select(.old_number == $num)) | length')
  if [ "${already_migrated:-0}" -gt 0 ]; then
    log "INFO" "PR #${pr_number} already migrated — skipping"
    return 0
  fi

  # Same credentials — the repo was just renamed
  local archived_token="$SUBREPO_TOKEN"

  # 1. Get PR metadata from archived repo
  local pr_json title base head body
  pr_json=$(get_pr "$archived_api" "$archived_token" "$pr_number") \
    || die "Failed to fetch PR #${pr_number} from archived repo"
  title=$(echo "$pr_json" | jq -r '.title')
  base=$(echo "$pr_json" | jq -r '.base.ref')
  head=$(echo "$pr_json" | jq -r '.head.ref')
  body=$(echo "$pr_json" | jq -r '.body // ""')
  log "INFO" "Migrating PR #${pr_number}: \"${title}\" (${base} <- ${head})"

  # 2. Get diff from archived repo
  local diff
  diff=$(get_pr_diff "$archived_api" "$archived_token" "$pr_number")
  if [ -z "$diff" ]; then
    log "WARN" "Empty diff for PR #${pr_number} — skipping"
    return 1
  fi

  if [ -z "$base" ] || [ "$base" = "null" ] || [ -z "$head" ] || [ "$head" = "null" ]; then
    die "PR #${pr_number} has no usable base/head branch in its metadata"
  fi
  # With head == base the migrated commit would be pushed straight onto the base
  # branch of the new repo instead of onto a PR branch.
  if [ "$head" = "$base" ]; then
    log "ERROR" "PR #${pr_number} uses '${head}' as both head and base — skipping"
    log "ERROR" "Manual migration needed: apply the diff on a separate branch"
    return 1
  fi

  # 3. Clone new subrepo, apply patch
  # Save cwd so we can restore it (function runs in caller's shell, not subshell)
  local original_dir work_dir
  original_dir=$(pwd)
  work_dir=$(mktemp -d) || die "Failed to create temp directory"
  # Restore cwd and remove the clone on every `return`. %q keeps unusual paths
  # intact, and the trap removes itself so it does not fire again when the
  # caller returns. It does not run when the shell exits, hence the explicit
  # cleanup before each die below (assuming die exits the shell).
  # shellcheck disable=SC2064 # Intentional early expansion
  trap "cd $(printf '%q' "$original_dir") 2>/dev/null; rm -rf -- $(printf '%q' "$work_dir"); trap - RETURN" RETURN

  if ! git clone "$(subrepo_auth_url)" --branch "$base" --single-branch \
    "${work_dir}/subrepo" 2>&1; then
    rm -rf -- "$work_dir"
    die "Failed to clone new subrepo (branch: ${base})"
  fi
  if ! cd "${work_dir}/subrepo"; then
    rm -rf -- "$work_dir"
    die "Failed to enter clone at ${work_dir}/subrepo"
  fi

  git config user.name "$BOT_NAME"
  git config user.email "$BOT_EMAIL"
  git checkout -B "$head" >&2

  if ! printf '%s\n' "$diff" | git apply --check 2>/dev/null; then
    log "ERROR" "Patch doesn't apply cleanly for PR #${pr_number} — skipping"
    log "ERROR" "Manual migration needed: get diff from archived repo and resolve conflicts"
    return 1
  fi

  printf '%s\n' "$diff" | git apply
  git add -A
  # Two -m options give a subject and a separate body paragraph.
  if ! git commit -m "$title" -m "Migrated from archived repo PR #${pr_number}" >&2; then
    log "ERROR" "Nothing could be committed for PR #${pr_number} — skipping"
    return 1
  fi

  if ! git push "$(subrepo_auth_url)" "$head" >&2; then
    cd "$original_dir" 2>/dev/null || true
    rm -rf -- "$work_dir"
    die "Failed to push branch ${head}"
  fi

  # 4. Create PR on new repo
  local new_number
  new_number=$(create_pr_number "$SUBREPO_API" "$SUBREPO_TOKEN" \
    "$base" "$head" "$title" "$body")
  # Without a numeric PR number the state update below would produce an empty
  # document and overwrite the saved onboard state with it.
  case "$new_number" in
    '' | *[!0-9]*)
      log "ERROR" "Branch ${head} was pushed, but creating the PR for #${pr_number} failed — create it manually"
      return 1
      ;;
  esac
  log "INFO" "Migrated PR #${pr_number} -> #${new_number}: \"${title}\""

  # 5. Record in onboard state (re-read first so the append uses the latest state)
  cd "$original_dir" || true
  local updated_state
  onboard_state=$(read_onboard_state "$target_name")
  if updated_state=$(echo "$onboard_state" | jq \
    --argjson old "$pr_number" \
    --argjson new_num "${new_number}" \
    --arg title "$title" \
    '.migrated_prs += [{"old_number":$old, "new_number":$new_num, "title":$title}]') \
    && [ -n "$updated_state" ]; then
    write_onboard_state "$target_name" "$updated_state"
  else
    log "WARN" "PR #${pr_number} migrated as #${new_number}, but it could not be recorded in onboard state"
  fi
}