From 0edbdae558b9fd0c44289a1493963f4429409ccd Mon Sep 17 00:00:00 2001 From: Slim B Date: Thu, 12 Feb 2026 18:00:08 +0300 Subject: [PATCH] "#" --- .gitignore | 1 + README.md | 5 + docs/config-reference.md | 79 +++++++ docs/guide.md | 482 +++++++++++++++++++++++++++++++++++++++ lib/sync.sh | 17 +- 5 files changed, 582 insertions(+), 2 deletions(-) create mode 100644 docs/config-reference.md create mode 100644 docs/guide.md diff --git a/.gitignore b/.gitignore index 5c9e080..58c99d2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ +.claude/*local* result diff --git a/README.md b/README.md index c3ad370..3b52a36 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,11 @@ Add josh-sync as a flake input, then: Run `josh-sync preflight` to validate your setup. +## Documentation + +- **[Setup Guide](docs/guide.md)** — Step-by-step: prerequisites, importing existing subrepos, CI workflows, and troubleshooting +- **[Configuration Reference](docs/config-reference.md)** — Full `.josh-sync.yml` field documentation + ## CLI ``` diff --git a/docs/config-reference.md b/docs/config-reference.md new file mode 100644 index 0000000..01b3af8 --- /dev/null +++ b/docs/config-reference.md @@ -0,0 +1,79 @@ +# Configuration Reference + +Full reference for `.josh-sync.yml` fields and environment variables. + +## `.josh-sync.yml` Structure + +```yaml +josh: # josh-proxy settings (required) +targets: # sync targets (required, at least 1) +bot: # bot identity for sync commits (required) +``` + +## `josh` Section + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `proxy_url` | string | Yes | Josh-proxy URL, no trailing slash. Must start with `http://` or `https://`. | +| `monorepo_path` | string | Yes | Repository path as josh-proxy sees it (e.g., `org/monorepo`). | + +## `targets[]` Section + +Each target maps a monorepo subfolder to an external subrepo. 
+ +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `name` | string | Yes | — | Unique target identifier. Used in CLI commands and state tracking. | +| `subfolder` | string | Yes | — | Monorepo subfolder path (e.g., `services/billing`). | +| `josh_filter` | string | No | `:/<subfolder>` | Josh filter expression. Auto-derived from `subfolder` if omitted. Must start with `:`. | +| `subrepo_url` | string | Yes | — | External subrepo Git URL. Supports HTTPS (`https://...`), SSH (`git@host:path`), and `ssh://` formats. | +| `subrepo_auth` | string | No | `"https"` | Auth method: `"https"` or `"ssh"`. | +| `subrepo_token_var` | string | No | `"SUBREPO_TOKEN"` | Name of the env var holding the HTTPS token for this target. | +| `subrepo_ssh_key_var` | string | No | `"SUBREPO_SSH_KEY"` | Name of the env var holding the SSH private key for this target. | +| `branches` | object | Yes | — | Branch mapping: `mono_branch: subrepo_branch`. Each key-value pair syncs those branches bidirectionally. | +| `forward_only` | string[] | No | `[]` | Branches that only sync mono → subrepo, never reverse. | + +## `bot` Section + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | string | Yes | Git author name for sync commits. | +| `email` | string | Yes | Git author email for sync commits. | +| `trailer` | string | Yes | Git trailer key for loop prevention (e.g., `Josh-Sync-Origin`). 
| + +## Environment Variables + +### Credentials + +| Variable | Purpose | Default | +|----------|---------|---------| +| `SYNC_BOT_USER` | Bot's Git username | — | +| `SYNC_BOT_TOKEN` | API token for monorepo access and josh-proxy HTTPS auth | — | +| `SUBREPO_TOKEN` | HTTPS token for subrepo access | Falls back to `SYNC_BOT_TOKEN` | +| `SUBREPO_SSH_KEY` | SSH private key content (not a file path) for subrepo access | — | + +### Per-target credential overrides + +Set `subrepo_token_var` or `subrepo_ssh_key_var` in a target's config to read credentials from a different env var: + +```yaml +targets: + - name: "auth" + subrepo_token_var: "AUTH_REPO_TOKEN" # reads from $AUTH_REPO_TOKEN + subrepo_ssh_key_var: "AUTH_SSH_KEY" # reads from $AUTH_SSH_KEY +``` + +**Resolution order:** per-target env var → default env var (`SUBREPO_TOKEN` / `SUBREPO_SSH_KEY`) → `SYNC_BOT_TOKEN` fallback. + +### Runtime + +| Variable | Purpose | Default | +|----------|---------|---------| +| `JOSH_SYNC_TARGET` | Restrict sync to a single target | All targets | +| `JOSH_SYNC_STATE_BRANCH` | Name of the orphan branch for state storage | `josh-sync-state` | +| `JOSH_SYNC_DEBUG` | Enable verbose logging (`1` to enable) | `0` | +| `MONOREPO_API` | Override monorepo API URL | Auto-derived from first target's host | + +## JSON Schema + +The config file can be validated against the JSON Schema at [`schema/config-schema.json`](../schema/config-schema.json). diff --git a/docs/guide.md b/docs/guide.md new file mode 100644 index 0000000..816af1d --- /dev/null +++ b/docs/guide.md @@ -0,0 +1,482 @@ +# Setup Guide + +Step-by-step guide to setting up josh-sync for a new monorepo with existing subrepos. 
+ +## Overview + +josh-sync provides bidirectional sync between a monorepo and N external subrepos via [josh-proxy](https://josh-project.github.io/josh/): + +``` +MONOREPO SUBREPOS +├── services/billing/ ──── forward ────► billing-repo/ +├── services/auth/ (push or cron) auth-repo/ +└── libs/shared/ ◄──── reverse ───── shared-lib-repo/ + (cron → always PR) + via josh-proxy (filtered git views) +``` + +**Key safety properties:** +- Forward sync (mono → subrepo) uses `--force-with-lease` — never overwrites concurrent changes +- Reverse sync (subrepo → mono) always creates a PR — never pushes directly +- Git trailers (`Josh-Sync-Origin:`) prevent infinite sync loops +- State tracked on an orphan branch (`josh-sync-state`) — survives CI runner teardown + +## Prerequisites + +Before you begin, you need: + +### josh-proxy instance + +A running [josh-proxy](https://josh-project.github.io/josh/) that can access your monorepo's Git server. Verify connectivity: + +```bash +git ls-remote https://josh.example.com/org/monorepo.git HEAD +``` + +### Bot account + +A dedicated Git user (e.g., `josh-sync-bot`) with: +- Write access to the monorepo +- Write access to all subrepos +- Ability to create PRs on both monorepo and subrepo platforms + +### Credentials + +| Variable | Purpose | Required | +|----------|---------|----------| +| `SYNC_BOT_USER` | Bot's Git username | Yes | +| `SYNC_BOT_TOKEN` | API token with repo scope (monorepo + josh-proxy auth) | Yes | +| `SUBREPO_SSH_KEY` | SSH private key for subrepo access (if using SSH auth) | If SSH | +| `SUBREPO_TOKEN` | HTTPS token for subrepo access (defaults to `SYNC_BOT_TOKEN`) | No | + +Per-target credential overrides are supported — see [Configuration Reference](config-reference.md). + +### Tool dependencies + +`bash >=4`, `git`, `curl`, `jq`, `yq` ([mikefarah/yq](https://github.com/mikefarah/yq) v4+), `openssh`, `rsync` + +> The Nix flake bundles all dependencies automatically. 
+ +## Step 1: Create the Monorepo + +Create a new repository on your Git server (e.g., `org/monorepo`). Create subdirectories for each subrepo you want to sync: + +```bash +mkdir -p services/billing services/auth libs/shared +``` + +These directories will be populated during the import step. They can be empty or contain `.gitkeep` files for now. + +Verify josh-proxy can see the monorepo: + +```bash +git ls-remote https://josh.example.com/org/monorepo.git HEAD +``` + +## Step 2: Configure `.josh-sync.yml` + +Create `.josh-sync.yml` at the monorepo root. Each target maps a monorepo subfolder to an external subrepo: + +```yaml +josh: + proxy_url: "https://josh.example.com" # josh-proxy URL (no trailing slash) + monorepo_path: "org/monorepo" # repo path as josh sees it + +targets: + - name: "billing" # unique identifier + subfolder: "services/billing" # monorepo subfolder + # josh_filter auto-derived as ":/services/billing" if omitted + subrepo_url: "git@gitea.example.com:ext/billing.git" + subrepo_auth: "ssh" # "https" (default) or "ssh" + branches: + main: main # mono_branch: subrepo_branch + forward_only: [] + + - name: "auth" + subfolder: "services/auth" + subrepo_url: "https://gitea.example.com/ext/auth.git" + subrepo_auth: "https" + subrepo_token_var: "AUTH_REPO_TOKEN" # per-target credential override + branches: + main: main + develop: develop # multiple branches supported + forward_only: [] + + - name: "shared-lib" + subfolder: "libs/shared" + subrepo_url: "https://gitea.example.com/ext/shared-lib.git" + branches: + main: main + forward_only: [main] # one-way: mono → subrepo only + +bot: + name: "josh-sync-bot" + email: "josh-sync-bot@example.com" + trailer: "Josh-Sync-Origin" # git trailer for loop prevention +``` + +For the full field reference, see [Configuration Reference](config-reference.md). 
+ +## Step 3: Set Up Local Dev Environment + +### Option A: Nix + devenv (recommended) + +**`devenv.yaml`** — declare josh-sync as a flake input: + +```yaml +inputs: + nixpkgs: + url: github:cachix/devenv-nixpkgs/rolling + josh-sync: + url: git+https://your-gitea.example.com/org/josh-sync?ref=main + flake: true +``` + +**`devenv.nix`** — import the josh-sync module: + +```nix +{ inputs, ... }: +{ + imports = [ inputs.josh-sync.devenvModules.default ]; + + name = "my-monorepo"; + + # .env contains secrets, not devenv config + dotenv.disableHint = true; +} +``` + +**`.envrc`** — activate devenv automatically: + +```bash +DEVENV_WARN_TIMEOUT=20 +use devenv +``` + +**`.env`** — local credentials (add to `.gitignore`): + +```bash +SYNC_BOT_USER=sync-bot +SYNC_BOT_TOKEN= +SUBREPO_SSH_KEY="-----BEGIN OPENSSH PRIVATE KEY----- +... +-----END OPENSSH PRIVATE KEY-----" +# Per-target overrides: +# AUTH_REPO_TOKEN= +``` + +### Option B: Manual installation + +Install the required tools, then either: + +- Clone the josh-sync repo and add `bin/` to your `PATH` +- Run `make build` to create a single bundled script at `dist/josh-sync` + +## Step 4: Validate with Preflight + +```bash +josh-sync preflight +``` + +This validates: +- Config syntax and required fields +- josh-proxy connectivity (via `git ls-remote` through josh) +- Subrepo connectivity and authentication +- Branch mappings +- CI workflow path coverage (checks if `.gitea/workflows/josh-sync-forward.yml` paths match target subfolders) + +For a new monorepo before import, preflight may warn that subfolders don't exist yet — that's expected. + +## Step 5: Import Existing Subrepos + +This is the critical onboarding step. For each existing subrepo, you run a three-step cycle: **import → merge → reset**. + +> Do this **one target at a time** to keep PRs reviewable. + +### 5a. Import + +```bash +josh-sync import billing +``` + +This: +1. Clones the monorepo directly (not through josh) +2. Clones the subrepo +3. 
Copies subrepo content into the monorepo subfolder via `rsync` +4. Creates a branch `auto-sync/import-billing-<timestamp>` +5. Pushes it and creates a PR on the monorepo + +Review the import PR — check for leaked credentials, environment-specific config, or files that shouldn't be in the monorepo. + +### 5b. Merge the import PR + +Merge the PR using your Git platform's UI. This lands the subrepo content into the monorepo's main branch. + +> At this point, the monorepo has the content but the histories are disconnected. Sync will **not** work until you complete the reset step. + +### 5c. Reset + +```bash +josh-sync reset billing +``` + +> **You do NOT need to `git pull` locally before running reset.** The reset command clones fresh from josh-proxy — it never uses your local working copy. + +This: +1. Clones the monorepo through josh-proxy with the josh filter (the "filtered view") +2. Force-pushes that filtered view to the subrepo, replacing its history + +This establishes **shared commit ancestry** between josh's filtered view and the subrepo. Without this, josh-proxy can't compute diffs between the two. + +> **Warning:** This is a destructive force-push that replaces the subrepo's history. Back up any important branches or tags in the subrepo beforehand. + +### 5d. Repeat for each target + +``` +For each target: + 1. josh-sync import <target> + 2. Review and merge the import PR on the monorepo + 3. josh-sync reset <target> +``` + +### 5e. 
Verify + +After all targets are imported and reset: + +```bash +# Check all targets show state +josh-sync status + +# Test forward sync — should return "skip" (trees are identical after reset) +josh-sync sync --forward --target billing + +# Test reverse sync — should return "skip" (no new human commits) +josh-sync sync --reverse --target billing +``` + +## Step 6: Set Up CI Workflows + +### Forward sync (mono → subrepo) + +Create `.gitea/workflows/josh-sync-forward.yml`: + +```yaml +name: "Josh Sync → Subrepo" + +on: + push: + branches: [main] + paths: + # List ALL target subfolders: + - "services/billing/**" + - "services/auth/**" + - "libs/shared/**" + schedule: + - cron: "0 */6 * * *" # every 6 hours as fallback + workflow_dispatch: + inputs: + target: + description: "Target to sync (empty = detect from push or all)" + required: false + default: "" + branch: + description: "Branch to sync (empty = triggered branch or all)" + required: false + default: "" + +concurrency: + group: josh-sync-fwd-${{ github.ref_name }} + cancel-in-progress: false + +jobs: + sync: + runs-on: docker + container: node:20-bookworm + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 2 # needed for target detection + + - name: Install tools + run: | + apt-get update -qq && apt-get install -y -qq jq curl git openssh-client >/dev/null 2>&1 + curl -sL "https://github.com/mikefarah/yq/releases/download/v4.44.6/yq_linux_amd64" \ + -o /usr/local/bin/yq && chmod +x /usr/local/bin/yq + + - name: Detect changed target + if: github.event_name == 'push' + id: detect + run: | + CHANGED=$(git diff --name-only HEAD~1 HEAD 2>/dev/null || echo "") + TARGETS=$(yq -o json '.targets' .josh-sync.yml \ + | jq -r '.[] | "\(.name):\(.subfolder)"' \ + | while IFS=: read -r name prefix; do + echo "$CHANGED" | grep -q "^${prefix}/" && echo "$name" + done | sort -u | paste -sd ',' -) + echo "targets=${TARGETS}" >> "$GITHUB_OUTPUT" + + - uses: 
https://your-gitea.example.com/org/josh-sync@v1 + with: + direction: forward + target: ${{ github.event.inputs.target || steps.detect.outputs.targets }} + branch: ${{ github.event.inputs.branch || github.ref_name }} + env: + SYNC_BOT_USER: ${{ secrets.SYNC_BOT_USER }} + SYNC_BOT_TOKEN: ${{ secrets.SYNC_BOT_TOKEN }} + SUBREPO_TOKEN: ${{ secrets.SUBREPO_TOKEN || secrets.SYNC_BOT_TOKEN }} + SUBREPO_SSH_KEY: ${{ secrets.SUBREPO_SSH_KEY }} +``` + +### Reverse sync (subrepo → mono) + +Create `.gitea/workflows/josh-sync-reverse.yml`: + +```yaml +name: "Josh Sync ← Subrepo" + +on: + schedule: + - cron: "0 1,7,13,19 * * *" # every 6h, offset from forward + workflow_dispatch: + inputs: + target: + description: "Target to sync (empty = all)" + required: false + default: "" + branch: + description: "Branch to sync (empty = all eligible)" + required: false + default: "" + +concurrency: + group: josh-sync-rev-${{ github.event.inputs.target || 'all' }} + cancel-in-progress: false + +jobs: + sync: + runs-on: docker + container: node:20-bookworm + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + + - name: Install tools + run: | + apt-get update -qq && apt-get install -y -qq jq curl git openssh-client >/dev/null 2>&1 + curl -sL "https://github.com/mikefarah/yq/releases/download/v4.44.6/yq_linux_amd64" \ + -o /usr/local/bin/yq && chmod +x /usr/local/bin/yq + + - uses: https://your-gitea.example.com/org/josh-sync@v1 + with: + direction: reverse + target: ${{ github.event.inputs.target || '' }} + branch: ${{ github.event.inputs.branch || '' }} + env: + SYNC_BOT_USER: ${{ secrets.SYNC_BOT_USER }} + SYNC_BOT_TOKEN: ${{ secrets.SYNC_BOT_TOKEN }} + SUBREPO_TOKEN: ${{ secrets.SUBREPO_TOKEN || secrets.SYNC_BOT_TOKEN }} + SUBREPO_SSH_KEY: ${{ secrets.SUBREPO_SSH_KEY }} +``` + +### Required CI secrets + +| Secret | Purpose | +|--------|---------| +| `SYNC_BOT_USER` | Bot username | +| `SYNC_BOT_TOKEN` | Bot API token (monorepo access + josh-proxy auth) | +| `SUBREPO_SSH_KEY` | SSH 
private key for subrepo push (if using SSH auth) | +| `SUBREPO_TOKEN` | Optional separate subrepo token (defaults to `SYNC_BOT_TOKEN`) | + +> **GitHub Actions note:** These examples target Gitea Actions. For GitHub Actions, change the `uses:` reference to a GitHub repo (e.g., `org/josh-sync@v1`) and `runs-on:` to a GitHub runner (e.g., `ubuntu-latest`). + +## How Ongoing Sync Works + +Once set up, sync runs automatically: + +### Forward sync (mono → subrepo) + +Triggered by pushes to target subfolders or on a cron schedule: + +1. Clones the monorepo through josh-proxy (filtered view of the subfolder) +2. Fetches the subrepo branch for comparison +3. If trees are identical → skip +4. If subrepo branch doesn't exist → fresh push +5. Merges mono changes on top of subrepo state +6. If clean merge → pushes with `--force-with-lease` (protects against concurrent changes) +7. If lease rejected → retries on next run (subrepo changed during sync) +8. If merge conflict → creates a conflict PR on the subrepo + +### Reverse sync (subrepo → mono) + +Runs on a cron schedule (never triggered by subrepo pushes): + +1. Clones the subrepo +2. Fetches the monorepo's josh-filtered view for comparison +3. Finds new human commits (filters out bot commits by checking for the `Josh-Sync-Origin:` trailer) +4. If no new human commits → skip +5. Pushes through josh-proxy to a staging branch +6. Creates a PR on the monorepo — **never pushes directly** + +### Loop prevention + +Bot commits include a git trailer like `Josh-Sync-Origin: forward/main/2024-02-12T10:30:00Z`. Each sync direction filters out commits with this trailer, preventing changes from bouncing back and forth. The CI action also has a loop guard that skips entirely if the HEAD commit has the trailer. + +### State tracking + +Sync state is stored as JSON files on an orphan branch (`josh-sync-state`), one file per target/branch. This tracks the last-synced commit SHAs and timestamps to avoid re-syncing the same changes. 
+ +## Adding a New Target + +To add a new subrepo after initial setup: + +1. Add the target to `.josh-sync.yml` +2. Update the forward workflow's `paths:` list to include the new subfolder +3. Commit and push +4. Run the import-merge-reset cycle for the new target: + ```bash + josh-sync import new-target + # merge the PR + josh-sync reset new-target + ``` +5. Verify with `josh-sync status` + +## Troubleshooting + +### "Failed to clone through josh-proxy" + +- Check josh-proxy is running and accessible +- Verify `monorepo_path` matches what josh-proxy expects +- Test manually: `git ls-remote https://<user>:<token>@josh.example.com/org/repo.git:/services/app.git` + +### SSH authentication failures + +- `SUBREPO_SSH_KEY` must contain the actual key content, not a file path +- For per-target keys, ensure `subrepo_ssh_key_var` in config matches the env var name +- Check the key has write access to the subrepo + +### "Force-with-lease rejected" + +Normal: the subrepo changed while sync was running. The next sync run will pick it up. If persistent, check for another process pushing to the subrepo simultaneously. + +### "Josh rejected push" (reverse sync) + +Josh-proxy couldn't map the push back to the monorepo. Check josh-proxy logs, verify the josh filter is correct. May indicate a history divergence — consider running `josh-sync reset <target>`. + +### Import PR shows "No changes" + +The subfolder already contains the same content as the subrepo. This is fine — the import is a no-op. + +### Duplicate/looping commits + +Verify `bot.trailer` in config matches what's in commit messages. Check the loop guard in the CI workflow is active. 
+ +### State issues + +```bash +# View current state +josh-sync state show <target> [branch] + +# Reset state (forces next sync to run regardless of SHA comparison) +josh-sync state reset <target> [branch] +``` diff --git a/lib/sync.sh b/lib/sync.sh index 91d2903..5476807 100644 --- a/lib/sync.sh +++ b/lib/sync.sh @@ -170,7 +170,20 @@ reverse_sync() { log "INFO" "New human commits to sync:" echo "$human_commits" >&2 - # 4. Push through josh to a staging branch + # 4. Merge subrepo changes onto the latest josh-filtered monorepo view + # This ensures the staging branch is based on the latest monorepo main, + # not on the common ancestor between subrepo and monorepo histories. + local subrepo_head + subrepo_head=$(git rev-parse HEAD) + + git checkout -B sync-push "mono-filtered/${mono_branch}" >&2 + git merge --no-ff "$subrepo_head" \ + -m "Sync from subrepo $(date -u +%Y-%m-%dT%H:%M:%SZ) + +${BOT_TRAILER}: reverse/${subrepo_branch}/$(date -u +%Y-%m-%dT%H:%M:%SZ)" >&2 \ + || die "Merge conflict during reverse sync — manual intervention needed" + + # 5. Push merged branch through josh to a staging branch local ts ts=$(date +%Y%m%d-%H%M%S) local staging_branch="auto-sync/subrepo-${subrepo_branch}-${ts}" @@ -178,7 +191,7 @@ reverse_sync() { if git push -o "base=${mono_branch}" "$(josh_auth_url)" "HEAD:refs/heads/${staging_branch}"; then log "INFO" "Pushed to staging branch via josh: ${staging_branch}" - # 5. Create PR on monorepo (NEVER direct push) + # 6. Create PR on monorepo (NEVER direct push) local pr_body pr_body="## Subrepo changes\n\nNew commits from subrepo \`${subrepo_branch}\`:\n\n\`\`\`\n${human_commits}\n\`\`\`\n\n**Review checklist:**\n- [ ] Changes scoped to synced subfolder\n- [ ] No leaked credentials or environment-specific config\n- [ ] CI passes"