From 424d34a9c012308ba10817fe3ba990e4bcabb046 Mon Sep 17 00:00:00 2001 From: yyh <92089059+lyzno1@users.noreply.github.com> Date: Tue, 31 Mar 2026 18:02:02 +0800 Subject: [PATCH] fix(ci): structure i18n sync payload and PR flow (#34342) --- .github/workflows/translate-i18n-claude.yml | 283 +++++++++++++++++--- .github/workflows/trigger-i18n-sync.yml | 100 ++++++- 2 files changed, 334 insertions(+), 49 deletions(-) diff --git a/.github/workflows/translate-i18n-claude.yml b/.github/workflows/translate-i18n-claude.yml index f3fbfe60e21..33af4f36fdd 100644 --- a/.github/workflows/translate-i18n-claude.yml +++ b/.github/workflows/translate-i18n-claude.yml @@ -67,6 +67,92 @@ jobs: } " web/i18n-config/languages.ts | sed 's/[[:space:]]*$//') + generate_changes_json() { + node <<'NODE' + const { execFileSync } = require('node:child_process') + const fs = require('node:fs') + const path = require('node:path') + + const repoRoot = process.cwd() + const baseSha = process.env.BASE_SHA || '' + const headSha = process.env.HEAD_SHA || '' + const files = (process.env.CHANGED_FILES || '').split(/\s+/).filter(Boolean) + + const englishPath = fileStem => path.join(repoRoot, 'web', 'i18n', 'en-US', `${fileStem}.json`) + + const readCurrentJson = (fileStem) => { + const filePath = englishPath(fileStem) + if (!fs.existsSync(filePath)) + return null + + return JSON.parse(fs.readFileSync(filePath, 'utf8')) + } + + const readBaseJson = (fileStem) => { + if (!baseSha) + return null + + try { + const relativePath = `web/i18n/en-US/${fileStem}.json` + const content = execFileSync('git', ['show', `${baseSha}:${relativePath}`], { encoding: 'utf8' }) + return JSON.parse(content) + } + catch (error) { + return null + } + } + + const compareJson = (beforeValue, afterValue) => JSON.stringify(beforeValue) === JSON.stringify(afterValue) + + const changes = {} + + for (const fileStem of files) { + const currentJson = readCurrentJson(fileStem) + const beforeJson = readBaseJson(fileStem) || {} + const 
afterJson = currentJson || {} + const added = {} + const updated = {} + const deleted = [] + + for (const [key, value] of Object.entries(afterJson)) { + if (!Object.hasOwn(beforeJson, key)) { + added[key] = value + continue + } + + if (!compareJson(beforeJson[key], value)) { + updated[key] = { + before: beforeJson[key], + after: value, + } + } + } + + for (const key of Object.keys(beforeJson)) { + if (!Object.hasOwn(afterJson, key)) + deleted.push(key) + } + + changes[fileStem] = { + fileDeleted: currentJson === null, + added, + updated, + deleted, + } + } + + fs.writeFileSync( + '/tmp/i18n-changes.json', + JSON.stringify({ + baseSha, + headSha, + files, + changes, + }) + ) + NODE + } + if [ "${{ github.event_name }}" = "repository_dispatch" ]; then BASE_SHA="${{ github.event.client_payload.base_sha }}" HEAD_SHA="${{ github.event.client_payload.head_sha }}" @@ -74,12 +160,19 @@ jobs: TARGET_LANGS="$DEFAULT_TARGET_LANGS" SYNC_MODE="${{ github.event.client_payload.sync_mode || 'incremental' }}" - if [ -n "${{ github.event.client_payload.diff_base64 }}" ]; then - printf '%s' '${{ github.event.client_payload.diff_base64 }}' | base64 -d > /tmp/i18n-diff.txt - DIFF_AVAILABLE="true" + if [ -n "${{ github.event.client_payload.changes_base64 }}" ]; then + printf '%s' '${{ github.event.client_payload.changes_base64 }}' | base64 -d > /tmp/i18n-changes.json + CHANGES_AVAILABLE="true" + CHANGES_SOURCE="embedded" + elif [ -n "$BASE_SHA" ] && [ -n "$CHANGED_FILES" ]; then + export BASE_SHA HEAD_SHA CHANGED_FILES + generate_changes_json + CHANGES_AVAILABLE="true" + CHANGES_SOURCE="recomputed" else - : > /tmp/i18n-diff.txt - DIFF_AVAILABLE="false" + printf '%s' '{"baseSha":"","headSha":"","files":[],"changes":{}}' > /tmp/i18n-changes.json + CHANGES_AVAILABLE="false" + CHANGES_SOURCE="unavailable" fi else BASE_SHA="" @@ -106,16 +199,15 @@ jobs: CHANGED_FILES="" fi - if [ "$SYNC_MODE" = "incremental" ] && [ -n "$BASE_SHA" ]; then - git diff "$BASE_SHA" "$HEAD_SHA" -- 'web/i18n/en-US/*.json' > 
/tmp/i18n-diff.txt 2>/dev/null || : > /tmp/i18n-diff.txt + if [ "$SYNC_MODE" = "incremental" ] && [ -n "$CHANGED_FILES" ]; then + export BASE_SHA HEAD_SHA CHANGED_FILES + generate_changes_json + CHANGES_AVAILABLE="true" + CHANGES_SOURCE="local" else - : > /tmp/i18n-diff.txt - fi - - if [ -s /tmp/i18n-diff.txt ]; then - DIFF_AVAILABLE="true" - else - DIFF_AVAILABLE="false" + printf '%s' '{"baseSha":"","headSha":"","files":[],"changes":{}}' > /tmp/i18n-changes.json + CHANGES_AVAILABLE="false" + CHANGES_SOURCE="unavailable" fi fi @@ -136,7 +228,8 @@ jobs: echo "CHANGED_FILES=$CHANGED_FILES" echo "TARGET_LANGS=$TARGET_LANGS" echo "SYNC_MODE=$SYNC_MODE" - echo "DIFF_AVAILABLE=$DIFF_AVAILABLE" + echo "CHANGES_AVAILABLE=$CHANGES_AVAILABLE" + echo "CHANGES_SOURCE=$CHANGES_SOURCE" echo "FILE_ARGS=$FILE_ARGS" echo "LANG_ARGS=$LANG_ARGS" } >> "$GITHUB_OUTPUT" @@ -155,7 +248,7 @@ jobs: show_full_output: ${{ github.event_name == 'workflow_dispatch' }} prompt: | You are the i18n sync agent for the Dify repository. - Your job is to keep translations synchronized with the English source files under `${{ github.workspace }}/web/i18n/en-US/`, then open a PR with the result. + Your job is to keep translations synchronized with the English source files under `${{ github.workspace }}/web/i18n/en-US/`. Use absolute paths at all times: - Repo root: `${{ github.workspace }}` @@ -170,13 +263,15 @@ jobs: - Head SHA: `${{ steps.context.outputs.HEAD_SHA }}` - Scoped file args: `${{ steps.context.outputs.FILE_ARGS }}` - Scoped language args: `${{ steps.context.outputs.LANG_ARGS }}` - - Full English diff available: `${{ steps.context.outputs.DIFF_AVAILABLE }}` + - Structured change set available: `${{ steps.context.outputs.CHANGES_AVAILABLE }}` + - Structured change set source: `${{ steps.context.outputs.CHANGES_SOURCE }}` + - Structured change set file: `/tmp/i18n-changes.json` Tool rules: - Use Read for repository files. - Use Edit for JSON updates. 
- - Use Bash only for `git`, `gh`, `pnpm`, and `date`. - - Run Bash commands one by one. Do not combine commands with `&&`, `||`, pipes, or command substitution. + - Use Bash only for `pnpm`. + - Do not use Bash for `git`, `gh`, or branch management. Required execution plan: 1. Resolve target languages. @@ -187,30 +282,25 @@ jobs: - Only process the resolved target languages, never `en-US`. - Do not touch unrelated i18n files. - Do not modify `${{ github.workspace }}/web/i18n/en-US/`. - 3. Detect English changes per file. - - Treat the current English JSON files under `${{ github.workspace }}/web/i18n/en-US/` plus the scoped `i18n:check` result as the primary source of truth. - - Use `/tmp/i18n-diff.txt` only as supporting context to understand what changed between `Base SHA` and `Head SHA`. - - Never rely on diff alone when deciding final keys or values. - - Read the current English JSON file for each file in scope. - - If sync mode is `incremental` and `Base SHA` is not empty, run: - `git -C ${{ github.workspace }} show :web/i18n/en-US/.json` - - If sync mode is `full` or `Base SHA` is empty, skip historical comparison and treat the current English file as the only source of truth for structural sync. - - If the file did not exist at Base SHA, treat all current keys as ADD. - - Compare previous and current English JSON to identify: - - ADD: key only in current - - UPDATE: key exists in both and the English value changed - - DELETE: key only in previous - - If `/tmp/i18n-diff.txt` is available, read it before translating so wording changes are grounded in the full English patch, but resolve any ambiguity by trusting the actual English files and scoped checks. + 3. Resolve source changes. + - If `Structured change set available` is `true`, read `/tmp/i18n-changes.json` and use it as the source of truth for file-level and key-level changes. + - For each file entry: + - `added` contains new English keys that need translations. 
+ - `updated` contains stale keys whose English source changed; re-translate using the `after` value. + - `deleted` contains keys that should be removed from locale files. + - `fileDeleted: true` means the English file no longer exists; remove the matching locale file if present. + - Read the current English JSON file for any file that still exists so wording, placeholders, and surrounding terminology stay accurate. + - If `Structured change set available` is `false`, treat this as a scoped full sync and use the current English files plus scoped checks as the source of truth. 4. Run a scoped pre-check before editing: - `pnpm --dir ${{ github.workspace }}/web run i18n:check ${{ steps.context.outputs.FILE_ARGS }} ${{ steps.context.outputs.LANG_ARGS }}` - Use this command as the source of truth for missing and extra keys inside the current scope. 5. Apply translations. - For every target language and scoped file: + - If `fileDeleted` is `true`, remove the locale file if it exists and skip the rest of that file. - If the locale file does not exist yet, create it with `Write` and then continue with `Edit` as needed. - ADD missing keys. - UPDATE stale translations when the English value changed. - DELETE removed keys. Prefer `pnpm --dir ${{ github.workspace }}/web run i18n:check ${{ steps.context.outputs.FILE_ARGS }} ${{ steps.context.outputs.LANG_ARGS }} --auto-remove` for extra keys so deletions stay in scope. - - For `zh-Hans` and `ja-JP`, if the locale file also changed between Base SHA and Head SHA, preserve manual translations unless they are clearly wrong for the new English value. If in doubt, keep the manual translation. - Preserve placeholders exactly: `{{variable}}`, `${variable}`, HTML tags, component tags, and variable names. - Match the existing terminology and register used by each locale. - Prefer one Edit per file when stable, but prioritize correctness over batching. 
@@ -218,14 +308,119 @@ jobs: - Run `pnpm --dir ${{ github.workspace }}/web lint:fix --quiet -- ` - Run `pnpm --dir ${{ github.workspace }}/web run i18n:check ${{ steps.context.outputs.FILE_ARGS }} ${{ steps.context.outputs.LANG_ARGS }}` - If verification fails, fix the remaining problems before continuing. - 7. Create a PR only when there are changes in `web/i18n/`. - - Check `git -C ${{ github.workspace }} status --porcelain -- web/i18n/` - - Create branch `chore/i18n-sync-` - - Commit message: `chore(i18n): sync translations with en-US` - - Push the branch and open a PR against `main` - - PR title: `chore(i18n): sync translations with en-US` - - PR body: summarize files, languages, sync mode, and verification commands - 8. If there are no translation changes after verification, do not create a branch, commit, or PR. + 7. Stop after the scoped locale files are updated and verification passes. + - Do not create branches, commits, or pull requests. claude_args: | - --max-turns 80 - --allowedTools "Read,Write,Edit,Bash(git *),Bash(git:*),Bash(gh *),Bash(gh:*),Bash(pnpm *),Bash(pnpm:*),Bash(date *),Bash(date:*),Glob,Grep" + --max-turns 120 + --allowedTools "Read,Write,Edit,Bash(pnpm *),Bash(pnpm:*),Glob,Grep" + + - name: Prepare branch metadata + id: pr_meta + if: steps.context.outputs.CHANGED_FILES != '' + shell: bash + run: | + if [ -z "$(git -C "${{ github.workspace }}" status --porcelain -- web/i18n/)" ]; then + echo "has_changes=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + SCOPE_HASH=$(printf '%s|%s|%s' "${{ steps.context.outputs.CHANGED_FILES }}" "${{ steps.context.outputs.TARGET_LANGS }}" "${{ steps.context.outputs.SYNC_MODE }}" | sha256sum | cut -c1-8) + HEAD_SHORT=$(printf '%s' "${{ steps.context.outputs.HEAD_SHA }}" | cut -c1-12) + BRANCH_NAME="chore/i18n-sync-${HEAD_SHORT}-${SCOPE_HASH}" + + { + echo "has_changes=true" + echo "branch_name=$BRANCH_NAME" + } >> "$GITHUB_OUTPUT" + + - name: Commit translation changes + if: steps.pr_meta.outputs.has_changes == 
'true' + shell: bash + run: | + git -C "${{ github.workspace }}" checkout -B "${{ steps.pr_meta.outputs.branch_name }}" + git -C "${{ github.workspace }}" add web/i18n/ + git -C "${{ github.workspace }}" commit -m "chore(i18n): sync translations with en-US" + + - name: Push translation branch + if: steps.pr_meta.outputs.has_changes == 'true' + shell: bash + run: | + if git -C "${{ github.workspace }}" ls-remote --exit-code --heads origin "${{ steps.pr_meta.outputs.branch_name }}" >/dev/null 2>&1; then + git -C "${{ github.workspace }}" push --force-with-lease origin "${{ steps.pr_meta.outputs.branch_name }}" + else + git -C "${{ github.workspace }}" push --set-upstream origin "${{ steps.pr_meta.outputs.branch_name }}" + fi + + - name: Create or update translation PR + if: steps.pr_meta.outputs.has_changes == 'true' + env: + BRANCH_NAME: ${{ steps.pr_meta.outputs.branch_name }} + FILES_IN_SCOPE: ${{ steps.context.outputs.CHANGED_FILES }} + TARGET_LANGS: ${{ steps.context.outputs.TARGET_LANGS }} + SYNC_MODE: ${{ steps.context.outputs.SYNC_MODE }} + CHANGES_SOURCE: ${{ steps.context.outputs.CHANGES_SOURCE }} + BASE_SHA: ${{ steps.context.outputs.BASE_SHA }} + HEAD_SHA: ${{ steps.context.outputs.HEAD_SHA }} + REPO_NAME: ${{ github.repository }} + shell: bash + run: | + PR_BODY_FILE=/tmp/i18n-pr-body.md + LANG_COUNT=$(printf '%s\n' "$TARGET_LANGS" | wc -w | tr -d ' ') + if [ "$LANG_COUNT" = "0" ]; then + LANG_COUNT="0" + fi + export LANG_COUNT + + node <<'NODE' > "$PR_BODY_FILE" + const fs = require('node:fs') + + const changesPath = '/tmp/i18n-changes.json' + const changes = fs.existsSync(changesPath) + ? 
JSON.parse(fs.readFileSync(changesPath, 'utf8')) + : { changes: {} } + + const filesInScope = (process.env.FILES_IN_SCOPE || '').split(/\s+/).filter(Boolean) + const lines = [ + '## Summary', + '', + `- **Files synced**: \`${process.env.FILES_IN_SCOPE || ''}\``, + `- **Languages updated**: ${process.env.TARGET_LANGS || ''} (${process.env.LANG_COUNT} languages)`, + `- **Sync mode**: ${process.env.SYNC_MODE}${process.env.BASE_SHA ? ` (base: \`${process.env.BASE_SHA.slice(0, 10)}\`, head: \`${process.env.HEAD_SHA.slice(0, 10)}\`)` : ` (head: \`${process.env.HEAD_SHA.slice(0, 10)}\`)`}`, + '', + '### Key changes', + ] + + for (const fileName of filesInScope) { + const fileChange = changes.changes?.[fileName] || { added: {}, updated: {}, deleted: [], fileDeleted: false } + const addedKeys = Object.keys(fileChange.added || {}) + const updatedKeys = Object.keys(fileChange.updated || {}) + const deletedKeys = fileChange.deleted || [] + lines.push(`- \`${fileName}\`: +${addedKeys.length} / ~${updatedKeys.length} / -${deletedKeys.length}${fileChange.fileDeleted ? 
' (file deleted in en-US)' : ''}`) + } + + lines.push( + '', + '## Verification', + '', + `- \`pnpm --dir web run i18n:check --file ${process.env.FILES_IN_SCOPE} --lang ${process.env.TARGET_LANGS}\``, + `- \`pnpm --dir web lint:fix --quiet -- \``, + '', + '## Notes', + '', + '- This PR was generated from structured en-US key changes produced by `trigger-i18n-sync.yml`.', + `- Structured change source: ${process.env.CHANGES_SOURCE || 'unknown'}.`, + '- Branch name is deterministic for the head SHA and scope, so reruns update the same PR instead of opening duplicates.', + '', + '🤖 Generated with [Claude Code](https://claude.com/claude-code)' + ) + + process.stdout.write(lines.join('\n')) + NODE + + EXISTING_PR_NUMBER=$(gh pr list --repo "$REPO_NAME" --head "$BRANCH_NAME" --state open --json number --jq '.[0].number') + + if [ -n "$EXISTING_PR_NUMBER" ] && [ "$EXISTING_PR_NUMBER" != "null" ]; then + gh pr edit "$EXISTING_PR_NUMBER" --repo "$REPO_NAME" --title "chore(i18n): sync translations with en-US" --body-file "$PR_BODY_FILE" + else + gh pr create --repo "$REPO_NAME" --head "$BRANCH_NAME" --base main --title "chore(i18n): sync translations with en-US" --body-file "$PR_BODY_FILE" + fi diff --git a/.github/workflows/trigger-i18n-sync.yml b/.github/workflows/trigger-i18n-sync.yml index ee44fbb0c05..a1ca42b26ee 100644 --- a/.github/workflows/trigger-i18n-sync.yml +++ b/.github/workflows/trigger-i18n-sync.yml @@ -25,7 +25,7 @@ jobs: with: fetch-depth: 0 - - name: Detect changed files and generate full diff + - name: Detect changed files and build structured change set id: detect shell: bash run: | @@ -37,12 +37,94 @@ jobs: if [ -n "$BASE_SHA" ]; then CHANGED_FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA" -- 'web/i18n/en-US/*.json' 2>/dev/null | sed -n 's@^.*/@@p' | sed 's/\.json$//' | tr '\n' ' ' | sed 's/[[:space:]]*$//') - git diff "$BASE_SHA" "$HEAD_SHA" -- 'web/i18n/en-US/*.json' > /tmp/i18n-diff.txt 2>/dev/null || : > /tmp/i18n-diff.txt else 
CHANGED_FILES=$(find web/i18n/en-US -maxdepth 1 -type f -name '*.json' -print | sed -n 's@^.*/@@p' | sed 's/\.json$//' | sort | tr '\n' ' ' | sed 's/[[:space:]]*$//') - : > /tmp/i18n-diff.txt fi + export BASE_SHA HEAD_SHA CHANGED_FILES + node <<'NODE' + const { execFileSync } = require('node:child_process') + const fs = require('node:fs') + const path = require('node:path') + + const repoRoot = process.cwd() + const baseSha = process.env.BASE_SHA || '' + const headSha = process.env.HEAD_SHA || '' + const files = (process.env.CHANGED_FILES || '').split(/\s+/).filter(Boolean) + + const englishPath = fileStem => path.join(repoRoot, 'web', 'i18n', 'en-US', `${fileStem}.json`) + + const readCurrentJson = (fileStem) => { + const filePath = englishPath(fileStem) + if (!fs.existsSync(filePath)) + return null + + return JSON.parse(fs.readFileSync(filePath, 'utf8')) + } + + const readBaseJson = (fileStem) => { + if (!baseSha) + return null + + try { + const relativePath = `web/i18n/en-US/${fileStem}.json` + const content = execFileSync('git', ['show', `${baseSha}:${relativePath}`], { encoding: 'utf8' }) + return JSON.parse(content) + } + catch (error) { + return null + } + } + + const compareJson = (beforeValue, afterValue) => JSON.stringify(beforeValue) === JSON.stringify(afterValue) + + const changes = {} + + for (const fileStem of files) { + const beforeJson = readBaseJson(fileStem) || {} + const afterJson = readCurrentJson(fileStem) || {} + const added = {} + const updated = {} + const deleted = [] + + for (const [key, value] of Object.entries(afterJson)) { + if (!Object.hasOwn(beforeJson, key)) { + added[key] = value + continue + } + + if (!compareJson(beforeJson[key], value)) { + updated[key] = { + before: beforeJson[key], + after: value, + } + } + } + + for (const key of Object.keys(beforeJson)) { + if (!Object.hasOwn(afterJson, key)) + deleted.push(key) + } + + changes[fileStem] = { + fileDeleted: readCurrentJson(fileStem) === null, + added, + updated, + deleted, + } + } + + 
fs.writeFileSync( + '/tmp/i18n-changes.json', + JSON.stringify({ + baseSha, + headSha, + files, + changes, + }) + ) + NODE + if [ -n "$CHANGED_FILES" ]; then echo "has_changes=true" >> "$GITHUB_OUTPUT" else @@ -65,7 +147,14 @@ jobs: script: | const fs = require('fs') - const diffBase64 = fs.readFileSync('/tmp/i18n-diff.txt').toString('base64') + const changesJson = fs.readFileSync('/tmp/i18n-changes.json', 'utf8') + const changesBase64 = Buffer.from(changesJson).toString('base64') + const maxEmbeddedChangesChars = 48000 + const changesEmbedded = changesBase64.length <= maxEmbeddedChangesChars + + if (!changesEmbedded) { + console.log(`Structured change set too large to embed safely (${changesBase64.length} chars). Downstream workflow will regenerate it from git history.`) + } await github.rest.repos.createDispatchEvent({ owner: context.repo.owner, @@ -73,7 +162,8 @@ jobs: event_type: 'i18n-sync', client_payload: { changed_files: process.env.CHANGED_FILES, - diff_base64: diffBase64, + changes_base64: changesEmbedded ? changesBase64 : '', + changes_embedded: changesEmbedded, sync_mode: 'incremental', base_sha: process.env.BASE_SHA, head_sha: process.env.HEAD_SHA,