diff --git a/plugins/language-orthography/.claude-plugin/plugin.json b/plugins/language-orthography/.claude-plugin/plugin.json new file mode 100644 index 0000000000..c425150ce3 --- /dev/null +++ b/plugins/language-orthography/.claude-plugin/plugin.json @@ -0,0 +1,5 @@ +{ + "name": "language-orthography", + "version": "1.0.0", + "description": "Enforces full orthographic correctness (diacritics, accents, cedillas) when the language setting targets a non-ASCII language" +} diff --git a/plugins/language-orthography/README.md b/plugins/language-orthography/README.md new file mode 100644 index 0000000000..25114d26fa --- /dev/null +++ b/plugins/language-orthography/README.md @@ -0,0 +1,58 @@ +# language-orthography + +Enforces full orthographic correctness when the `language` setting targets a language written with non-ASCII characters (Portuguese, French, Vietnamese, Czech, Turkish, Spanish, German, etc.). + +## The problem + +The built-in `language` setting injects a generic instruction like *"Always respond in pt-BR"*, which the model often interprets loosely — producing text without proper accents, cedillas, or other diacritical marks. This is especially noticeable in long sessions and after context compaction. + +For example, with `language: "pt-BR"`: + +| Expected | What you get | +|----------|-------------| +| informação | informacao | +| não | nao | +| código | codigo | +| você | voce | + +## How it works + +A `SessionStart` hook reads your `language` setting from `settings.local.json` or, if that file has none, from `settings.json` in your Claude config directory (`$CLAUDE_CONFIG_DIR`, or `~/.claude` by default) and injects an explicit orthographic enforcement instruction into the session context. This instruction frames diacritic omission as an orthographic error rather than a style choice, which makes the model treat it with the same weight as a spelling mistake in English. + +The hook is a no-op if: +- No `language` setting is configured +- The language is English (`en`, `en-US`, `en-GB`, etc.) 
+ +## Installation + +```bash +claude plugin add language-orthography +``` + +Or install from the repository: + +```bash +/install-plugin plugins/language-orthography +``` + +## Configuration + +No configuration needed beyond the standard `language` setting in your Claude Code settings: + +```json +{ + "language": "pt-BR" +} +``` + +The plugin reads this value automatically. + +## Limitations + +This plugin works around the issue at the prompt level. It doesn't fix the underlying causes in the core CLI: + +1. The built-in language instruction template should explicitly mention diacritical marks +2. CLAUDE.md instructions are wrapped in a disclaimer that weakens their authority +3. Context compaction doesn't carry language rules to the summarization step + +See [#32886](https://github.com/anthropics/claude-code/issues/32886) for the full root cause analysis. diff --git a/plugins/language-orthography/hooks-handlers/session-start.sh b/plugins/language-orthography/hooks-handlers/session-start.sh new file mode 100755 index 0000000000..519d590ff5 --- /dev/null +++ b/plugins/language-orthography/hooks-handlers/session-start.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# Reads the user's language setting and injects orthographic enforcement +# instructions so the model preserves diacritical marks in its output. 
# Strict mode (`set -e`, `pipefail`) is deliberately not enabled: grep exiting
# non-zero because a file has no "language" key is a normal outcome here, and
# the hook must then fall through to a silent `exit 0`.

# Settings live in the Claude config directory; CLAUDE_CONFIG_DIR overrides
# the default of ~/.claude.
SETTINGS_FILE="${CLAUDE_CONFIG_DIR:-$HOME/.claude}/settings.json"
SETTINGS_LOCAL="${CLAUDE_CONFIG_DIR:-$HOME/.claude}/settings.local.json"

#######################################
# Prints the value of the first `"language": "<value>"` pair found in a file.
# This is a textual match, not a JSON parse:
#   - only quoted (string) values are recognised;
#   - the key is matched wherever it appears in the file, at any depth;
#   - key, colon and value must sit on one line;
#   - the value is printed exactly as written, JSON escapes undecoded.
# Arguments: $1 - path to a settings file
# Outputs:   the value on stdout; nothing when there is no match
# Returns:   0 always
#######################################
extract_language() {
  local pair
  # `([^"\\]|\\.)*` consumes a backslash together with the character it
  # escapes, so the match runs to the string's real closing quote instead of
  # stopping at an embedded \".
  # stderr is dropped on purpose: an unreadable file is treated as "no match".
  pair=$(grep -Eo -e '"language"[[:space:]]*:[[:space:]]*"([^"\\]|\\.)*"' \
    -- "$1" 2>/dev/null | head -n 1)
  [[ -n "$pair" ]] || return 0

  pair=${pair#*:}                        # drop the key and the colon
  pair=${pair#"${pair%%[![:space:]]*}"}  # drop whitespace before the value
  pair=${pair#\"}                        # drop the opening quote
  pair=${pair%\"}                        # drop the closing quote
  printf '%s' "$pair"
}

#######################################
# Tells whether a language setting designates English.
# Matches `en`, `english`, and any `en-…` / `en_…` regional tag (en-US,
# en_GB, en-CA, …), ignoring letter case.
# Arguments: $1 - language value
# Returns:   0 if English, 1 otherwise
#######################################
is_english() {
  local status=1
  shopt -s nocasematch
  case "$1" in
    en | en[-_]* | english) status=0 ;;
  esac
  shopt -u nocasematch
  return "$status"
}

# settings.local.json takes precedence; settings.json is consulted only when
# the local file is missing or yields no (non-empty) language value.
LANG_VALUE=""
for f in "$SETTINGS_LOCAL" "$SETTINGS_FILE"; do
  [[ -f "$f" ]] || continue
  LANG_VALUE=$(extract_language "$f")
  [[ -n "$LANG_VALUE" ]] && break
done

# Nothing to enforce when no language is configured or when it is English.
if [[ -z "$LANG_VALUE" ]] || is_english "$LANG_VALUE"; then
  exit 0
fi

# The delimiter is unquoted so ${LANG_VALUE} expands; the \n sequences pass
# through untouched and act as JSON newline escapes. LANG_VALUE is inserted
# exactly as it was written inside the settings file's JSON string, so it must
# not be escaped a second time.
# The closing braces and the EOF terminator of this here-document follow
# directly after this section of the script.
cat << EOF
{
  "hookSpecificOutput": {
    "hookEventName": "SessionStart",
    "additionalContext": "ORTHOGRAPHIC ENFORCEMENT (language: ${LANG_VALUE})\n\nThe user's language setting is '${LANG_VALUE}'. All text output in this language MUST maintain full orthographic correctness:\n\n- NEVER omit diacritical marks, accents, cedillas, umlauts, circumflexes, tildes, or any other characters required by the language's writing system.\n- Dropping diacritics (e.g. writing 'informacao' instead of 'informação', or 'voce' instead of 'você') is an orthographic error equivalent to a typo or misspelling in English.\n- This applies to ALL text: messages, code comments, strings, commit messages, file contents, and any other output in ${LANG_VALUE}.\n- This rule persists through the entire session, including after context compaction.\n\nTreat this as a hard constraint, not a style preference."
+ } +} +EOF + +exit 0 diff --git a/plugins/language-orthography/hooks/hooks.json b/plugins/language-orthography/hooks/hooks.json new file mode 100644 index 0000000000..2b03d7f557 --- /dev/null +++ b/plugins/language-orthography/hooks/hooks.json @@ -0,0 +1,15 @@ +{ + "description": "Injects orthographic enforcement instructions at session start based on the user's language setting", + "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/hooks-handlers/session-start.sh" + } + ] + } + ] + } +}