# Layered Style Guidance Enforcement—Rule Schema
#
# This schema defines the shape of every entry in conflict-register.yaml. It is
# the canonical contract between csa-content-standards (where rules live) and
# the McClatchy CSA backend (which ingests + enforces rules at write time and
# composition time).
#
# See docs/layered-enforcement.md for the conceptual model + worked examples.
# See _data/rules/precedence.yaml for the resolution algorithm.
# See _data/rules/conflict-register.yaml for the rule entries themselves.

# Version of this schema contract. Quoted so it always loads as a string.
schema_version: '1.0.0'
# Date this file was last revised. Quoted so it loads as a string, not a date.
# NOTE(review): the `format` layer below records a retirement dated 2026-05-28,
# which is later than this value — confirm whether this date needs bumping.
last_updated: '2026-05-08'

# -----------------------------------------------------------------------------
# Layer definitions—the four layers of guidance, in default precedence order
# (lowest first, highest last). Layer N may override any rule established at
# layer M < N. See precedence.yaml for the resolution algorithm.
# -----------------------------------------------------------------------------
# Each layer carries an `id` (the name used in precedence lists), a `rank`
# (1 = lowest precedence), a `description`, and `selector_attrs` (the selector
# keys that target the layer). Descriptions are folded scalars (`>-`): the line
# breaks below are folded to single spaces when the file is parsed.
layers:
  - id: general
    rank: 1
    description: >-
      Universal rules across all McClatchy content. AP-Compatible is the
      default backbone. Sources include §1 Brand Guidelines, §2 Headlines,
      §6 Publishing Guidelines, §10.6 AP-Compatible.
    selector_attrs: []  # general always matches; no selector attributes
  - id: persona
    rank: 2
    description: >-
      Audience-specific overrides. Each persona has its own §4.X page in
      csa-content-standards (Discover Browser, Curious Optimizer, Watercooler
      Insider, Curious Explorer, Wonder-Driven Science Enthusiast). Selected at
      draft time via CSA Target Audience.
    selector_attrs: [persona]
  - id: format
    rank: 3
    description: >-
      Format-specific overrides. Each format has its own §3.X page. Active
      formats (selectable for new content) — Everything to Know, FAQ / Service
      Journalism, What to Know Next. Pending formats (spec not finalized) —
      Recipe, Timeline, Interview, Recap, Fan Theory / Fan Question, Obituary,
      Couple / Baby, Cast Introduction / Update. Retired formats (preserved for
      historical reference; not selectable for new content) — Google Discover
      Explainer (retired 2026-05-28; superseded by What to Know Next). Selected
      at draft time via Format selector.
    selector_attrs: [format]
  - id: platform
    rank: 4
    description: >-
      Distribution-platform + per-publication overrides—combined at this layer
      per the §10 Platform Guidance design. Each has its own §10.X page
      (SmartNews, Apple News, Us Weekly, Trend Hunter B2C, Woman's World,
      AP-Compatible Quick / Condensed / Thorough). Determined by destination
      publication or distribution channel.
    selector_attrs: [platform, publication, distribution]

# -----------------------------------------------------------------------------
# Rule entry—the structure each item in conflict-register.yaml follows.
# -----------------------------------------------------------------------------
# Field inventory for one rule entry. `required_fields` / `optional_fields` list
# field names only; the trailing comments are the human-readable contract for
# each field. `default_block` and `override_block` give the same inventory for
# the nested `default` value and for each item of `overrides`.
rule_entry:
  required_fields:
    - id                  # stable string identifier; dotted-path style (e.g., "headline.char_count")
    - domain              # grouping category (see `domains` below)
    - type                # data type of the rule's value (see `value_types` below)
    - description         # one-sentence human-readable summary
    - default             # the rule's value at the general layer, with source (see `default_block`)
    - overrides           # list of per-layer overrides (may be empty if no overrides exist yet; see `override_block`)

  optional_fields:
    - precedence          # resolution order, highest-rank layer first; defaults to [platform, format, persona, general]
    - machine_evaluable   # true|false—can this be checked by code, or does it require LLM judgment?
    - enforcement_target  # who enforces—writer (LLM) | grader (post-hoc rules) | composer (load-time) | human (editorial)
    - related_rules       # list of other rule IDs that interact with this one
    - status              # active | deprecated | pending—defaults to active
    - introduced          # csa-content-standards version when rule was first registered
    - notes               # free-form notes / caveats

  default_block:
    required_fields:
      - value             # the default value (may be null if there is no general rule and overrides are additive only)
      - source            # citation: docs/<page>.md §X.Y
    optional_fields:
      - rationale         # why this is the default

  override_block:
    required_fields:
      - selector          # object with layer-attribute keys (see `selector_grammar` below)
      - value             # the override value (may be null to mean "this layer removes the default")
      - source            # citation
    optional_fields:
      - rationale         # why this layer overrides
      - kind              # override | additive | restriction
                          #   override    = layer changes a value set at a lower-rank layer (rank as defined in `layers`)
                          #   additive    = no lower-rank value exists; this layer introduces a constraint
                          #   restriction = layer narrows the scope of an existing rule
      - data_validated    # true|false—is the override value backed by performance data?
      - sweet_spot        # for ranges, an inner sub-range that performs best (e.g., 110-119 chars within 90-120)

# -----------------------------------------------------------------------------
# Domains—grouping categories for rules. Used for navigation + filtering.
# -----------------------------------------------------------------------------
# Each `id` below is a valid value for a rule entry's `domain` field; the
# `description` lists the topics that belong to that domain.
domains:
  - id: headline
    description: H1, SEO title, promo title, meta description, dek, subtitle
  - id: body
    description: Article body copy—paragraphs, sentences, structural patterns
  - id: punctuation
    description: Em dash, en dash, comma, quote marks, apostrophe, slash, ellipsis, period
  - id: capitalization
    description: Title case, sentence case, proper nouns, headings, after-colon
  - id: numbers
    description: Numerals vs spelled-out, ages, dates, times, money, sports scores, percent
  - id: grammar
    description: Verb agreement, contractions, possessives, modifiers
  - id: vocabulary
    description: Banned words, preferred terms, register, slang, demographic-vs-psychographic framing
  - id: voice
    description: Tone north stars, persona attributes, voice attributes, anti-patterns
  - id: structure
    description: Required sections, article structure, chronological vs analytical, three-section arc
  - id: italics
    description: What gets italicized, what doesn't, italics-with-punctuation
  - id: media
    description: Hero image, thumbnail, lead image, embedded media, AI-generated media labeling
  - id: attribution
    # Double-quoted because the text starts with, and contains, single quotes.
    description: "'said' vs 'says', anonymous sources, on/off the record, source authority tiers"
  - id: cms
    description: CMS-required fields, flavor text, tags, categories, content segments
  - id: linking
    description: Internal links, anchor text, link count, tag-page linking, retailer linking
  - id: compliance
    description: AI disclosure, bylines, plagiarism, libel, helpful-content, breaking-news triage
  - id: faith
    description: Religious / faith-based content rules; permitted in pieces explicitly about faith only

# -----------------------------------------------------------------------------
# Value types—what kind of data a rule's value contains.
# -----------------------------------------------------------------------------
# Each `id` below is a valid value for a rule entry's `type` field.
value_types:
  - id: enum
    description: One of a fixed set of values (e.g., "singular" | "plural", "title-case" | "sentence-case")
  - id: range
    description: Numeric range (e.g., 80-100, 90-120). Often paired with a sweet_spot inner range
  - id: integer
    description: Single number (e.g., 3, 5)
  - id: boolean
    description: true / false (e.g., "no surrounding spaces" → false)
  - id: string
    # NOTE(review): "a regex pattern" is given as an example here, yet `pattern`
    # is a separate type below — confirm which type regex-valued rules should use.
    description: Free-form value (e.g., a tone north star, a regex pattern)
  - id: list
    description: Set of values (e.g., banned words list, italicized item types)
  - id: pattern
    # Single-quoted because the example contains `: `, which a plain scalar cannot hold.
    description: 'A formula or template the value must match (e.g., "[Topic] [Question Word]: [Specific Question Answered]")'
  - id: prose
    description: 'Editorial prose that requires LLM judgment to evaluate (e.g., "lead with empathy, then offer solutions")'

# -----------------------------------------------------------------------------
# Selector grammar—how an override identifies which content it applies to.
# Selectors use object syntax. All keys must match for the selector to apply.
# Keys must be drawn from the layer's `selector_attrs` (see `layers` above).
# -----------------------------------------------------------------------------
# Describes the `selector` object of an override: its shape, the allowed values
# for each selector attribute, and how multiple keys combine.
selector_grammar:
  shape:
    # Single-quoted because the examples contain `{ ... }` and `: `.
    # NOTE(review): the second example combines a `platform` key with a `format`
    # key, so one selector evidently may span more than one layer — confirm how
    # such an override is assigned to a layer.
    description: 'Object with layer-attribute keys (e.g., { format: faq } or { platform: apple-news, format: everything-to-know })'
    required: All keys present must match the content's attributes
    matching: Equality only in v1.0; future versions may support globs / regex / set membership
  # One entry per attribute named in the `selector_attrs` lists under `layers`.
  attribute_values:
    persona:
      type: enum
      values: [discover-browser, curious-optimizer, watercooler-insider, curious-explorer, science-enthusiast]
    format:
      type: enum
      values: [everything-to-know, faq, what-to-know-next, recipe, timeline, interview, recap, fan-content, obituary, couple-baby, cast, discover-explainer]
      # active first (everything-to-know, faq, what-to-know-next), then pending, then retired (discover-explainer retired 2026-05-28)
      # `discover-explainer` retained in the enum so historical articles tagged with this format still validate; new articles must not select it
    # NOTE(review): the platform layer description also names AP-Compatible
    # Quick / Condensed / Thorough pages, but no value for them appears under
    # `platform`, `publication`, or `distribution` — confirm whether that is intended.
    platform:
      type: enum
      values: [smartnews, apple-news]
    publication:
      type: enum
      values: [us-weekly, trend-hunter-b2c, womans-world]
    distribution:
      type: enum
      description: For destination-channel-specific rules (e.g., push-notification, web, syndication-feed)
      # NOTE(review): `apple-news` and `smartnews` are also `platform` values —
      # confirm which key a rule for those destinations should use.
      values: [web, push-notification, apple-news, smartnews, msn, yahoo]
  combinators:
    note: v1.0 uses simple equality + AND-of-keys. Future versions may add OR / NOT.

# -----------------------------------------------------------------------------
# Enforcement targets—which CSA component is responsible for applying this rule.
# Engineering uses this to route rules to the right validator / composer / agent prompt.
# -----------------------------------------------------------------------------
# Each `id` below is a valid value for a rule entry's `enforcement_target`
# field. Descriptions are folded scalars (`>-`): the line breaks below are
# folded to single spaces when the file is parsed.
enforcement_targets:
  - id: writer
    description: >-
      Rule is included in the agent prompt that drafts content. The LLM applies
      it during generation. Best for prose rules requiring judgment (voice,
      tone, persona framing)
  - id: grader
    description: >-
      Rule is checked by post-hoc programmatic validation (regex, parse-tree
      analysis, etc.). Best for machine-evaluable rules (em dash spacing,
      Oxford comma presence, char count)
  - id: composer
    description: >-
      Rule is applied at composition time when the StyleGuideComposer assembles
      guides for an LLM call. Determines layer ordering / which guides to load
  - id: human
    description: >-
      Rule is enforced by human editorial review. Best for rules that require
      domain judgment (helpful-content audit, byline accuracy, AI disclosure
      presence)
