# COST POLICY
# Status: IMMUTABLE per version. Reviewed monthly.
# Version: 1.0.0
# Decision rank: #3

# ─────────────────────────────────────────────────────
# GLOBAL BUDGETS
# ─────────────────────────────────────────────────────
# Global spend limits in USD, the utilization percentages at which a warning
# is raised and work is halted, and the channels that receive alerts.
global:
  monthly_budget_usd: 5000.00
  daily_budget_usd: 200.00
  # Utilization thresholds, in ascending order.
  # NOTE(review): this file does not say whether they apply to the monthly
  # budget, the daily budget, or both - confirm with the consumer.
  warn_at_percent: 70
  hard_stop_at_percent: 95  # halt at 95% utilization
  alert_channels:
    - slack
    - audit_log

# ─────────────────────────────────────────────────────
# PER-PROJECT BUDGETS (override per project in PROJECT/COST.yaml)
# ─────────────────────────────────────────────────────
# Default limits (USD) for a project; a project can override them in its own
# PROJECT/COST.yaml. Listed from the narrowest scope to the widest.
per_project_default:
  per_task_max_usd: 1.00
  per_story_max_usd: 5.00
  monthly_usd: 500.00

# ─────────────────────────────────────────────────────
# PER-LAYER COST CEILINGS (per invocation)
# ─────────────────────────────────────────────────────
# Limits for a single invocation at each layer: the model named for the
# layer, the input and output token caps, and the maximum spend in USD.
per_layer_ceilings:
  # Layer 1: global orchestrator.
  layer_1_global_orchestrator:
    model: 'haiku-4-5'
    max_tokens_input: 4000
    max_tokens_output: 1000
    max_cost_per_invocation_usd: 0.01

  # Layer 2: domain orchestrator.
  layer_2_domain_orchestrator:
    model: 'sonnet-4-6'
    max_tokens_input: 20000
    max_tokens_output: 4000
    max_cost_per_invocation_usd: 0.20

  # Layer 3: task orchestrator.
  layer_3_task_orchestrator:
    model: 'sonnet-4-6'
    max_tokens_input: 30000
    max_tokens_output: 8000
    max_cost_per_invocation_usd: 0.50

  # Layer 4: specialist.
  layer_4_specialist:
    model: 'sonnet-4-6'
    max_tokens_input: 80000
    max_tokens_output: 16000
    max_cost_per_invocation_usd: 2.00

  # Layer 5: worker.
  layer_5_worker:
    model: 'haiku-4-5'
    max_tokens_input: 16000
    max_tokens_output: 4000
    max_cost_per_invocation_usd: 0.10

  # Planner entry; unlike the numbered layers it carries a trigger list.
  # NOTE(review): at the opus-4-7 rates listed under
  # model_pricing_usd_per_million_tokens (15.00 input / 75.00 output), a call
  # that uses both token caps in full costs
  # 200000 * 15 / 1e6 + 32000 * 75 / 1e6 = 5.40 USD, which is above the 5.00
  # ceiling below. Confirm that the cost ceiling is meant to bind first.
  planner_opus:
    model: 'opus-4-7'
    max_tokens_input: 200000
    max_tokens_output: 32000
    max_cost_per_invocation_usd: 5.00
    # NOTE(review): presumably any one of these conditions selects the
    # planner; how they are evaluated is not defined in this file.
    triggers:
      - 'complex_planning_required = true'
      - 'task_estimated_complexity > 0.8'
      - 'explicit user request'

# ─────────────────────────────────────────────────────
# MODEL PRICING (synced with provider; check monthly)
# ─────────────────────────────────────────────────────
# Provider list prices in USD per one million tokens, keyed by model name.
# In every entry cache_write is 1.25x and cache_read is 0.1x the input rate.
model_pricing_usd_per_million_tokens:
  # Haiku 4.5 list price. The earlier 0.80 / 4.00 / 1.00 / 0.08 figures were
  # the older Haiku 3.5 rates and under-counted spend on this model.
  haiku-4-5:
    input: 1.00
    output: 5.00
    cache_write: 1.25
    cache_read: 0.10
  sonnet-4-6:
    input: 3.00
    output: 15.00
    cache_write: 3.75
    cache_read: 0.30
  # NOTE(review): 15.00 / 75.00 is the rate of earlier Opus releases; confirm
  # against the provider's current price sheet that it applies to opus-4-7.
  opus-4-7:
    input: 15.00
    output: 75.00
    cache_write: 18.75
    cache_read: 1.50

# ─────────────────────────────────────────────────────
# COST OPTIMIZATION RULES (auto-applied)
# ─────────────────────────────────────────────────────
# Cost-saving techniques that are switched on, followed by the numeric
# thresholds that tune them.
optimizations:
  enabled:
    # Mandatory for the constitution and the registries.
    - prompt_caching
    # Load only the skills the router selected.
    - skill_lazy_loading
    # Compress once context usage goes above 80%.
    - context_compaction
    # Never inline content larger than 2KB.
    - artifact_referencing
    # Applied to high-confidence routes.
    - speculative_execution
    # 50% off for non-urgent work.
    - batch_api_for_async
    # For idempotent operations.
    - result_cache_redis
    # Cut generation as soon as the output is done.
    - stop_token_aggressive

  thresholds:
    # Only cache when doing so saves more than 1K tokens.
    cache_min_token_savings: 1000
    context_compaction_at_pct: 80
    speculative_confidence_min: 0.85
    # 3600 seconds = 1 hour.
    result_cache_default_ttl_sec: 3600

# ─────────────────────────────────────────────────────
# ESCALATION RULES
# ─────────────────────────────────────────────────────
# Action and notification targets for each cost event: a layer ceiling being
# exceeded, and a project budget reaching 70%, 90% and 100%.
escalation:
  on_layer_ceiling_exceeded:
    action: halt_and_log
    notify:
      - audit_log
      - slack

  on_project_budget_70pct:
    action: warn
    notify:
      - slack

  on_project_budget_90pct:
    action: warn_loud
    notify:
      - slack
      - email
    # NOTE(review): presumably the user must approve a budget extension
    # before work continues - confirm with the consumer.
    require_user_extension: true

  on_project_budget_100pct:
    action: hard_stop
    notify:
      - slack
      - email
      - pagerduty

# ─────────────────────────────────────────────────────
# RESOURCE GUARDRAILS
# ─────────────────────────────────────────────────────
# Upper bounds on resource use: parallel agents, recursion depth, retries,
# story duration and episodic memory.
guardrails:
  max_parallel_agents_per_task: 5
  max_recursion_depth: 5
  max_retry_per_skill: 3
  # 1800 seconds = 30 minutes; treated as a hard timeout.
  max_story_duration_seconds: 1800   # 30min hard timeout
  # Value is in megabytes, per the key name.
  max_episodic_memory_mb_per_task: 50

# ─────────────────────────────────────────────────────
# AUDIT REQUIREMENTS
# ─────────────────────────────────────────────────────
# Audit settings: per-invocation logging, the cost dimensions to log, and the
# monthly review.
audit:
  log_every_invocation: true
  # Dimensions listed for cost logging.
  # NOTE(review): the list order may be significant to the log consumer
  # (e.g. column order) - keep it stable unless confirmed otherwise.
  log_cost_dimensions:
    - agent_id
    - layer
    - skill_id
    - project_id
    - story_id
    - timestamp
    - tokens_in
    - tokens_out
    - cost_usd
    - cache_hit_pct

  # Monthly review: required, with an auto-generated report that also
  # proposes optimizations.
  monthly_review:
    required: true
    auto_generated_report: true
    suggest_optimizations: true   # Opus monthly meta-review