feat: greenfield Go rewrite (Phase 2A + 2B + 2C core)

Replaces the Python v1 (preserved on archive/python-v1 branch).

Stack: Go 1.23 + chi router + sqlx + MySQL 8. Distroless static
container. 12-factor config from env. Embedded SQL migrations.

Schema (internal/db/migrations/001_init.sql):
- topics: 议题 with 4-timestamp lifecycle (signup_open/close +
  debate_start/end), visibility (default private), status state machine,
  verdict_schema FK
- signups: agent self-enrollment with willing_camps (JSON array of
  pro|con|judge), pre_validated audit flag, (topic,agent) unique
- camps: post-allocation lock (one row per topic+camp) — written by
  Phase 2D allocator
- rounds + arguments: chronological debate transcript
- verdicts: judge structured output, one per topic, with token-cost
  trail for future budgeting
- agent_keys + system_keys: peppered sha256 hashes, never raw
- verdict_schemas: seeded with binary, claim-resolution (for
  analyze-intel), policy-recommendation, free-form

Auth (internal/auth):
- AgentAPIKey: real bearer-token middleware against agent_keys;
  best-effort last_used_at touch on success
- OIDCBrowser: Phase 2 stub. Dev mode accepts x-dev-bypass header
  (constant-time compare); prod 401s with a Phase-4-pending hint.
  Real Keycloak JWKS verification lands with the frontend rewrite.

HTTP API (internal/httpapi):
- /api/healthz — db ping + version + uptime
- GET /api/topics — list with status/visibility/limit/offset filters;
  anonymous callers see public only
- GET /api/topics/{id} — visibility-gated (private → 404 hide)
- POST /api/topics — create with RFC3339 lifecycle validation
  (signup_open < signup_close <= debate_start < debate_end)
- PUT /api/topics/{id}/visibility — dialectic-admin role gate
- POST /api/topics/{id}/signups — agent self-enroll; rejects when
  topic.status != signup_open OR outside signup window; idempotent
  upsert per (topic, agent)
- GET /api/topics/{id}/signups — list (any authed caller)

Auth chains:
- optionalAuth: try bearer → try oidc → fall through anonymous
  (handlers branch on Caller.Kind == ""). Uses captureWriter to demote
  inner 401s to "try next" without leaking response bytes.
- requireAnyAuth: chain that 401s if neither succeeds.
- requireAgent: strict bearer-only (signup POST).

Run: `docker compose -f docker-compose.dev.yml up --build`. Migrations
auto-apply on first connect; idempotent on reboot. README documents
env vars, dev bypass usage, agent-key provisioning SQL, and the
Phase 2D/E/3/4/5 roadmap.

go vet clean, gofmt clean, single 11M static binary.
This commit is contained in:
h z
2026-05-23 11:51:48 +01:00
parent e049b1c4bd
commit e706f3d6ef
51 changed files with 1700 additions and 2324 deletions

119
internal/db/db.go Normal file
View File

@@ -0,0 +1,119 @@
// Package db wraps sqlx and runs embedded SQL migrations on startup.
//
// Migrations are flat files in migrations/, named NNN_*.sql. They run in
// lexical order. Each file is executed inside its own transaction, and a
// missing schema_migrations row means "not yet applied". Note that MySQL
// implicitly commits on DDL statements, so the transaction only guarantees
// atomicity for DML; a DDL migration that fails midway may be left
// partially applied. This is a deliberately simple migration runner — for
// this project's size and team size, pulling in golang-migrate or atlas
// adds complexity without payback. If the migration count grows past ~20,
// revisit.
package db
import (
	"context"
	"database/sql"
	"embed"
	"errors"
	"fmt"
	"sort"
	"strings"
	"time"

	_ "github.com/go-sql-driver/mysql"
	"github.com/jmoiron/sqlx"
)
//go:embed migrations/*.sql
var migrationsFS embed.FS
// Open connects to the MySQL server described by dsn via sqlx.ConnectContext
// and returns the resulting handle with its connection pool configured.
//
// The pool is capped at 25 open and 5 idle connections, and connections are
// recycled after 5 minutes. A connection failure is returned wrapped as
// "connect mysql: ...".
func Open(ctx context.Context, dsn string) (*sqlx.DB, error) {
	const (
		maxOpenConns    = 25
		maxIdleConns    = 5
		connMaxLifetime = 5 * time.Minute
	)

	pool, err := sqlx.ConnectContext(ctx, "mysql", dsn)
	if err != nil {
		return nil, fmt.Errorf("connect mysql: %w", err)
	}

	pool.SetMaxOpenConns(maxOpenConns)
	pool.SetMaxIdleConns(maxIdleConns)
	pool.SetConnMaxLifetime(connMaxLifetime)
	return pool, nil
}
// RunMigrations applies any embedded migrations that aren't yet present in
// the schema_migrations table. Idempotent — safe to call on every startup.
//
// It first ensures schema_migrations exists, then walks the *.sql files
// under migrations/ in lexical order of their names. A file whose name is
// already recorded is skipped; every other file is applied and recorded.
// The first failure aborts the run and is returned wrapped with context
// naming the migration involved.
//
// NOTE: MySQL implicitly commits on DDL (CREATE TABLE, ALTER TABLE, ...), so
// the per-file transaction only makes DML-only migrations atomic. A DDL
// migration that fails midway can leave its earlier statements applied
// without a schema_migrations row.
func RunMigrations(ctx context.Context, d *sqlx.DB) error {
	// Bootstrap the tracker table itself.
	if _, err := d.ExecContext(ctx, `
CREATE TABLE IF NOT EXISTS schema_migrations (
name VARCHAR(255) PRIMARY KEY,
applied_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4`); err != nil {
		return fmt.Errorf("ensure schema_migrations: %w", err)
	}

	entries, err := migrationsFS.ReadDir("migrations")
	if err != nil {
		return fmt.Errorf("list migrations: %w", err)
	}
	var files []string
	for _, e := range entries {
		if !e.IsDir() && strings.HasSuffix(e.Name(), ".sql") {
			files = append(files, e.Name())
		}
	}
	sort.Strings(files)

	for _, name := range files {
		var found string
		err := d.GetContext(ctx, &found, `SELECT name FROM schema_migrations WHERE name = ?`, name)
		if err == nil {
			continue // already applied
		}
		// errors.Is keeps working even if a driver or wrapper ever wraps
		// the sentinel instead of returning it bare.
		if !errors.Is(err, sql.ErrNoRows) {
			return fmt.Errorf("check migration %s: %w", name, err)
		}
		if err := applyMigration(ctx, d, name); err != nil {
			return err
		}
	}
	return nil
}

// applyMigration executes the embedded migration file called name and
// records it in schema_migrations, all on a single transaction.
func applyMigration(ctx context.Context, d *sqlx.DB, name string) error {
	content, err := migrationsFS.ReadFile("migrations/" + name)
	if err != nil {
		return fmt.Errorf("read migration %s: %w", name, err)
	}

	// MySQL doesn't support multi-statement in a single Exec by default, so
	// the file is split into individual statements that run one at a time.
	statements := splitSQL(string(content))

	tx, err := d.BeginTxx(ctx, nil)
	if err != nil {
		return fmt.Errorf("tx for %s: %w", name, err)
	}
	// Covers every early return below. After a successful Commit the
	// transaction is already finished and Rollback only reports
	// sql.ErrTxDone, which is deliberately ignored.
	defer func() { _ = tx.Rollback() }()

	for _, stmt := range statements {
		stmt = strings.TrimSpace(stmt)
		if stmt == "" {
			continue
		}
		if _, err := tx.ExecContext(ctx, stmt); err != nil {
			return fmt.Errorf("apply %s [statement: %q]: %w", name, firstLine(stmt), err)
		}
	}
	if _, err := tx.ExecContext(ctx, `INSERT INTO schema_migrations(name) VALUES (?)`, name); err != nil {
		return fmt.Errorf("record %s: %w", name, err)
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit %s: %w", name, err)
	}
	return nil
}
// splitSQL splits a SQL script into individual statements on ';'
// terminators and returns them without the terminator.
//
// Unlike a plain strings.Split, a ';' is not treated as a terminator when
// it appears inside:
//   - a "-- " or "#" line comment,
//   - a /* ... */ block comment,
//   - a '...' or "..." string literal (backslash escapes honoured),
//   - a `...` quoted identifier.
//
// Comments are kept as part of the statement they precede, so the server
// still sees them. Fragments that contain nothing but whitespace and
// comments are dropped, which means the result never contains empty or
// comment-only entries. Client-side constructs such as DELIMITER are not
// understood.
func splitSQL(s string) []string {
	var (
		stmts      []string
		start      int  // byte offset where the current statement begins
		hasContent bool // current statement has bytes outside comments/whitespace
	)
	for i := 0; i < len(s); {
		c := s[i]
		switch {
		case c == '\'' || c == '"' || c == '`':
			hasContent = true
			i = skipQuotedSQL(s, i)
		case c == '#',
			// MySQL only treats "--" as a comment when it is followed by
			// whitespace, a control character, or the end of input.
			c == '-' && strings.HasPrefix(s[i:], "--") && (i+2 == len(s) || s[i+2] <= ' '):
			if nl := strings.IndexByte(s[i:], '\n'); nl >= 0 {
				i += nl + 1
			} else {
				i = len(s)
			}
		case c == '/' && strings.HasPrefix(s[i:], "/*"):
			// "/*! ... */" is executed by MySQL, so it counts as content.
			if strings.HasPrefix(s[i:], "/*!") {
				hasContent = true
			}
			if end := strings.Index(s[i+2:], "*/"); end >= 0 {
				i += 2 + end + 2
			} else {
				i = len(s) // unterminated comment swallows the rest
			}
		case c == ';':
			if hasContent {
				stmts = append(stmts, s[start:i])
			}
			start, hasContent = i+1, false
			i++
		case c == ' ', c == '\t', c == '\n', c == '\r':
			i++
		default:
			hasContent = true
			i++
		}
	}
	if hasContent {
		stmts = append(stmts, s[start:])
	}
	return stmts
}

// skipQuotedSQL returns the index just past the quoted run that opens at
// s[i] (one of ', " or `), or len(s) when the quote is never closed.
func skipQuotedSQL(s string, i int) int {
	quote := s[i]
	for j := i + 1; j < len(s); j++ {
		switch {
		case s[j] == '\\' && quote != '`':
			j++ // skip the escaped byte; backticks have no backslash escapes
		case s[j] == quote:
			// A doubled quote ('' or "") simply re-enters quoted mode on
			// the next iteration of the caller's loop.
			return j + 1
		}
	}
	return len(s)
}
// firstLine returns the portion of s that precedes its first newline, or
// all of s when it contains no newline. It is used to keep error messages
// for multi-line SQL statements short.
func firstLine(s string) string {
	head, _, _ := strings.Cut(s, "\n")
	return head
}

View File

@@ -0,0 +1,141 @@
-- 001_init.sql — Dialectic v2 schema (greenfield, replaces Python v1).
-- See /home/hzhang/arch/DIALECTIC-V2-DESIGN.md for the design.
-- Verdict schemas: declared at topic-creation time. The judge produces
-- output matching the schema the topic references.
--   id          short slug, referenced by topics.verdict_schema_id
--   shape_json  JSON description of the expected verdict fields
CREATE TABLE verdict_schemas (
id VARCHAR(64) NOT NULL PRIMARY KEY,
description TEXT NOT NULL,
shape_json JSON NOT NULL,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Seed the four v1 schemas: binary, claim-resolution, policy-recommendation
-- and free-form. Each shape_json is built with JSON_OBJECT and maps a field
-- name to a human-readable hint about the value expected there.
INSERT INTO verdict_schemas (id, description, shape_json) VALUES
('binary', 'pro|con|draw with confidence + key reasoning', JSON_OBJECT('decision', 'pro|con|draw', 'confidence', 'number 0..1', 'key_reasoning', 'string')),
('claim-resolution', 'analyze-intel contested-cluster resolution', JSON_OBJECT('verdict', 'resolved-toward-A|resolved-toward-B|irreducibly-contested', 'winning_claim', 'string', 'dissenting_points', 'array of string', 'confidence', 'number 0..1')),
('policy-recommendation', 'recommended action with alternatives and risks', JSON_OBJECT('recommended_action', 'string', 'alternatives', 'array of string', 'conditions_for_alternatives', 'array of string', 'risks_noted', 'array of string')),
('free-form', 'unstructured summary escape hatch', JSON_OBJECT('summary', 'string'));
-- Topics — the unit of debate.
-- Each topic references one verdict schema, defaults to private visibility
-- and starts in the 'created' status.
CREATE TABLE topics (
id CHAR(36) NOT NULL PRIMARY KEY,
title VARCHAR(255) NOT NULL,
summary TEXT NOT NULL,
visibility ENUM('public','private') NOT NULL DEFAULT 'private',
verdict_schema_id VARCHAR(64) NOT NULL,
status ENUM('created','signup_open','signup_closed','debating','completed','cancelled') NOT NULL DEFAULT 'created',
-- Lifecycle timestamps (per section 3 of design doc).
-- No ordering between the four is enforced by this table.
signup_open_at TIMESTAMP NOT NULL,
signup_close_at TIMESTAMP NOT NULL,
debate_start_at TIMESTAMP NOT NULL,
debate_end_at TIMESTAMP NOT NULL,
-- Audit
creator_user_id CHAR(36) NOT NULL,
visibility_changed_by CHAR(36) NULL,
visibility_changed_at TIMESTAMP NULL,
cancelled_reason VARCHAR(255) NULL,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
INDEX idx_topics_status (status, signup_open_at),
INDEX idx_topics_visibility (visibility, created_at),
-- No ON DELETE action is declared for the schema reference.
CONSTRAINT fk_topics_schema FOREIGN KEY (verdict_schema_id) REFERENCES verdict_schemas(id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Signups: an agent volunteers for one or more camps on a topic.
-- willing_camps is a JSON array of camp names (subset of {pro, con, judge}).
-- (agent_id, topic_id) is unique — re-signup updates willing_camps.
-- Rows are removed together with their topic (ON DELETE CASCADE).
CREATE TABLE signups (
id CHAR(36) NOT NULL PRIMARY KEY,
topic_id CHAR(36) NOT NULL,
agent_id VARCHAR(64) NOT NULL,
willing_camps JSON NOT NULL,
-- Pre-validation result captured at signup time. The plugin verifies the
-- agent has an on_call slot covering the debate window, and the backend
-- records what the agent told it, for audit.
pre_validated BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE KEY uq_signups (topic_id, agent_id),
CONSTRAINT fk_signups_topic FOREIGN KEY (topic_id) REFERENCES topics(id) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Camps: the post-allocation assignment. One row per (topic, camp) with
-- the locked-in agent. Written by the camp-allocation algorithm at
-- signup_close_at and immutable afterwards (no drop-out / replacement in v1).
-- uq_camps enforces the one-row-per-(topic, camp) rule. Rows are removed
-- together with their topic (ON DELETE CASCADE).
CREATE TABLE camps (
id CHAR(36) NOT NULL PRIMARY KEY,
topic_id CHAR(36) NOT NULL,
camp ENUM('pro','con','judge') NOT NULL,
agent_id VARCHAR(64) NOT NULL,
allocated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE KEY uq_camps (topic_id, camp),
INDEX idx_camps_agent (agent_id),
CONSTRAINT fk_camps_topic FOREIGN KEY (topic_id) REFERENCES topics(id) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Rounds: chronological partition of arguments. Each topic has N rounds
-- (typically 3-5). Round 0 is the opening. Round transitions are driven
-- by the orchestrator on a schedule (or all-participants-posted).
-- closed_at is nullable and has no default. round_no is unique per topic.
CREATE TABLE rounds (
id CHAR(36) NOT NULL PRIMARY KEY,
topic_id CHAR(36) NOT NULL,
round_no INT NOT NULL,
opened_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
closed_at TIMESTAMP NULL,
UNIQUE KEY uq_rounds (topic_id, round_no),
CONSTRAINT fk_rounds_topic FOREIGN KEY (topic_id) REFERENCES topics(id) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Arguments: an individual contribution within a round by a camp's agent.
-- For pro/con these are claims/rebuttals. For judge these are clarifying
-- questions (judge is silent observer in v1 except for clarifications).
-- topic_id is stored alongside round_id and has its own index and foreign
-- key. NOTE(review): nothing in this table forces it to match the round's
-- topic — presumably the writer keeps them consistent, confirm.
CREATE TABLE arguments (
id CHAR(36) NOT NULL PRIMARY KEY,
topic_id CHAR(36) NOT NULL,
round_id CHAR(36) NOT NULL,
camp ENUM('pro','con','judge') NOT NULL,
agent_id VARCHAR(64) NOT NULL,
content MEDIUMTEXT NOT NULL,
posted_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
INDEX idx_arguments_round (round_id, posted_at),
INDEX idx_arguments_topic (topic_id, posted_at),
CONSTRAINT fk_arguments_round FOREIGN KEY (round_id) REFERENCES rounds(id) ON DELETE CASCADE,
CONSTRAINT fk_arguments_topic FOREIGN KEY (topic_id) REFERENCES topics(id) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Verdicts: judge's structured output, one per topic (one verdict per
-- debate). verdict_json shape matches the topic's verdict_schema_id.
-- The UNIQUE constraint on topic_id is what limits a topic to one verdict.
CREATE TABLE verdicts (
id CHAR(36) NOT NULL PRIMARY KEY,
topic_id CHAR(36) NOT NULL UNIQUE,
judge_agent_id VARCHAR(64) NOT NULL,
verdict_json JSON NOT NULL,
rationale TEXT NOT NULL,
-- Token cost trail for accounting (Phase 1: not enforced. Phase N: budget gate)
tokens_input INT NOT NULL DEFAULT 0,
tokens_output INT NOT NULL DEFAULT 0,
produced_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT fk_verdicts_topic FOREIGN KEY (topic_id) REFERENCES topics(id) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Agent API keys: provisioned per agent at recruitment time. Stored as
-- sha256(pepper || raw), so pepper rotation invalidates all keys.
-- One row per agent (agent_id is the primary key). The UNIQUE constraint on
-- key_hash already creates the index used for lookups by hash, so no
-- separate secondary index on that column is declared.
CREATE TABLE agent_keys (
agent_id VARCHAR(64) NOT NULL PRIMARY KEY,
key_hash CHAR(64) NOT NULL UNIQUE,
issued_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_used_at TIMESTAMP NULL,
revoked_at TIMESTAMP NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- System keys: out-of-band credentials for non-agent callers (e.g. the
-- analyze-intel workflow running via a system identity that creates
-- topics on behalf of the analyzing agent). Also stored as hash.
-- The UNIQUE constraint on key_hash already creates the index used for
-- lookups by hash, so no separate secondary index on that column is
-- declared.
CREATE TABLE system_keys (
name VARCHAR(64) NOT NULL PRIMARY KEY,
key_hash CHAR(64) NOT NULL UNIQUE,
description TEXT NULL,
issued_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
revoked_at TIMESTAMP NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;