feat: Phase 2D — orchestrator, arguments/verdict endpoints, fabric announce

State machine driver + camp allocator + judge-submitted verdicts + broadcast hook to Fabric announce channel. internal/orchestrator/ - allocator.go: pure function implementing the 3-camp rule from the 2026-05-23 design session — for each camp (pro/con/judge), random pick from volunteers; backfill unfilled camps from remaining unallocated signups if pool is large enough; <3 final → cancel with diagnostic reason. rng injected for test determinism. - allocator_test.go: 7 tests covering empty/insufficient/single-volunteer /multi-volunteer-no-dup/backfill/insufficient-backfill/large-pool distinctness invariants. All pass. - ticker.go: scans every 15s (configurable via ORCHESTRATOR_TICK_INTERVAL), drives 3 state transitions atomically: created → signup_open (post fabric announcement async) signup_open → signup_closed | cancelled (run allocator, write camps) signup_closed → debating (open round 0) debating → completed is driven by the verdict POST handler (the implicit "judging" sub-state is captured by the gate status==debating AND now>=debate_end_at). Per-topic transitions use SELECT FOR UPDATE so concurrent ticker instances are safe. internal/fabric/announce.go: HTTP client posting to a Guild announce channel using x-fabric-system-key header (the Phase 1 gate). Wraps the formatted topic announcement (title/summary/timing/schema). All 4 config fields required to enable; any missing → no-op with log (orchestrator runs fine without Fabric coupling for dev). internal/store/{round,camp,argument,verdict}_store.go: CRUD layer for the remaining v2 entities. CampStore.WriteAllocation accepts a tx so the orchestrator can wrap allocator+camps+status into one atomic transition. internal/httpapi/handlers/arguments.go: - POST /api/topics/{id}/arguments — agent posts during debate. Gates: agent must be in a camp on this topic; status==debating; content nonempty and <=32KB; attached to latest open round. - GET /api/topics/{id}/arguments — full transcript, visibility-gated. internal/httpapi/handlers/verdict.go: - POST /api/topics/{id}/verdict — judge submits. Gates: caller==judge camp; status==debating AND now>=debate_end_at; verdict valid JSON; rationale required. On success: writes verdicts row (unique on topic_id → 409 on dup) and flips topic.status to completed. - GET /api/topics/{id}/verdict — visibility-gated. config: 5 new env vars — FABRIC_GUILD_BASE_URL, FABRIC_ANNOUNCE_CHANNEL_ID, FABRIC_SYSTEM_API_KEY, FABRIC_BOT_BEARER_TOKEN, ORCHESTRATOR_TICK_INTERVAL. routes.go: wired new handlers — POST signups/arguments/verdict gated on agent bearer; GET arguments/verdict on optional-auth chain (public topics readable anonymously). main.go: instantiates announcer + ticker; ticker.Run in a goroutine sharing the lifetime ctx. go vet + gofmt clean; 7/7 allocator tests pass; 12M static binary. Next: Phase 2E (deploy to t3 with nginx + CF origin cert) or Phase 2D.5 (SSE stream for live transcript subscribers).
2026-05-23 12:02:27 +01:00
parent e706f3d6ef
commit 57a1fa1b33
14 changed files with 1243 additions and 14 deletions
--- a/internal/orchestrator/allocator.go
+++ b/internal/orchestrator/allocator.go
@@ -0,0 +1,116 @@
+// Package orchestrator owns the topic lifecycle state machine:
+// signup-window allocator, round driver, judge invocation, Fabric
+// announce broadcaster. All long-running coordination logic lives
+// here so handlers stay thin.
+package orchestrator
+
+import (
+	"math/rand"
+
+	"git.hangman-lab.top/hzhang/Dialectic.Backend/internal/models"
+)
+
+// AllocateResult is the outcome of running the camp allocator on a
+// topic's signup pool. Either Allocation is set (one agent per camp,
+// no duplicates) or CancelReason is set (signup pool insufficient
+// after backfill).
+type AllocateResult struct {
+	Allocation   map[models.Camp]string // camp → agentId
+	CancelReason string                 // non-empty when allocation could not complete
+}
+
+// Allocate runs the 3-camp self-enrollment allocation algorithm
+// agreed on in the 2026-05-23 design session.
+//
+//	for each camp in [pro, con, judge]:
+//	    if any signup has that camp in willing_camps AND that agent
+//	    isn't already locked → random pick, lock.
+//	if camps still unfilled AND remaining unallocated signups >= unfilled:
+//	    random pick from remaining to fill, one each.
+//	if any camp still unfilled (i.e. signup pool < 3 effective):
+//	    return CancelReason; creator re-times.
+//
+// Invariants: same agent never lands in two camps; allocation order
+// respects [pro, con, judge] so the test seed produces deterministic
+// results when rng is seeded.
+//
+// `rng` is injected so tests can supply a deterministic source. Pass
+// `rand.New(rand.NewSource(time.Now().UnixNano()))` in prod.
+func Allocate(signups []models.SignupView, rng *rand.Rand) AllocateResult {
+	allocated := make(map[models.Camp]string, 3)
+	used := make(map[string]struct{}, len(signups))
+
+	// Pass 1 — fill each camp from its volunteers.
+	for _, camp := range models.AllCamps {
+		candidates := make([]string, 0)
+		for _, s := range signups {
+			if _, taken := used[s.AgentID]; taken {
+				continue
+			}
+			for _, w := range s.WillingCamps {
+				if w == camp {
+					candidates = append(candidates, s.AgentID)
+					break
+				}
+			}
+		}
+		if len(candidates) == 0 {
+			continue
+		}
+		pick := candidates[rng.Intn(len(candidates))]
+		allocated[camp] = pick
+		used[pick] = struct{}{}
+	}
+
+	// Pass 2 — backfill unfilled camps from any remaining signup.
+	if len(allocated) < 3 {
+		remaining := make([]string, 0)
+		for _, s := range signups {
+			if _, taken := used[s.AgentID]; taken {
+				continue
+			}
+			remaining = append(remaining, s.AgentID)
+		}
+		// We can only fill if we have enough remaining for every still-empty camp.
+		unfilled := make([]models.Camp, 0, 3)
+		for _, c := range models.AllCamps {
+			if _, ok := allocated[c]; !ok {
+				unfilled = append(unfilled, c)
+			}
+		}
+		if len(remaining) >= len(unfilled) {
+			// Shuffle remaining, then take one per unfilled camp in order.
+			rng.Shuffle(len(remaining), func(i, j int) {
+				remaining[i], remaining[j] = remaining[j], remaining[i]
+			})
+			for i, c := range unfilled {
+				allocated[c] = remaining[i]
+				used[remaining[i]] = struct{}{}
+			}
+		}
+	}
+
+	// Verdict — all 3 filled or we cancel.
+	if len(allocated) < 3 {
+		filled := len(allocated)
+		return AllocateResult{
+			CancelReason: cancelReason(filled, len(signups)),
+		}
+	}
+	return AllocateResult{Allocation: allocated}
+}
+
+func cancelReason(filled, totalSignups int) string {
+	switch {
+	case totalSignups == 0:
+		return "no signups received"
+	case totalSignups < 3:
+		return "insufficient signups: need at least 3 distinct agents across pro/con/judge"
+	default:
+		// Edge case: pool >= 3 but allocator still couldn't fill — e.g.
+		// every signup volunteered for the same one camp and the same
+		// person was somehow used (shouldn't happen with current rules,
+		// but make the message honest).
+		return "allocation infeasible: signup distribution does not cover all 3 camps"
+	}
+}
--- a/internal/orchestrator/allocator_test.go
+++ b/internal/orchestrator/allocator_test.go
@@ -0,0 +1,137 @@
+package orchestrator
+
+import (
+	"math/rand"
+	"testing"
+
+	"git.hangman-lab.top/hzhang/Dialectic.Backend/internal/models"
+)
+
+func sig(agentID string, camps ...models.Camp) models.SignupView {
+	return models.SignupView{AgentID: agentID, WillingCamps: camps}
+}
+
+// Helper: assert no duplicate agents across the 3 camps.
+func assertDistinct(t *testing.T, alloc map[models.Camp]string) {
+	t.Helper()
+	seen := map[string]models.Camp{}
+	for c, a := range alloc {
+		if prev, ok := seen[a]; ok {
+			t.Fatalf("agent %q allocated to both %s and %s", a, prev, c)
+		}
+		seen[a] = c
+	}
+}
+
+func TestAllocate_EmptyPoolCancels(t *testing.T) {
+	r := Allocate(nil, rand.New(rand.NewSource(1)))
+	if r.CancelReason == "" {
+		t.Fatal("expected cancel reason, got allocation")
+	}
+}
+
+func TestAllocate_TwoSignupsCancels(t *testing.T) {
+	r := Allocate([]models.SignupView{
+		sig("a", models.CampPro),
+		sig("b", models.CampCon),
+	}, rand.New(rand.NewSource(1)))
+	if r.CancelReason == "" {
+		t.Fatalf("expected cancel reason (pool<3), got %v", r.Allocation)
+	}
+}
+
+func TestAllocate_OneVolunteerPerCampFills(t *testing.T) {
+	signups := []models.SignupView{
+		sig("a", models.CampPro),
+		sig("b", models.CampCon),
+		sig("c", models.CampJudge),
+	}
+	r := Allocate(signups, rand.New(rand.NewSource(1)))
+	if r.CancelReason != "" {
+		t.Fatalf("unexpected cancel: %s", r.CancelReason)
+	}
+	if r.Allocation[models.CampPro] != "a" || r.Allocation[models.CampCon] != "b" || r.Allocation[models.CampJudge] != "c" {
+		t.Fatalf("wrong allocation: %v", r.Allocation)
+	}
+	assertDistinct(t, r.Allocation)
+}
+
+func TestAllocate_AgentMultiVolunteerPicksOnlyOnce(t *testing.T) {
+	// 'a' volunteers for all 3 camps. Should only be allocated to one
+	// (pro, since it's first in iteration order); other camps need
+	// other volunteers or get filled via backfill.
+	signups := []models.SignupView{
+		sig("a", models.CampPro, models.CampCon, models.CampJudge),
+		sig("b", models.CampCon),
+		sig("c", models.CampJudge),
+	}
+	r := Allocate(signups, rand.New(rand.NewSource(1)))
+	if r.CancelReason != "" {
+		t.Fatalf("unexpected cancel: %s", r.CancelReason)
+	}
+	if r.Allocation[models.CampPro] != "a" {
+		t.Fatalf("expected 'a' in pro, got %v", r.Allocation)
+	}
+	if r.Allocation[models.CampCon] != "b" {
+		t.Fatalf("expected 'b' in con, got %v", r.Allocation)
+	}
+	if r.Allocation[models.CampJudge] != "c" {
+		t.Fatalf("expected 'c' in judge, got %v", r.Allocation)
+	}
+	assertDistinct(t, r.Allocation)
+}
+
+func TestAllocate_BackfillFromUnallocated(t *testing.T) {
+	// pro has 2 volunteers ('a','c'), con has 1 ('b'), judge has 0.
+	// Allocator picks one of {a,c} for pro, then b for con, then
+	// backfills judge from whichever of {a,c} is unallocated.
+	signups := []models.SignupView{
+		sig("a", models.CampPro),
+		sig("b", models.CampCon),
+		sig("c", models.CampPro),
+	}
+	r := Allocate(signups, rand.New(rand.NewSource(1)))
+	if r.CancelReason != "" {
+		t.Fatalf("unexpected cancel: %s; alloc=%v", r.CancelReason, r.Allocation)
+	}
+	assertDistinct(t, r.Allocation)
+	if len(r.Allocation) != 3 {
+		t.Fatalf("expected all 3 camps filled; got %d (%v)", len(r.Allocation), r.Allocation)
+	}
+	// Con must be 'b' (only volunteer); pro and judge must be {a, c} in some order.
+	if r.Allocation[models.CampCon] != "b" {
+		t.Fatalf("expected con=b, got %v", r.Allocation)
+	}
+	pro := r.Allocation[models.CampPro]
+	judge := r.Allocation[models.CampJudge]
+	if !(pro == "a" && judge == "c") && !(pro == "c" && judge == "a") {
+		t.Fatalf("expected pro/judge to be {a,c} permutation, got pro=%s judge=%s", pro, judge)
+	}
+}
+
+func TestAllocate_BackfillInsufficientCancels(t *testing.T) {
+	// pro filled by 'a'; con filled by 'b'; judge has no volunteer
+	// AND no remaining unallocated signups → cancel.
+	signups := []models.SignupView{
+		sig("a", models.CampPro),
+		sig("b", models.CampCon),
+	}
+	r := Allocate(signups, rand.New(rand.NewSource(1)))
+	if r.CancelReason == "" {
+		t.Fatalf("expected cancel; got allocation %v", r.Allocation)
+	}
+}
+
+func TestAllocate_LargePoolDistinctness(t *testing.T) {
+	// Many signups, all willing for all camps. Allocation should pick 3
+	// distinct agents, randomly.
+	signups := []models.SignupView{}
+	for i := 0; i < 20; i++ {
+		signups = append(signups, sig(string(rune('a'+i)), models.CampPro, models.CampCon, models.CampJudge))
+	}
+	r := Allocate(signups, rand.New(rand.NewSource(42)))
+	if r.CancelReason != "" {
+		t.Fatalf("unexpected cancel: %s", r.CancelReason)
+	}
+	assertDistinct(t, r.Allocation)
+}
--- a/internal/orchestrator/ticker.go
+++ b/internal/orchestrator/ticker.go
@@ -0,0 +1,237 @@
+package orchestrator
+
+import (
+	"context"
+	"log"
+	"math/rand"
+	"time"
+
+	"github.com/jmoiron/sqlx"
+
+	"git.hangman-lab.top/hzhang/Dialectic.Backend/internal/fabric"
+	"git.hangman-lab.top/hzhang/Dialectic.Backend/internal/models"
+	"git.hangman-lab.top/hzhang/Dialectic.Backend/internal/store"
+)
+
+// Ticker drives the topic state machine. Every TickInterval it scans
+// for topics with timestamps that have crossed a transition boundary
+// and applies the transition atomically per topic.
+//
+// State transitions handled by the ticker:
+//
+//	created       → signup_open      (when now >= signup_open_at)
+//	                                  + post Fabric announcement
+//	signup_open   → signup_closed    (when now >= signup_close_at, allocator succeeded)
+//	              → cancelled         (allocator returned CancelReason)
+//	signup_closed → debating         (when now >= debate_start_at; opens round 0)
+//
+// NOT handled by the ticker (driven elsewhere):
+//
+//	debating      → completed         driven by POST /api/topics/{id}/verdict
+//	                                  (judge submits; handler flips status).
+//	                                  The "judging" sub-state is implicit:
+//	                                  status==debating AND now>=debate_end_at.
+//
+// Per-topic transitions use SELECT FOR UPDATE so concurrent ticker
+// instances (or future replicas) don't double-fire.
+type Ticker struct {
+	db        *sqlx.DB
+	topics    *store.TopicStore
+	signups   *store.SignupStore
+	camps     *store.CampStore
+	rounds    *store.RoundStore
+	announcer *fabric.Announcer
+	interval  time.Duration
+	rng       *rand.Rand
+}
+
+func NewTicker(
+	db *sqlx.DB,
+	topics *store.TopicStore,
+	signups *store.SignupStore,
+	camps *store.CampStore,
+	rounds *store.RoundStore,
+	announcer *fabric.Announcer,
+	interval time.Duration,
+) *Ticker {
+	if interval <= 0 {
+		interval = 15 * time.Second
+	}
+	return &Ticker{
+		db:        db,
+		topics:    topics,
+		signups:   signups,
+		camps:     camps,
+		rounds:    rounds,
+		announcer: announcer,
+		interval:  interval,
+		rng:       rand.New(rand.NewSource(time.Now().UnixNano())),
+	}
+}
+
+// Run blocks until ctx is cancelled. Caller goroutines it.
+func (t *Ticker) Run(ctx context.Context) {
+	log.Printf("orchestrator: ticker started (interval=%s, announce=%v)", t.interval, t.announcer.Enabled())
+	tk := time.NewTicker(t.interval)
+	defer tk.Stop()
+	// First tick immediately so startup is responsive — don't wait
+	// 15s for the first scan.
+	t.tickOnce(ctx)
+	for {
+		select {
+		case <-ctx.Done():
+			log.Printf("orchestrator: ticker stopped")
+			return
+		case <-tk.C:
+			t.tickOnce(ctx)
+		}
+	}
+}
+
+// tickOnce scans + applies. Errors are logged per topic; one topic
+// failing doesn't stall the others.
+func (t *Ticker) tickOnce(ctx context.Context) {
+	now := time.Now()
+
+	// 1. created → signup_open
+	if err := t.transitionByStatus(ctx, now,
+		models.TopicStatusCreated, "signup_open_at",
+		func(ctx context.Context, tx *sqlx.Tx, topicID string) error {
+			topic, err := t.topics.GetByID(ctx, topicID)
+			if err != nil {
+				return err
+			}
+			if _, err := tx.ExecContext(ctx,
+				`UPDATE topics SET status = ? WHERE id = ?`,
+				models.TopicStatusSignupOpen, topicID); err != nil {
+				return err
+			}
+			// Announcement is best-effort, outside the tx (network call).
+			go t.broadcastAnnouncement(topic)
+			return nil
+		}); err != nil {
+		log.Printf("orchestrator: created→signup_open scan: %v", err)
+	}
+
+	// 2. signup_open → signup_closed | cancelled
+	if err := t.transitionByStatus(ctx, now,
+		models.TopicStatusSignupOpen, "signup_close_at",
+		func(ctx context.Context, tx *sqlx.Tx, topicID string) error {
+			signups, err := t.signups.ListByTopic(ctx, topicID)
+			if err != nil {
+				return err
+			}
+			res := Allocate(signups, t.rng)
+			if res.CancelReason != "" {
+				_, err := tx.ExecContext(ctx,
+					`UPDATE topics SET status = ?, cancelled_reason = ? WHERE id = ?`,
+					models.TopicStatusCancelled, res.CancelReason, topicID)
+				log.Printf("orchestrator: topic %s cancelled at signup_close: %s",
+					topicID, res.CancelReason)
+				return err
+			}
+			if err := t.camps.WriteAllocation(ctx, tx, topicID, res.Allocation); err != nil {
+				return err
+			}
+			_, err = tx.ExecContext(ctx,
+				`UPDATE topics SET status = ? WHERE id = ?`,
+				models.TopicStatusSignupClosed, topicID)
+			log.Printf("orchestrator: topic %s allocated pro=%s con=%s judge=%s",
+				topicID,
+				res.Allocation[models.CampPro], res.Allocation[models.CampCon], res.Allocation[models.CampJudge])
+			return err
+		}); err != nil {
+		log.Printf("orchestrator: signup_open→signup_closed scan: %v", err)
+	}
+
+	// 3. signup_closed → debating (opens round 0)
+	if err := t.transitionByStatus(ctx, now,
+		models.TopicStatusSignupClosed, "debate_start_at",
+		func(ctx context.Context, tx *sqlx.Tx, topicID string) error {
+			if _, err := tx.ExecContext(ctx,
+				`UPDATE topics SET status = ? WHERE id = ?`,
+				models.TopicStatusDebating, topicID); err != nil {
+				return err
+			}
+			// Round 0 inserted within the tx — if commit fails we don't
+			// leak a half-state.
+			_, err := tx.ExecContext(ctx,
+				`INSERT INTO rounds (id, topic_id, round_no) VALUES (UUID(), ?, 0)`,
+				topicID)
+			log.Printf("orchestrator: topic %s entered debating; round 0 opened", topicID)
+			return err
+		}); err != nil {
+		log.Printf("orchestrator: signup_closed→debating scan: %v", err)
+	}
+
+	// Note: there's no explicit `debating → judging` transition in v1.
+	// The verdict handler enforces "status==debating AND now>=debate_end_at"
+	// as its preconditions; that's equivalent to a "judging" gate without
+	// adding a new enum value. Migration 002 will introduce the explicit
+	// 'judging' state when we want richer UI (e.g. "Awaiting verdict"
+	// distinct from "In debate"); until then this comment serves as the
+	// state-machine documentation for future maintainers.
+}
+
+// transitionByStatus is the shared "scan + per-row tx + apply" pattern.
+// Picks all topics in `currentStatus` whose `dueColumn` <= now, opens a
+// tx with SELECT FOR UPDATE, re-checks status (someone else may have
+// already moved it), calls apply, commits. Errors per topic logged.
+func (t *Ticker) transitionByStatus(ctx context.Context, now time.Time,
+	currentStatus models.TopicStatus, dueColumn string,
+	apply func(context.Context, *sqlx.Tx, string) error) error {
+
+	// Pull candidate IDs first (no lock); we lock per row inside the loop.
+	var ids []string
+	q := "SELECT id FROM topics WHERE status = ? AND " + dueColumn + " <= ? LIMIT 50"
+	if err := t.db.SelectContext(ctx, &ids, q, currentStatus, now); err != nil {
+		return err
+	}
+	for _, id := range ids {
+		if err := t.applyOne(ctx, id, currentStatus, apply); err != nil {
+			log.Printf("orchestrator: apply topic=%s: %v", id, err)
+		}
+	}
+	return nil
+}
+
+func (t *Ticker) applyOne(ctx context.Context, topicID string,
+	expected models.TopicStatus,
+	apply func(context.Context, *sqlx.Tx, string) error) error {
+
+	tx, err := t.db.BeginTxx(ctx, nil)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = tx.Rollback() }() // safe no-op after commit
+
+	var actual models.TopicStatus
+	if err := tx.GetContext(ctx, &actual,
+		`SELECT status FROM topics WHERE id = ? FOR UPDATE`, topicID); err != nil {
+		return err
+	}
+	if actual != expected {
+		// Already transitioned by some other process — skip.
+		return nil
+	}
+	if err := apply(ctx, tx, topicID); err != nil {
+		return err
+	}
+	return tx.Commit()
+}
+
+func (t *Ticker) broadcastAnnouncement(topic *models.Topic) {
+	if topic == nil {
+		return
+	}
+	if err := t.announcer.PostTopicAnnouncement(
+		context.Background(),
+		topic.ID, topic.Title, topic.Summary,
+		topic.SignupOpenAt, topic.SignupCloseAt,
+		topic.DebateStartAt, topic.DebateEndAt,
+		topic.VerdictSchemaID,
+	); err != nil {
+		log.Printf("orchestrator: announce topic=%s failed: %v", topic.ID, err)
+	}
+}
+