# INSTRUMENT-AUSNAHME: Victor-Go — Mix-Only aus bestehenden Clips, kein API-Call
"""
Liest watson_demo_clips/d01_*.mp3 … d53_*.mp3 + *.timestamps.json
Wendet SSML-Filter-Trim an (Rat-der-Weisen-Fix 2026-05-31)
Exportiert watson_mathe.mp3
"""
import json
from pathlib import Path
from pydub import AudioSegment, effects

# Directory holding the pre-rendered dialogue clips (*.mp3) and their
# alignment files (*.timestamps.json).
CLIPS  = Path("/Users/victorholland/Vibe Coding/dispatcher/cockpit/watson_demo_clips")
# Final mixed-down MP3; written into the same directory as the source clips.
OUT    = CLIPS / "watson_mathe.mp3"
# Run log: emptied at the start of every run, then appended to by log().
LOG    = Path("/tmp/watson_mix_only.log")

# Playback order of the dialogue as (clip id, speaker) pairs. Each pair maps to
# the files "<id>_<speaker>.mp3" and "<id>_<speaker>.timestamps.json" in CLIPS.
# Speakers do not strictly alternate (d05/d06 and d16/d17 are both "holmes").
DIALOG = [
    ("d01","holmes"), ("d02","watson"), ("d03","holmes"), ("d04","watson"),
    ("d05","holmes"), ("d06","holmes"), ("d07","watson"), ("d08","holmes"),
    ("d09","watson"), ("d10","holmes"), ("d11","watson"), ("d12","holmes"),
    ("d13","watson"), ("d14","holmes"), ("d15","watson"), ("d16","holmes"),
    ("d17","holmes"), ("d18","watson"), ("d19","holmes"), ("d20","watson"),
    ("d21","holmes"), ("d22","watson"), ("d23","holmes"), ("d24","watson"),
    ("d25","holmes"), ("d26","watson"), ("d27","holmes"), ("d28","watson"),
    ("d29","holmes"), ("d30","watson"), ("d31","holmes"), ("d32","watson"),
    ("d33","holmes"), ("d34","watson"), ("d35","holmes"), ("d36","watson"),
    ("d37","holmes"), ("d38","watson"), ("d39","holmes"), ("d40","watson"),
    ("d41","holmes"), ("d42","watson"), ("d43","holmes"), ("d44","watson"),
    ("d45","holmes"), ("d46","watson"), ("d47","holmes"), ("d48","watson"),
    ("d49","holmes"), ("d50","watson"), ("d51","holmes"), ("d52","watson"),
    ("d53","holmes"),
]

# Padding (milliseconds) kept around the detected speech when a clip is trimmed.
LEAD_MS = 30   # kept before the start of the first spoken character
TAIL_MS = 200  # kept after the end of the last spoken character

# Silence inserted after a clip (ms) — dramatic pauses, thinking moments, topic
# transitions. Clips not listed here are followed directly by the next clip.
# Quoted lines are English renderings of the German dialogue.
PAUSE_AFTER = {
    # Opening
    "d01": 400,   # Holmes' question → Watson considers his answer
    "d03": 300,   # "You miscalculated by twenty-four shillings." — shock
    "d05": 700,   # "This will take a while." → Holmes takes a breath before the lecture

    # Linear functions
    "d08": 600,   # Worked example → Watson digests the numbers
    "d10": 700,   # "What does it cost then?" → Watson thinks it over
    "d11": 200,   # "Two shillings. The base fee." → Holmes pauses before the praise
    "d13": 400,   # Watson understands — Holmes lets it sink in briefly
    "d15": 200,   # "He wrote me a letter?!" → Holmes smirks

    # Topic transition: Pythagoras
    "d16": 900,   # Long cut — a new chapter begins

    # Pythagoras
    "d17": 500,   # Holmes asks about the circus case → Watson hesitates
    "d19": 800,   # "You did what?" → Watson's embarrassed answer
    "d21": 400,   # Holmes repeats dryly: "You estimated."
    "d23": 600,   # Long Pythagoras lecture → Watson starts to speak
    "d25": 500,   # "Not from twelve metres" — Watson realises: wrong man

    # Topic transition: systems of linear equations
    "d28": 900,   # New chapter

    # Systems of linear equations
    "d29": 800,   # Holmes poses a problem → Watson works it out in his head
    "d31": 400,   # "Correct." — brief confirmation, then Watson's insight
    "d33": 300,   # "The intersection is the crime scene" — Watson lets it sink in

    # Topic transition: word problems
    "d35": 700,   # New chapter

    # Word problems
    "d37": 700,   # Holmes states a word problem → Watson sits up
    "d38": 300,   # Watson sets up the equation → Holmes, laconically: "Go on."
    "d39": 700,   # "Go on." → Watson solves it through

    # Closing
    "d40": 900,   # Watson solves it correctly → Holmes' rare "Watson."
    "d41": 500,   # "Watson." → "Holmes." — comedic timing
    "d42": 700,   # "Holmes." → "That was excellent." — tension before the praise
    "d43": 700,   # "That was excellent." → Watson can't believe it
    "d44": 300,   # "Was that a compliment?" → Holmes' dry reply
    "d45": 500,   # "It was an observation." → Watson writes in his diary
}

def log(msg):
    """Print *msg* to stdout and append it as one line to the run log (LOG).

    The log file is opened explicitly as UTF-8: the messages written by this
    script contain non-ASCII characters (arrows, dashes, warning signs), and
    the platform default encoding is not guaranteed to be able to encode them.
    """
    print(msg, flush=True)
    with LOG.open("a", encoding="utf-8") as f:
        f.write(msg + "\n")

def get_speech_bounds(ts_path, audio_len_ms):
    """Return ``(start_ms, end_ms)`` of the spoken text inside a clip.

    Reads the alignment JSON at *ts_path*, which is expected to contain three
    parallel lists: ``characters``, ``character_start_times_seconds`` and
    ``character_end_times_seconds``. Characters that belong to a markup tag
    (everything from ``<`` through the next ``>``) are skipped, so the time
    taken up by tags at the edges of the text does not count as speech.

    Args:
        ts_path: ``pathlib.Path`` of the ``*.timestamps.json`` file.
        audio_len_ms: Length of the matching audio in milliseconds; only used
            to build the fallback bounds.

    Returns:
        Start of the first and end of the last non-tag character, in whole
        milliseconds (fractions are truncated). If the file cannot be read or
        its content is unusable, a warning is logged and bounds covering
        almost the whole clip (30 ms in from each edge) are returned instead.
    """
    fallback_margin_ms = 30
    try:
        # Decode as UTF-8 regardless of the platform's default encoding.
        alignment = json.loads(ts_path.read_text(encoding="utf-8"))
        chars  = alignment.get("characters", [])
        starts = alignment.get("character_start_times_seconds", [])
        ends   = alignment.get("character_end_times_seconds", [])
        if not chars:
            raise ValueError("leere Alignment-Daten")
        # zip() would silently drop the surplus of the longer lists and yield
        # bounds that end too early; treat a mismatch as unusable data.
        if not (len(chars) == len(starts) == len(ends)):
            raise ValueError(
                f"inkonsistente Alignment-Längen ({len(chars)}/{len(starts)}/{len(ends)})"
            )
        in_tag = False
        real = []
        for ch, start, end in zip(chars, starts, ends):
            if ch == '<':
                in_tag = True
            if not in_tag:
                real.append((ch, start, end))
            # Checked after the append so the closing '>' of a tag is skipped
            # too, while a stray '>' outside any tag still counts as text.
            if ch == '>':
                in_tag = False
        if not real:
            raise ValueError("keine Sprachzeichen nach SSML-Filter")
        return int(real[0][1] * 1000), int(real[-1][2] * 1000)
    except Exception as exc:
        # Deliberately broad: trimming is best-effort and must never abort the
        # mix; any problem with the alignment file degrades to the fallback.
        log(f"    ⚠ Fallback {ts_path.name}: {exc}")
        if audio_len_ms <= 2 * fallback_margin_ms:
            # Clip too short for the margins: never return start > end.
            return 0, max(0, audio_len_ms)
        return fallback_margin_ms, audio_len_ms - fallback_margin_ms

# --- Run -------------------------------------------------------------------
# Start every run with an empty log file, then write the banner.
LOG.write_text("")
log("=== Watson Mix (SSML-Filter-Fix) ===")

# Phase 1: load, normalise and trim every clip listed in DIALOG.
# Missing clips are reported and skipped; the mix continues without them.
trimmed = []
for clip_id, speaker in DIALOG:
    stem = f"{clip_id}_{speaker}"
    audio_path = CLIPS / f"{stem}.mp3"
    if not audio_path.exists():
        log(f"  ✗ {clip_id} fehlt: {audio_path.name}")
        continue

    levelled = effects.normalize(AudioSegment.from_mp3(str(audio_path)), headroom=1.0)
    total_ms = len(levelled)
    speech_start, speech_end = get_speech_bounds(CLIPS / f"{stem}.timestamps.json", total_ms)

    # Keep a little padding around the speech, clamped to the clip itself.
    lo = max(0, speech_start - LEAD_MS)
    hi = min(total_ms, speech_end + TAIL_MS)
    if hi - lo < 200:
        # Implausibly short (or inverted) window: keep the clip untrimmed.
        lo, hi = 0, total_ms
        log(f"  {clip_id} Sanity-Fallback → komplett")

    piece = levelled[lo:hi]
    log(f"  {clip_id} ({speaker:6s})  raw:{total_ms:5d}ms  cut:{lo:4d}–{hi:5d}ms  → {len(piece):5d}ms")
    trimmed.append((clip_id, piece))

log(f"\n{len(trimmed)}/{len(DIALOG)} Clips geladen")
if not trimmed:
    raise SystemExit("Keine Clips.")

# Phase 2: concatenate the clips, inserting the configured pause after each
# predecessor. Nothing is inserted after the final clip.
result = trimmed[0][1]
for (prev_id, _), (_, segment) in zip(trimmed, trimmed[1:]):
    gap_ms = PAUSE_AFTER.get(prev_id, 0)
    if gap_ms > 0:
        # NOTE(review): silence is generated at a fixed 44.1 kHz — confirm the
        # clips use the same sample rate.
        result = result + AudioSegment.silent(duration=gap_ms, frame_rate=44100)
    result = result.append(segment, crossfade=0)

# Phase 3: export the mix and report size and duration.
total_s = len(result) / 1000
log(f"\nGesamt: {total_s:.1f}s — Exportiere…")
result.export(
    str(OUT),
    format="mp3",
    bitrate="320k",
    tags={"title": "Holmes erklärt Watson Mathematik", "artist": "Watson Demo"},
)
size_kb = OUT.stat().st_size // 1024
log(f"Fertig: {size_kb} KB")
print(f"\n✓ {OUT.name}  {size_kb} KB  {total_s:.1f}s")
