spych.cli
spych CLI entry point.
Usage:
spych <agent> [options]
Examples:
spych ollama --model llama3.2:latest
spych --theme light claude_code_cli
spych claude_code_sdk --setting-sources user project local
spych codex_cli --listen-duration 8
spych gemini_cli
spych opencode_cli --model anthropic/claude-sonnet-4-5
# Live transcription
spych live
spych live --output-path my_transcript --output-format srt
spych live --stop-key q --terminate-words "stop recording"
spych live --no-timestamps --whisper-model small.en
# Multi-agent: run several agents under different wake words at once
spych multi --agents claude_code_sdk ollama --ollama-model llama3.2:latest
"""
spych CLI entry point.

Usage:
    spych <agent> [options]

Examples:
    spych ollama --model llama3.2:latest
    spych --theme light claude_code_cli
    spych claude_code_sdk --setting-sources user project local
    spych codex_cli --listen-duration 8
    spych gemini_cli
    spych opencode_cli --model anthropic/claude-sonnet-4-5

    # Live transcription
    spych live
    spych live --output-path my_transcript --output-format srt
    spych live --stop-key q --terminate-words "stop recording"
    spych live --no-timestamps --whisper-model small.en

    # Multi-agent: run several agents under different wake words at once
    spych multi --agents claude_code_sdk ollama --ollama-model llama3.2:latest
"""

import argparse
import sys


# Alias -> canonical agent name.  Used to normalise both the chosen
# sub-command and every value passed to ``multi --agents`` so the dispatch
# code only has to handle one name per agent.  Every alias accepted by the
# ``--agents`` choices list must have an entry here, otherwise it passes
# argparse validation but matches no dispatch branch.
_AGENT_ALIASES = {
    "claude": "claude_code_cli",
    "claude_sdk": "claude_code_sdk",
    "codex": "codex_cli",
    "gemini": "gemini_cli",
    "opencode": "opencode_cli",
}


def _parse_bool(value: str) -> bool:
    """Convert a command-line string into a bool.

    Accepts ``true``/``1``/``yes`` and ``false``/``0``/``no``
    (case-insensitive).

    Raises:
        argparse.ArgumentTypeError: if *value* is none of the accepted
            spellings, so argparse reports a normal usage error.
    """
    lowered = value.lower()
    if lowered in ("true", "1", "yes"):
        return True
    if lowered in ("false", "0", "no"):
        return False
    raise argparse.ArgumentTypeError(f"Boolean value expected, got: {value!r}")


def _add_shared_args(parser: argparse.ArgumentParser) -> None:
    """Args shared by all agents."""
    parser.add_argument(
        "--name",
        metavar="NAME",
        help="Custom display name for the agent",
    )
    parser.add_argument(
        "--wake-words",
        nargs="+",
        metavar="WORD",
        help="One or more wake words that trigger the agent",
    )
    parser.add_argument(
        "--terminate-words",
        nargs="+",
        metavar="WORD",
        default=["terminate"],
        help="Words that stop the listener (default: terminate)",
    )
    # No argparse default here: when the flag is omitted the value stays
    # None and _build_shared_kwargs leaves it out of the call entirely.
    parser.add_argument(
        "--listen-duration",
        type=float,
        metavar="SECONDS",
        help="Seconds to listen after wake word (default: 5)",
    )


def _add_agent_args(parser: argparse.ArgumentParser) -> None:
    """Args shared by all coding agents (non-Ollama)."""
    parser.add_argument(
        "--continue-conversation",
        type=_parse_bool,
        metavar="BOOL",
        default=True,
        help="Resume the most recent session (default: true)",
    )
    parser.add_argument(
        "--show-tool-events",
        type=_parse_bool,
        metavar="BOOL",
        default=True,
        help="Print live tool start/end events (default: true)",
    )


def _build_shared_kwargs(args: argparse.Namespace) -> dict:
    """Collect the shared options the user actually supplied.

    Options that are unset (``None``) or empty are omitted from the
    returned dict rather than passed through.
    """
    kwargs = {}
    if args.name is not None:
        kwargs["name"] = args.name
    if args.wake_words:
        kwargs["wake_words"] = args.wake_words
    if args.terminate_words:
        kwargs["terminate_words"] = args.terminate_words
    if args.listen_duration is not None:
        kwargs["listen_duration"] = args.listen_duration
    return kwargs


def _build_agent_kwargs(args: argparse.Namespace) -> dict:
    """Shared kwargs plus the two options common to every coding agent."""
    kwargs = _build_shared_kwargs(args)
    kwargs["continue_conversation"] = args.continue_conversation
    kwargs["show_tool_events"] = args.show_tool_events
    return kwargs


def _build_parser() -> argparse.ArgumentParser:
    """Build the top-level ``spych`` parser with one sub-command per agent."""
    parser = argparse.ArgumentParser(
        prog="spych",
        description="Launch a voice agent from the terminal.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    parser.add_argument(
        "--theme",
        default="dark",
        choices=["dark", "light", "solarized", "mono"],
        metavar="THEME",
        help=(
            "Colour theme for terminal output. "
            "Choices: dark (default), light, solarized, mono"
        ),
    )

    subparsers = parser.add_subparsers(dest="agent", metavar="agent")
    subparsers.required = True

    # ------------------------------------------------------------------ #
    # ollama                                                             #
    # ------------------------------------------------------------------ #
    p_ollama = subparsers.add_parser(
        "ollama", help="Talk to a local Ollama model"
    )
    _add_shared_args(p_ollama)
    p_ollama.add_argument(
        "--model",
        default="llama3.2:latest",
        metavar="MODEL",
        help="Ollama model name (default: llama3.2:latest)",
    )
    p_ollama.add_argument(
        "--history-length",
        type=int,
        default=10,
        metavar="N",
        help="Past interactions to include in context (default: 10)",
    )
    p_ollama.add_argument(
        "--host",
        default="http://localhost:11434",
        metavar="URL",
        help="Ollama instance URL (default: http://localhost:11434)",
    )

    # ------------------------------------------------------------------ #
    # claude_code_cli                                                    #
    # ------------------------------------------------------------------ #
    p_claude_cli = subparsers.add_parser(
        "claude_code_cli",
        aliases=["claude"],
        help="Voice-control Claude Code via the CLI",
    )
    _add_shared_args(p_claude_cli)
    _add_agent_args(p_claude_cli)

    # ------------------------------------------------------------------ #
    # claude_code_sdk                                                    #
    # ------------------------------------------------------------------ #
    # "claude_sdk" is advertised by ``multi --agents``; accept it here too
    # so the short name works consistently in both modes.
    p_claude_sdk = subparsers.add_parser(
        "claude_code_sdk",
        aliases=["claude_sdk"],
        help="Voice-control Claude Code via the Agent SDK",
    )
    _add_shared_args(p_claude_sdk)
    _add_agent_args(p_claude_sdk)
    p_claude_sdk.add_argument(
        "--setting-sources",
        nargs="+",
        metavar="SOURCE",
        default=["user", "project", "local"],
        help="Claude Code settings sources to load (default: user project local)",
    )

    # ------------------------------------------------------------------ #
    # codex_cli                                                          #
    # ------------------------------------------------------------------ #
    p_codex = subparsers.add_parser(
        "codex_cli",
        aliases=["codex"],
        help="Voice-control the OpenAI Codex agent",
    )
    _add_shared_args(p_codex)
    _add_agent_args(p_codex)

    # ------------------------------------------------------------------ #
    # gemini_cli                                                         #
    # ------------------------------------------------------------------ #
    p_gemini = subparsers.add_parser(
        "gemini_cli",
        aliases=["gemini"],
        help="Voice-control the Google Gemini agent",
    )
    _add_shared_args(p_gemini)
    _add_agent_args(p_gemini)

    # ------------------------------------------------------------------ #
    # opencode_cli                                                       #
    # ------------------------------------------------------------------ #
    p_opencode = subparsers.add_parser(
        "opencode_cli",
        aliases=["opencode"],
        help="Voice-control the OpenCode agent",
    )
    _add_shared_args(p_opencode)
    _add_agent_args(p_opencode)
    p_opencode.add_argument(
        "--model",
        default=None,
        metavar="MODEL",
        help="Model in provider/model format, e.g. anthropic/claude-sonnet-4-5",
    )

    # ------------------------------------------------------------------ #
    # live — continuous transcription to file                            #
    # ------------------------------------------------------------------ #
    p_live = subparsers.add_parser(
        "live",
        help="Continuously transcribe speech to .txt and/or .srt files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "Start a live transcription session. Records continuously using VAD\n"
            "and writes output to disk in real time.\n\n"
            "Stop by pressing the stop key (default: q + Enter), saying a\n"
            "terminate word, or pressing Ctrl+C."
        ),
    )
    p_live.add_argument(
        "--output-path",
        default="transcript",
        metavar="PATH",
        help="Base output file path without extension (default: transcript)",
    )
    # The help text previously claimed "default: both" while the actual
    # default is "srt"; the text now reports the value really used.
    p_live.add_argument(
        "--output-format",
        default="srt",
        choices=["txt", "srt", "both"],
        metavar="FORMAT",
        help="Output format: txt, srt, or both (default: srt)",
    )
    p_live.add_argument(
        "--no-timestamps",
        action="store_true",
        help="Omit timestamps from terminal and .txt output",
    )
    p_live.add_argument(
        "--stop-key",
        default="q",
        metavar="KEY",
        help="Key to type (then Enter) to stop the session (default: q)",
    )
    p_live.add_argument(
        "--terminate-words",
        nargs="+",
        metavar="WORD",
        help="Spoken words that stop the session (e.g. 'stop recording')",
    )
    p_live.add_argument(
        "--device-index",
        type=int,
        default=-1,
        metavar="N",
        help="Microphone device index; -1 uses system default (default: -1)",
    )
    p_live.add_argument(
        "--whisper-model",
        default="base.en",
        metavar="MODEL",
        help="faster-whisper model name (default: base.en)",
    )
    p_live.add_argument(
        "--whisper-device",
        default="cpu",
        choices=["cpu", "cuda"],
        metavar="DEVICE",
        help="Device for whisper inference: cpu or cuda (default: cpu)",
    )
    p_live.add_argument(
        "--whisper-compute-type",
        default="int8",
        choices=["int8", "float16", "float32"],
        metavar="TYPE",
        help="Compute type for whisper: int8, float16, float32 (default: int8)",
    )
    p_live.add_argument(
        "--no-speech-threshold",
        type=float,
        default=0.3,
        metavar="FLOAT",
        help="Whisper no_speech_prob cutoff — segments above this are dropped (default: 0.3)",
    )
    p_live.add_argument(
        "--speech-threshold",
        type=float,
        default=0.5,
        metavar="FLOAT",
        help="VAD speech onset probability (default: 0.5)",
    )
    p_live.add_argument(
        "--silence-threshold",
        type=float,
        default=0.35,
        metavar="FLOAT",
        help="VAD silence probability during speech (default: 0.35)",
    )
    p_live.add_argument(
        "--silence-frames",
        type=int,
        default=20,
        metavar="N",
        help="Consecutive silent frames required to end a segment (~32ms each, default: 20)",
    )
    p_live.add_argument(
        "--speech-pad-frames",
        type=int,
        default=5,
        metavar="N",
        help="Pre-roll frames and onset confirmation count (default: 5)",
    )
    p_live.add_argument(
        "--max-speech-duration",
        type=float,
        default=30.0,
        metavar="SECONDS",
        help="Hard cap on a single segment in seconds (default: 30.0)",
    )
    p_live.add_argument(
        "--context-words",
        type=int,
        default=32,
        metavar="N",
        help="Trailing words passed as whisper initial_prompt for context (default: 32)",
    )

    # ------------------------------------------------------------------ #
    # multi — several agents at once                                     #
    # ------------------------------------------------------------------ #
    p_multi = subparsers.add_parser(
        "multi",
        help="Run multiple agents simultaneously under different wake words",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "Run several agents at once. Each agent uses its own default wake "
            "words unless overridden.\n\n"
            "Example:\n"
            "  spych multi --agents claude_code_cli gemini_cli\n"
            "  spych multi --agents claude_code_cli ollama --ollama-model llama3.2:latest\n"
            "  spych multi --agents claude_code_sdk codex_cli --listen-duration 8"
        ),
    )
    p_multi.add_argument(
        "--agents",
        nargs="+",
        required=True,
        metavar="AGENT",
        choices=[
            "claude_code_cli",
            "claude",
            "claude_code_sdk",
            "claude_sdk",
            "codex_cli",
            "codex",
            "gemini_cli",
            "gemini",
            "opencode_cli",
            "opencode",
            "ollama",
        ],
        help=(
            "Agents to run. Choices: claude (claude_code_cli), "
            "claude_sdk (claude_code_sdk), codex (codex_cli), "
            "gemini (gemini_cli), opencode (opencode_cli), ollama"
        ),
    )
    p_multi.add_argument(
        "--terminate-words",
        nargs="+",
        metavar="WORD",
        default=["terminate"],
        help="Words that stop all agents (default: terminate)",
    )
    p_multi.add_argument(
        "--listen-duration",
        type=float,
        default=5,
        metavar="SECONDS",
        help="Seconds to listen after a wake word (default: 5)",
    )
    p_multi.add_argument(
        "--continue-conversation",
        type=_parse_bool,
        default=True,
        metavar="BOOL",
        help="Resume most recent session for each coding agent (default: true)",
    )
    p_multi.add_argument(
        "--show-tool-events",
        type=_parse_bool,
        default=True,
        metavar="BOOL",
        help="Print live tool start/end events (default: true)",
    )
    # ollama-specific flags (only used when 'ollama' is in --agents)
    p_multi.add_argument(
        "--ollama-model",
        default="llama3.2:latest",
        metavar="MODEL",
        help="Ollama model (default: llama3.2:latest). Only used when ollama is in --agents.",
    )
    p_multi.add_argument(
        "--ollama-host",
        default="http://localhost:11434",
        metavar="URL",
        help="Ollama instance URL (default: http://localhost:11434). Only used when ollama is in --agents.",
    )
    p_multi.add_argument(
        "--ollama-history-length",
        type=int,
        default=10,
        metavar="N",
        help="Ollama context history length (default: 10). Only used when ollama is in --agents.",
    )
    # opencode-specific flag
    p_multi.add_argument(
        "--opencode-model",
        default=None,
        metavar="MODEL",
        help="OpenCode model in provider/model format. Only used when opencode_cli is in --agents.",
    )
    # claude_code_sdk-specific flag
    p_multi.add_argument(
        "--setting-sources",
        nargs="+",
        metavar="SOURCE",
        default=["user", "project", "local"],
        help="Claude Code SDK setting sources (default: user project local). Only used when claude_code_sdk is in --agents.",
    )

    return parser


def _run_multi(args: argparse.Namespace) -> None:
    """Build one responder per requested agent and start the orchestrator."""
    from spych.core import Spych
    from spych.orchestrator import SpychOrchestrator

    # A single Spych transcription object shared by all responders.
    spych_object = Spych(whisper_model="base.en")

    entries = []

    def add_entry(responder, wake_words):
        entries.append(
            {
                "responder": responder,
                "wake_words": wake_words,
                "terminate_words": args.terminate_words,
            }
        )

    # Resolve aliases first so each agent is matched under exactly one name.
    # Responder modules are imported lazily, only for the agents requested.
    for agent_name in [_AGENT_ALIASES.get(a, a) for a in args.agents]:
        if agent_name == "claude_code_cli":
            from spych.agents.claude import LocalClaudeCodeCLIResponder

            add_entry(
                LocalClaudeCodeCLIResponder(
                    spych_object=spych_object,
                    continue_conversation=args.continue_conversation,
                    listen_duration=args.listen_duration,
                    show_tool_events=args.show_tool_events,
                ),
                ["claude", "clod", "cloud", "clawed"],
            )

        elif agent_name == "claude_code_sdk":
            from spych.agents.claude import LocalClaudeCodeSDKResponder

            add_entry(
                LocalClaudeCodeSDKResponder(
                    spych_object=spych_object,
                    continue_conversation=args.continue_conversation,
                    listen_duration=args.listen_duration,
                    setting_sources=args.setting_sources,
                    show_tool_events=args.show_tool_events,
                ),
                ["claude", "clod", "cloud", "clawed"],
            )

        elif agent_name == "codex_cli":
            from spych.agents.codex import LocalCodexCLIResponder

            add_entry(
                LocalCodexCLIResponder(
                    spych_object=spych_object,
                    continue_conversation=args.continue_conversation,
                    listen_duration=args.listen_duration,
                    show_tool_events=args.show_tool_events,
                ),
                ["codex"],
            )

        elif agent_name == "gemini_cli":
            from spych.agents.gemini import LocalGeminiCLIResponder

            add_entry(
                LocalGeminiCLIResponder(
                    spych_object=spych_object,
                    continue_conversation=args.continue_conversation,
                    listen_duration=args.listen_duration,
                    show_tool_events=args.show_tool_events,
                ),
                ["gemini"],
            )

        elif agent_name == "opencode_cli":
            from spych.agents.opencode import LocalOpenCodeCLIResponder

            add_entry(
                LocalOpenCodeCLIResponder(
                    spych_object=spych_object,
                    continue_conversation=args.continue_conversation,
                    listen_duration=args.listen_duration,
                    show_tool_events=args.show_tool_events,
                    model=args.opencode_model,
                ),
                ["opencode", "open code"],
            )

        elif agent_name == "ollama":
            from spych.agents.ollama import OllamaResponder

            add_entry(
                OllamaResponder(
                    spych_object=spych_object,
                    model=args.ollama_model,
                    history_length=args.ollama_history_length,
                    host=args.ollama_host,
                    listen_duration=args.listen_duration,
                ),
                ["llama", "ollama", "lama"],
            )

    SpychOrchestrator(entries=entries).start()


def main():
    """Parse the command line and launch the selected agent or mode."""
    parser = _build_parser()
    args = parser.parse_args()

    # Normalise any alias back to the canonical agent name so the dispatch
    # block below only needs to handle one name per agent.
    args.agent = _AGENT_ALIASES.get(args.agent, args.agent)

    # Apply color theme as early as possible so all subsequent output uses it.
    if args.theme != "dark":
        from spych.cli_tools import set_theme

        set_theme(args.theme)

    # ------------------------------------------------------------------ #
    # Single-agent dispatch                                              #
    # ------------------------------------------------------------------ #
    if args.agent == "ollama":
        from spych.agents import ollama

        kwargs = _build_shared_kwargs(args)
        kwargs["model"] = args.model
        kwargs["history_length"] = args.history_length
        kwargs["host"] = args.host
        ollama(**kwargs)

    elif args.agent == "claude_code_cli":
        from spych.agents import claude_code_cli

        claude_code_cli(**_build_agent_kwargs(args))

    elif args.agent == "claude_code_sdk":
        from spych.agents import claude_code_sdk

        kwargs = _build_agent_kwargs(args)
        kwargs["setting_sources"] = args.setting_sources
        claude_code_sdk(**kwargs)

    elif args.agent == "codex_cli":
        from spych.agents import codex_cli

        codex_cli(**_build_agent_kwargs(args))

    elif args.agent == "gemini_cli":
        from spych.agents import gemini_cli

        gemini_cli(**_build_agent_kwargs(args))

    elif args.agent == "opencode_cli":
        from spych.agents import opencode_cli

        kwargs = _build_agent_kwargs(args)
        if args.model is not None:
            kwargs["model"] = args.model
        opencode_cli(**kwargs)

    elif args.agent == "live":
        from spych.live import SpychLive

        SpychLive(
            output_format=args.output_format,
            output_path=args.output_path,
            show_timestamps=not args.no_timestamps,
            stop_key=args.stop_key,
            terminate_words=args.terminate_words,
            device_index=args.device_index,
            whisper_model=args.whisper_model,
            whisper_device=args.whisper_device,
            whisper_compute_type=args.whisper_compute_type,
            no_speech_threshold=args.no_speech_threshold,
            speech_threshold=args.speech_threshold,
            silence_threshold=args.silence_threshold,
            silence_frames_threshold=args.silence_frames,
            speech_pad_frames=args.speech_pad_frames,
            max_speech_duration_s=args.max_speech_duration,
            context_words=args.context_words,
        ).start()

    # ------------------------------------------------------------------ #
    # Multi-agent dispatch                                               #
    # ------------------------------------------------------------------ #
    elif args.agent == "multi":
        _run_multi(args)

    else:
        # Defensive fallback: the sub-command is required, so argparse
        # should already have rejected anything that reaches this point.
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()
def
main():
104def main(): 105 parser = argparse.ArgumentParser( 106 prog="spych", 107 description="Launch a voice agent from the terminal.", 108 formatter_class=argparse.RawDescriptionHelpFormatter, 109 epilog=__doc__, 110 ) 111 112 parser.add_argument( 113 "--theme", 114 default="dark", 115 choices=["dark", "light", "solarized", "mono"], 116 metavar="THEME", 117 help=( 118 "Colour theme for terminal output. " 119 "Choices: dark (default), light, solarized, mono" 120 ), 121 ) 122 123 subparsers = parser.add_subparsers(dest="agent", metavar="agent") 124 subparsers.required = True 125 126 # Aliases → canonical name; used to normalise args.agent after parsing. 127 _AGENT_ALIASES: dict[str, str] = { 128 "claude": "claude_code_cli", 129 "codex": "codex_cli", 130 "gemini": "gemini_cli", 131 "opencode": "opencode_cli", 132 } 133 134 # ------------------------------------------------------------------ # 135 # ollama # 136 # ------------------------------------------------------------------ # 137 p_ollama = subparsers.add_parser( 138 "ollama", help="Talk to a local Ollama model" 139 ) 140 _add_shared_args(p_ollama) 141 p_ollama.add_argument( 142 "--model", 143 default="llama3.2:latest", 144 metavar="MODEL", 145 help="Ollama model name (default: llama3.2:latest)", 146 ) 147 p_ollama.add_argument( 148 "--history-length", 149 type=int, 150 default=10, 151 metavar="N", 152 help="Past interactions to include in context (default: 10)", 153 ) 154 p_ollama.add_argument( 155 "--host", 156 default="http://localhost:11434", 157 metavar="URL", 158 help="Ollama instance URL (default: http://localhost:11434)", 159 ) 160 161 # ------------------------------------------------------------------ # 162 # claude_code_cli # 163 # ------------------------------------------------------------------ # 164 p_claude_cli = subparsers.add_parser( 165 "claude_code_cli", 166 aliases=["claude"], 167 help="Voice-control Claude Code via the CLI", 168 ) 169 _add_shared_args(p_claude_cli) 170 
_add_agent_args(p_claude_cli) 171 172 # ------------------------------------------------------------------ # 173 # claude_code_sdk # 174 # ------------------------------------------------------------------ # 175 p_claude_sdk = subparsers.add_parser( 176 "claude_code_sdk", 177 help="Voice-control Claude Code via the Agent SDK", 178 ) 179 _add_shared_args(p_claude_sdk) 180 _add_agent_args(p_claude_sdk) 181 p_claude_sdk.add_argument( 182 "--setting-sources", 183 nargs="+", 184 metavar="SOURCE", 185 default=["user", "project", "local"], 186 help="Claude Code settings sources to load (default: user project local)", 187 ) 188 189 # ------------------------------------------------------------------ # 190 # codex_cli # 191 # ------------------------------------------------------------------ # 192 p_codex = subparsers.add_parser( 193 "codex_cli", 194 aliases=["codex"], 195 help="Voice-control the OpenAI Codex agent", 196 ) 197 _add_shared_args(p_codex) 198 _add_agent_args(p_codex) 199 200 # ------------------------------------------------------------------ # 201 # gemini_cli # 202 # ------------------------------------------------------------------ # 203 p_gemini = subparsers.add_parser( 204 "gemini_cli", 205 aliases=["gemini"], 206 help="Voice-control the Google Gemini agent", 207 ) 208 _add_shared_args(p_gemini) 209 _add_agent_args(p_gemini) 210 211 # ------------------------------------------------------------------ # 212 # opencode_cli # 213 # ------------------------------------------------------------------ # 214 p_opencode = subparsers.add_parser( 215 "opencode_cli", 216 aliases=["opencode"], 217 help="Voice-control the OpenCode agent", 218 ) 219 _add_shared_args(p_opencode) 220 _add_agent_args(p_opencode) 221 p_opencode.add_argument( 222 "--model", 223 default=None, 224 metavar="MODEL", 225 help="Model in provider/model format, e.g. 
anthropic/claude-sonnet-4-5", 226 ) 227 228 # ------------------------------------------------------------------ # 229 # live — continuous transcription to file # 230 # ------------------------------------------------------------------ # 231 p_live = subparsers.add_parser( 232 "live", 233 help="Continuously transcribe speech to .txt and/or .srt files", 234 formatter_class=argparse.RawDescriptionHelpFormatter, 235 description=( 236 "Start a live transcription session. Records continuously using VAD\n" 237 "and writes output to disk in real time.\n\n" 238 "Stop by pressing the stop key (default: q + Enter), saying a\n" 239 "terminate word, or pressing Ctrl+C." 240 ), 241 ) 242 p_live.add_argument( 243 "--output-path", 244 default="transcript", 245 metavar="PATH", 246 help="Base output file path without extension (default: transcript)", 247 ) 248 p_live.add_argument( 249 "--output-format", 250 default="srt", 251 choices=["txt", "srt", "both"], 252 metavar="FORMAT", 253 help="Output format: txt, srt, or both (default: both)", 254 ) 255 p_live.add_argument( 256 "--no-timestamps", 257 action="store_true", 258 help="Omit timestamps from terminal and .txt output", 259 ) 260 p_live.add_argument( 261 "--stop-key", 262 default="q", 263 metavar="KEY", 264 help="Key to type (then Enter) to stop the session (default: q)", 265 ) 266 p_live.add_argument( 267 "--terminate-words", 268 nargs="+", 269 metavar="WORD", 270 help="Spoken words that stop the session (e.g. 
'stop recording')", 271 ) 272 p_live.add_argument( 273 "--device-index", 274 type=int, 275 default=-1, 276 metavar="N", 277 help="Microphone device index; -1 uses system default (default: -1)", 278 ) 279 p_live.add_argument( 280 "--whisper-model", 281 default="base.en", 282 metavar="MODEL", 283 help="faster-whisper model name (default: base.en)", 284 ) 285 p_live.add_argument( 286 "--whisper-device", 287 default="cpu", 288 choices=["cpu", "cuda"], 289 metavar="DEVICE", 290 help="Device for whisper inference: cpu or cuda (default: cpu)", 291 ) 292 p_live.add_argument( 293 "--whisper-compute-type", 294 default="int8", 295 choices=["int8", "float16", "float32"], 296 metavar="TYPE", 297 help="Compute type for whisper: int8, float16, float32 (default: int8)", 298 ) 299 p_live.add_argument( 300 "--no-speech-threshold", 301 type=float, 302 default=0.3, 303 metavar="FLOAT", 304 help="Whisper no_speech_prob cutoff — segments above this are dropped (default: 0.3)", 305 ) 306 p_live.add_argument( 307 "--speech-threshold", 308 type=float, 309 default=0.5, 310 metavar="FLOAT", 311 help="VAD speech onset probability (default: 0.5)", 312 ) 313 p_live.add_argument( 314 "--silence-threshold", 315 type=float, 316 default=0.35, 317 metavar="FLOAT", 318 help="VAD silence probability during speech (default: 0.35)", 319 ) 320 p_live.add_argument( 321 "--silence-frames", 322 type=int, 323 default=20, 324 metavar="N", 325 help="Consecutive silent frames required to end a segment (~32ms each, default: 20)", 326 ) 327 p_live.add_argument( 328 "--speech-pad-frames", 329 type=int, 330 default=5, 331 metavar="N", 332 help="Pre-roll frames and onset confirmation count (default: 5)", 333 ) 334 p_live.add_argument( 335 "--max-speech-duration", 336 type=float, 337 default=30.0, 338 metavar="SECONDS", 339 help="Hard cap on a single segment in seconds (default: 30.0)", 340 ) 341 p_live.add_argument( 342 "--context-words", 343 type=int, 344 default=32, 345 metavar="N", 346 help="Trailing words passed 
as whisper initial_prompt for context (default: 32)", 347 ) 348 349 # ------------------------------------------------------------------ # 350 p_multi = subparsers.add_parser( 351 "multi", 352 help="Run multiple agents simultaneously under different wake words", 353 formatter_class=argparse.RawDescriptionHelpFormatter, 354 description=( 355 "Run several agents at once. Each agent uses its own default wake " 356 "words unless overridden.\n\n" 357 "Example:\n" 358 " spych multi --agents claude_code_cli gemini_cli\n" 359 " spych multi --agents claude_code_cli ollama --ollama-model llama3.2:latest\n" 360 " spych multi --agents claude_code_sdk codex_cli --listen-duration 8" 361 ), 362 ) 363 p_multi.add_argument( 364 "--agents", 365 nargs="+", 366 required=True, 367 metavar="AGENT", 368 choices=[ 369 "claude_code_cli", 370 "claude", 371 "claude_code_sdk", 372 "claude_sdk", 373 "codex_cli", 374 "codex", 375 "gemini_cli", 376 "gemini", 377 "opencode_cli", 378 "opencode", 379 "ollama", 380 ], 381 help=( 382 "Agents to run. 
Choices: claude (claude_code_cli), " 383 "claude_sdk (claude_code_sdk), codex (codex_cli), " 384 "gemini (gemini_cli), opencode (opencode_cli), ollama" 385 ), 386 ) 387 p_multi.add_argument( 388 "--terminate-words", 389 nargs="+", 390 metavar="WORD", 391 default=["terminate"], 392 help="Words that stop all agents (default: terminate)", 393 ) 394 p_multi.add_argument( 395 "--listen-duration", 396 type=float, 397 default=5, 398 metavar="SECONDS", 399 help="Seconds to listen after a wake word (default: 5)", 400 ) 401 p_multi.add_argument( 402 "--continue-conversation", 403 type=_parse_bool, 404 default=True, 405 metavar="BOOL", 406 help="Resume most recent session for each coding agent (default: true)", 407 ) 408 p_multi.add_argument( 409 "--show-tool-events", 410 type=_parse_bool, 411 default=True, 412 metavar="BOOL", 413 help="Print live tool start/end events (default: true)", 414 ) 415 # ollama-specific flags (only used when 'ollama' is in --agents) 416 p_multi.add_argument( 417 "--ollama-model", 418 default="llama3.2:latest", 419 metavar="MODEL", 420 help="Ollama model (default: llama3.2:latest). Only used when ollama is in --agents.", 421 ) 422 p_multi.add_argument( 423 "--ollama-host", 424 default="http://localhost:11434", 425 metavar="URL", 426 help="Ollama instance URL (default: http://localhost:11434). Only used when ollama is in --agents.", 427 ) 428 p_multi.add_argument( 429 "--ollama-history-length", 430 type=int, 431 default=10, 432 metavar="N", 433 help="Ollama context history length (default: 10). Only used when ollama is in --agents.", 434 ) 435 # opencode-specific flag 436 p_multi.add_argument( 437 "--opencode-model", 438 default=None, 439 metavar="MODEL", 440 help="OpenCode model in provider/model format. 
Only used when opencode_cli is in --agents.", 441 ) 442 # claude_code_sdk-specific flag 443 p_multi.add_argument( 444 "--setting-sources", 445 nargs="+", 446 metavar="SOURCE", 447 default=["user", "project", "local"], 448 help="Claude Code SDK setting sources (default: user project local). Only used when claude_code_sdk is in --agents.", 449 ) 450 451 # ------------------------------------------------------------------ # 452 # Dispatch # 453 # ------------------------------------------------------------------ # 454 args = parser.parse_args() 455 456 # Normalise any alias back to the canonical agent name so the dispatch 457 # block below only needs to handle one name per agent. 458 args.agent = _AGENT_ALIASES.get(args.agent, args.agent) 459 460 # Apply color theme as early as possible so all subsequent output uses it. 461 if args.theme != "dark": 462 from spych.cli_tools import set_theme 463 464 set_theme(args.theme) 465 466 # ------------------------------------------------------------------ # 467 # Single-agent dispatch # 468 # ------------------------------------------------------------------ # 469 if args.agent == "ollama": 470 from spych.agents import ollama 471 472 kwargs = _build_shared_kwargs(args) 473 kwargs["model"] = args.model 474 kwargs["history_length"] = args.history_length 475 kwargs["host"] = args.host 476 ollama(**kwargs) 477 478 elif args.agent == "claude_code_cli": 479 from spych.agents import claude_code_cli 480 481 claude_code_cli(**_build_agent_kwargs(args)) 482 483 elif args.agent == "claude_code_sdk": 484 from spych.agents import claude_code_sdk 485 486 kwargs = _build_agent_kwargs(args) 487 kwargs["setting_sources"] = args.setting_sources 488 claude_code_sdk(**kwargs) 489 490 elif args.agent == "codex_cli": 491 from spych.agents import codex_cli 492 493 codex_cli(**_build_agent_kwargs(args)) 494 495 elif args.agent == "gemini_cli": 496 from spych.agents import gemini_cli 497 498 gemini_cli(**_build_agent_kwargs(args)) 499 500 elif 
args.agent == "opencode_cli": 501 from spych.agents import opencode_cli 502 503 kwargs = _build_agent_kwargs(args) 504 if args.model is not None: 505 kwargs["model"] = args.model 506 opencode_cli(**kwargs) 507 508 elif args.agent == "live": 509 from spych.live import SpychLive 510 511 SpychLive( 512 output_format=args.output_format, 513 output_path=args.output_path, 514 show_timestamps=not args.no_timestamps, 515 stop_key=args.stop_key, 516 terminate_words=args.terminate_words, 517 device_index=args.device_index, 518 whisper_model=args.whisper_model, 519 whisper_device=args.whisper_device, 520 whisper_compute_type=args.whisper_compute_type, 521 no_speech_threshold=args.no_speech_threshold, 522 speech_threshold=args.speech_threshold, 523 silence_threshold=args.silence_threshold, 524 silence_frames_threshold=args.silence_frames, 525 speech_pad_frames=args.speech_pad_frames, 526 max_speech_duration_s=args.max_speech_duration, 527 context_words=args.context_words, 528 ).start() 529 530 # ------------------------------------------------------------------ # 531 # Multi-agent dispatch # 532 # ------------------------------------------------------------------ # 533 elif args.agent == "multi": 534 from spych.core import Spych 535 from spych.orchestrator import SpychOrchestrator 536 537 # A single Spych transcription object shared by all responders. 
538 spych_object = Spych(whisper_model="base.en") 539 540 entries = [] 541 542 for agent_name in [_AGENT_ALIASES.get(a, a) for a in args.agents]: 543 if agent_name == "claude_code_cli": 544 from spych.agents.claude import LocalClaudeCodeCLIResponder 545 546 entries.append( 547 { 548 "responder": LocalClaudeCodeCLIResponder( 549 spych_object=spych_object, 550 continue_conversation=args.continue_conversation, 551 listen_duration=args.listen_duration, 552 show_tool_events=args.show_tool_events, 553 ), 554 "wake_words": ["claude", "clod", "cloud", "clawed"], 555 "terminate_words": args.terminate_words, 556 } 557 ) 558 559 elif agent_name == "claude_code_sdk": 560 from spych.agents.claude import LocalClaudeCodeSDKResponder 561 562 entries.append( 563 { 564 "responder": LocalClaudeCodeSDKResponder( 565 spych_object=spych_object, 566 continue_conversation=args.continue_conversation, 567 listen_duration=args.listen_duration, 568 setting_sources=args.setting_sources, 569 show_tool_events=args.show_tool_events, 570 ), 571 "wake_words": ["claude", "clod", "cloud", "clawed"], 572 "terminate_words": args.terminate_words, 573 } 574 ) 575 576 elif agent_name == "codex_cli": 577 from spych.agents.codex import LocalCodexCLIResponder 578 579 entries.append( 580 { 581 "responder": LocalCodexCLIResponder( 582 spych_object=spych_object, 583 continue_conversation=args.continue_conversation, 584 listen_duration=args.listen_duration, 585 show_tool_events=args.show_tool_events, 586 ), 587 "wake_words": ["codex"], 588 "terminate_words": args.terminate_words, 589 } 590 ) 591 592 elif agent_name == "gemini_cli": 593 from spych.agents.gemini import LocalGeminiCLIResponder 594 595 entries.append( 596 { 597 "responder": LocalGeminiCLIResponder( 598 spych_object=spych_object, 599 continue_conversation=args.continue_conversation, 600 listen_duration=args.listen_duration, 601 show_tool_events=args.show_tool_events, 602 ), 603 "wake_words": ["gemini"], 604 "terminate_words": args.terminate_words, 
605 } 606 ) 607 608 elif agent_name == "opencode_cli": 609 from spych.agents.opencode import LocalOpenCodeCLIResponder 610 611 entries.append( 612 { 613 "responder": LocalOpenCodeCLIResponder( 614 spych_object=spych_object, 615 continue_conversation=args.continue_conversation, 616 listen_duration=args.listen_duration, 617 show_tool_events=args.show_tool_events, 618 model=args.opencode_model, 619 ), 620 "wake_words": ["opencode", "open code"], 621 "terminate_words": args.terminate_words, 622 } 623 ) 624 625 elif agent_name == "ollama": 626 from spych.agents.ollama import OllamaResponder 627 628 entries.append( 629 { 630 "responder": OllamaResponder( 631 spych_object=spych_object, 632 model=args.ollama_model, 633 history_length=args.ollama_history_length, 634 host=args.ollama_host, 635 listen_duration=args.listen_duration, 636 ), 637 "wake_words": ["llama", "ollama", "lama"], 638 "terminate_words": args.terminate_words, 639 } 640 ) 641 642 SpychOrchestrator(entries=entries).start() 643 644 else: 645 parser.print_help() 646 sys.exit(1)