// vtt_rs/config.rs
1use anyhow::{Context, Result};
2use serde::Deserialize;
3use std::{
4 fs,
5 path::{Path, PathBuf},
6};
7
/// Default location of the JSON config file, relative to the working directory.
const DEFAULT_CONFIG_PATH: &str = "vtt.config.json";
/// Default length of each captured audio chunk, in seconds.
const DEFAULT_CHUNK_DURATION_SECS: usize = 5;
/// Default transcription model identifier.
const DEFAULT_MODEL: &str = "whisper-1";
/// Default OpenAI-compatible transcription endpoint.
const DEFAULT_ENDPOINT: &str = "https://api.openai.com/v1/audio/transcriptions";
12
13/// Configuration for the transcription service.
14///
15/// This struct controls how audio is captured, chunked, and sent to the transcription API.
16/// All fields have sensible defaults and can be loaded from a JSON configuration file.
17///
18/// # Examples
19///
20/// ## Using defaults
21///
22/// ```
23/// use vtt_rs::Config;
24///
25/// let config = Config::default();
26/// assert_eq!(config.chunk_duration_secs, 5);
27/// assert_eq!(config.model, "whisper-1");
28/// ```
29///
30/// ## Custom configuration
31///
32/// ```
33/// use vtt_rs::Config;
34/// use std::path::PathBuf;
35///
36/// let config = Config {
37/// chunk_duration_secs: 3,
38/// model: "whisper-1".to_string(),
39/// endpoint: "https://api.openai.com/v1/audio/transcriptions".to_string(),
40/// out_file: Some(PathBuf::from("output.log")),
41/// };
42/// ```
43///
44/// ## Loading from JSON file
45///
46/// ```no_run
47/// use vtt_rs::Config;
48///
49/// # fn main() -> anyhow::Result<()> {
50/// let config = Config::from_file("config.json")?;
51/// # Ok(())
52/// # }
53/// ```
54#[derive(Debug, Deserialize, Clone)]
55#[serde(default)]
56pub struct Config {
57 /// Duration of each audio chunk in seconds.
58 ///
59 /// Smaller values (2-3 seconds) provide faster response times but may reduce accuracy.
60 /// Larger values (5-10 seconds) improve accuracy but increase latency.
61 pub chunk_duration_secs: usize,
62
63 /// OpenAI model to use for transcription.
64 ///
65 /// Common values: `"whisper-1"` for OpenAI's Whisper model.
66 pub model: String,
67
68 /// API endpoint for transcription requests.
69 ///
70 /// Defaults to OpenAI's endpoint but can be changed to support local Whisper
71 /// instances or other OpenAI-compatible APIs.
72 pub endpoint: String,
73
74 /// Optional file path to append transcription logs.
75 ///
76 /// When set, all transcriptions (including silence markers) will be written
77 /// to this file in addition to being sent via events. Use [`None`] to disable
78 /// file logging.
79 pub out_file: Option<PathBuf>,
80
81 /// Optional on-device configuration.
82 ///
83 /// When present and `enabled`, the service will run Whisper locally using
84 /// the bundled Candle integration instead of calling a remote API.
85 pub on_device: Option<OnDeviceConfig>,
86}
87
88impl Default for Config {
89 fn default() -> Self {
90 Self {
91 chunk_duration_secs: DEFAULT_CHUNK_DURATION_SECS,
92 model: DEFAULT_MODEL.to_string(),
93 endpoint: DEFAULT_ENDPOINT.to_string(),
94 out_file: None,
95 on_device: None,
96 }
97 }
98}
99
100impl Config {
101 /// Loads configuration from a JSON file at the specified path.
102 ///
103 /// The JSON file should contain fields matching the [`Config`] struct.
104 /// Missing fields will use their default values via serde's `#[serde(default)]`.
105 ///
106 /// # Examples
107 ///
108 /// ```no_run
109 /// use vtt_rs::Config;
110 ///
111 /// # fn main() -> anyhow::Result<()> {
112 /// let config = Config::from_file("my_config.json")?;
113 /// println!("Using model: {}", config.model);
114 /// # Ok(())
115 /// # }
116 /// ```
117 ///
118 /// # Errors
119 ///
120 /// Returns an error if:
121 /// - The file cannot be read
122 /// - The file contains invalid JSON
123 /// - The JSON structure doesn't match the expected format
124 pub fn from_file(path: impl AsRef<Path>) -> Result<Self> {
125 let path = path.as_ref();
126 let contents = fs::read_to_string(path)
127 .with_context(|| format!("reading config from {}", path.display()))?;
128 serde_json::from_str(&contents)
129 .with_context(|| format!("parsing config from {}", path.display()))
130 }
131
132 /// Attempts to load configuration from `vtt.config.json` in the current directory.
133 ///
134 /// If the default config file exists, it will be loaded. Otherwise, returns
135 /// default configuration values.
136 ///
137 /// # Examples
138 ///
139 /// ```no_run
140 /// use vtt_rs::Config;
141 ///
142 /// # fn main() -> anyhow::Result<()> {
143 /// let (config, source_path) = Config::load_or_default()?;
144 ///
145 /// if let Some(path) = source_path {
146 /// println!("Loaded config from: {}", path.display());
147 /// } else {
148 /// println!("Using default configuration");
149 /// }
150 /// # Ok(())
151 /// # }
152 /// ```
153 ///
154 /// # Errors
155 ///
156 /// Returns an error if the default config file exists but cannot be read or parsed.
157 pub fn load_or_default() -> Result<(Self, Option<PathBuf>)> {
158 let default_path = PathBuf::from(DEFAULT_CONFIG_PATH);
159 if default_path.exists() {
160 let config = Self::from_file(&default_path)?;
161 Ok((config, Some(default_path)))
162 } else {
163 Ok((Self::default(), None))
164 }
165 }
166
167 /// Resolves the output file path relative to the configuration file's location.
168 ///
169 /// This method handles both absolute and relative paths in `out_file`:
170 /// - Absolute paths are returned as-is
171 /// - Relative paths are resolved relative to the config file's directory
172 /// - If no source path is provided, relative paths remain unchanged
173 ///
174 /// # Examples
175 ///
176 /// ```
177 /// use vtt_rs::Config;
178 /// use std::path::{Path, PathBuf};
179 ///
180 /// let mut config = Config::default();
181 /// config.out_file = Some(PathBuf::from("logs/output.log"));
182 ///
183 /// let source = Path::new("/etc/vtt/config.json");
184 /// let resolved = config.resolve_out_path(Some(source));
185 ///
186 /// assert_eq!(resolved, Some(PathBuf::from("/etc/vtt/logs/output.log")));
187 /// ```
188 pub fn resolve_out_path(&self, source: Option<&Path>) -> Option<PathBuf> {
189 self.out_file.as_ref().map(|path| {
190 if path.is_absolute() {
191 path.clone()
192 } else if let Some(source) = source {
193 source
194 .parent()
195 .map(Path::to_path_buf)
196 .unwrap_or_else(|| PathBuf::from("."))
197 .join(path)
198 } else {
199 path.clone()
200 }
201 })
202 }
203
204 /// Returns true when on-device transcription is enabled.
205 pub fn uses_on_device(&self) -> bool {
206 self.on_device
207 .as_ref()
208 .map(|cfg| cfg.enabled)
209 .unwrap_or(false)
210 }
211
212 /// Returns the enabled on-device configuration if present.
213 pub fn on_device_config(&self) -> Option<&OnDeviceConfig> {
214 self.on_device.as_ref().filter(|cfg| cfg.enabled)
215 }
216}
217
/// Configuration for the on-device Whisper backend.
///
/// All fields have defaults (see [`Default`] impl) and are filled in by serde
/// via `#[serde(default)]` when omitted from the JSON config file.
#[derive(Debug, Deserialize, Clone)]
#[serde(default)]
pub struct OnDeviceConfig {
    /// Whether to enable the on-device backend.
    pub enabled: bool,
    /// Force CPU execution instead of GPU.
    pub cpu: bool,
    /// Predefined Whisper model identifier (e.g. `"tiny.en"`).
    pub model: String,
    /// Optional custom Hugging Face model id; overrides `model` when set.
    /// NOTE(review): precedence over `model` is presumed from the pairing with
    /// `revision` — confirm against the backend loader.
    pub model_id: Option<String>,
    /// Optional revision for the custom model id.
    pub revision: Option<String>,
    /// Use quantized weights where available (tiny/tiny.en).
    pub quantized: bool,
    /// RNG seed passed to the decoder.
    pub seed: u64,
    /// Optional forced language token (e.g. `"en"`).
    pub language: Option<String>,
    /// Optional decoding task (`"transcribe"` or `"translate"`).
    pub task: Option<String>,
    /// Emit timestamped segments.
    pub timestamps: bool,
    /// Print verbose logs from the decoder.
    pub verbose: bool,
    /// Preferred audio input device name.
    pub audio_device: Option<String>,
}
247
248impl Default for OnDeviceConfig {
249 fn default() -> Self {
250 Self {
251 enabled: false,
252 cpu: true,
253 model: "tiny.en".to_string(),
254 model_id: None,
255 revision: None,
256 quantized: false,
257 seed: 299_792_458,
258 language: None,
259 task: None,
260 timestamps: false,
261 verbose: false,
262 audio_device: None,
263 }
264 }
265}