vtt_rs/
config.rs

1use anyhow::{Context, Result};
2use serde::Deserialize;
3use std::{
4    fs,
5    path::{Path, PathBuf},
6};
7
/// Remote transcription endpoint used when the configuration does not override it.
const DEFAULT_ENDPOINT: &str = "https://api.openai.com/v1/audio/transcriptions";
/// Model identifier used when the configuration does not override it.
const DEFAULT_MODEL: &str = "whisper-1";
/// Length of one audio chunk, in seconds, when the configuration does not override it.
const DEFAULT_CHUNK_DURATION_SECS: usize = 5;
/// Relative path of the configuration file probed by `Config::load_or_default`.
const DEFAULT_CONFIG_PATH: &str = "vtt.config.json";
12
13/// Configuration for the transcription service.
14///
15/// This struct controls how audio is captured, chunked, and sent to the transcription API.
16/// All fields have sensible defaults and can be loaded from a JSON configuration file.
17///
18/// # Examples
19///
20/// ## Using defaults
21///
22/// ```
23/// use vtt_rs::Config;
24///
25/// let config = Config::default();
26/// assert_eq!(config.chunk_duration_secs, 5);
27/// assert_eq!(config.model, "whisper-1");
28/// ```
29///
30/// ## Custom configuration
31///
32/// ```
33/// use vtt_rs::Config;
34/// use std::path::PathBuf;
35///
36/// let config = Config {
37///     chunk_duration_secs: 3,
38///     model: "whisper-1".to_string(),
39///     endpoint: "https://api.openai.com/v1/audio/transcriptions".to_string(),
40///     out_file: Some(PathBuf::from("output.log")),
41/// };
42/// ```
43///
44/// ## Loading from JSON file
45///
46/// ```no_run
47/// use vtt_rs::Config;
48///
49/// # fn main() -> anyhow::Result<()> {
50/// let config = Config::from_file("config.json")?;
51/// # Ok(())
52/// # }
53/// ```
54#[derive(Debug, Deserialize, Clone)]
55#[serde(default)]
56pub struct Config {
57    /// Duration of each audio chunk in seconds.
58    ///
59    /// Smaller values (2-3 seconds) provide faster response times but may reduce accuracy.
60    /// Larger values (5-10 seconds) improve accuracy but increase latency.
61    pub chunk_duration_secs: usize,
62
63    /// OpenAI model to use for transcription.
64    ///
65    /// Common values: `"whisper-1"` for OpenAI's Whisper model.
66    pub model: String,
67
68    /// API endpoint for transcription requests.
69    ///
70    /// Defaults to OpenAI's endpoint but can be changed to support local Whisper
71    /// instances or other OpenAI-compatible APIs.
72    pub endpoint: String,
73
74    /// Optional file path to append transcription logs.
75    ///
76    /// When set, all transcriptions (including silence markers) will be written
77    /// to this file in addition to being sent via events. Use [`None`] to disable
78    /// file logging.
79    pub out_file: Option<PathBuf>,
80
81    /// Optional on-device configuration.
82    ///
83    /// When present and `enabled`, the service will run Whisper locally using
84    /// the bundled Candle integration instead of calling a remote API.
85    pub on_device: Option<OnDeviceConfig>,
86}
87
88impl Default for Config {
89    fn default() -> Self {
90        Self {
91            chunk_duration_secs: DEFAULT_CHUNK_DURATION_SECS,
92            model: DEFAULT_MODEL.to_string(),
93            endpoint: DEFAULT_ENDPOINT.to_string(),
94            out_file: None,
95            on_device: None,
96        }
97    }
98}
99
100impl Config {
101    /// Loads configuration from a JSON file at the specified path.
102    ///
103    /// The JSON file should contain fields matching the [`Config`] struct.
104    /// Missing fields will use their default values via serde's `#[serde(default)]`.
105    ///
106    /// # Examples
107    ///
108    /// ```no_run
109    /// use vtt_rs::Config;
110    ///
111    /// # fn main() -> anyhow::Result<()> {
112    /// let config = Config::from_file("my_config.json")?;
113    /// println!("Using model: {}", config.model);
114    /// # Ok(())
115    /// # }
116    /// ```
117    ///
118    /// # Errors
119    ///
120    /// Returns an error if:
121    /// - The file cannot be read
122    /// - The file contains invalid JSON
123    /// - The JSON structure doesn't match the expected format
124    pub fn from_file(path: impl AsRef<Path>) -> Result<Self> {
125        let path = path.as_ref();
126        let contents = fs::read_to_string(path)
127            .with_context(|| format!("reading config from {}", path.display()))?;
128        serde_json::from_str(&contents)
129            .with_context(|| format!("parsing config from {}", path.display()))
130    }
131
132    /// Attempts to load configuration from `vtt.config.json` in the current directory.
133    ///
134    /// If the default config file exists, it will be loaded. Otherwise, returns
135    /// default configuration values.
136    ///
137    /// # Examples
138    ///
139    /// ```no_run
140    /// use vtt_rs::Config;
141    ///
142    /// # fn main() -> anyhow::Result<()> {
143    /// let (config, source_path) = Config::load_or_default()?;
144    ///
145    /// if let Some(path) = source_path {
146    ///     println!("Loaded config from: {}", path.display());
147    /// } else {
148    ///     println!("Using default configuration");
149    /// }
150    /// # Ok(())
151    /// # }
152    /// ```
153    ///
154    /// # Errors
155    ///
156    /// Returns an error if the default config file exists but cannot be read or parsed.
157    pub fn load_or_default() -> Result<(Self, Option<PathBuf>)> {
158        let default_path = PathBuf::from(DEFAULT_CONFIG_PATH);
159        if default_path.exists() {
160            let config = Self::from_file(&default_path)?;
161            Ok((config, Some(default_path)))
162        } else {
163            Ok((Self::default(), None))
164        }
165    }
166
167    /// Resolves the output file path relative to the configuration file's location.
168    ///
169    /// This method handles both absolute and relative paths in `out_file`:
170    /// - Absolute paths are returned as-is
171    /// - Relative paths are resolved relative to the config file's directory
172    /// - If no source path is provided, relative paths remain unchanged
173    ///
174    /// # Examples
175    ///
176    /// ```
177    /// use vtt_rs::Config;
178    /// use std::path::{Path, PathBuf};
179    ///
180    /// let mut config = Config::default();
181    /// config.out_file = Some(PathBuf::from("logs/output.log"));
182    ///
183    /// let source = Path::new("/etc/vtt/config.json");
184    /// let resolved = config.resolve_out_path(Some(source));
185    ///
186    /// assert_eq!(resolved, Some(PathBuf::from("/etc/vtt/logs/output.log")));
187    /// ```
188    pub fn resolve_out_path(&self, source: Option<&Path>) -> Option<PathBuf> {
189        self.out_file.as_ref().map(|path| {
190            if path.is_absolute() {
191                path.clone()
192            } else if let Some(source) = source {
193                source
194                    .parent()
195                    .map(Path::to_path_buf)
196                    .unwrap_or_else(|| PathBuf::from("."))
197                    .join(path)
198            } else {
199                path.clone()
200            }
201        })
202    }
203
204    /// Returns true when on-device transcription is enabled.
205    pub fn uses_on_device(&self) -> bool {
206        self.on_device
207            .as_ref()
208            .map(|cfg| cfg.enabled)
209            .unwrap_or(false)
210    }
211
212    /// Returns the enabled on-device configuration if present.
213    pub fn on_device_config(&self) -> Option<&OnDeviceConfig> {
214        self.on_device.as_ref().filter(|cfg| cfg.enabled)
215    }
216}
217
218/// Configuration for the on-device Whisper backend.
219#[derive(Debug, Deserialize, Clone)]
220#[serde(default)]
221pub struct OnDeviceConfig {
222    /// Whether to enable the on-device backend.
223    pub enabled: bool,
224    /// Force CPU execution instead of GPU.
225    pub cpu: bool,
226    /// Predefined Whisper model identifier (e.g. `"tiny.en"`).
227    pub model: String,
228    /// Optional custom Hugging Face model id.
229    pub model_id: Option<String>,
230    /// Optional revision for the custom model id.
231    pub revision: Option<String>,
232    /// Use quantized weights where available (tiny/tiny.en).
233    pub quantized: bool,
234    /// RNG seed passed to the decoder.
235    pub seed: u64,
236    /// Optional forced language token (e.g. `"en"`).
237    pub language: Option<String>,
238    /// Optional decoding task (`"transcribe"` or `"translate"`).
239    pub task: Option<String>,
240    /// Emit timestamped segments.
241    pub timestamps: bool,
242    /// Print verbose logs from the decoder.
243    pub verbose: bool,
244    /// Preferred audio input device name.
245    pub audio_device: Option<String>,
246}
247
248impl Default for OnDeviceConfig {
249    fn default() -> Self {
250        Self {
251            enabled: false,
252            cpu: true,
253            model: "tiny.en".to_string(),
254            model_id: None,
255            revision: None,
256            quantized: false,
257            seed: 299_792_458,
258            language: None,
259            task: None,
260            timestamps: false,
261            verbose: false,
262            audio_device: None,
263        }
264    }
265}