Use this file to discover all available pages before exploring further.
Qwen3-ASR provides state-of-the-art multilingual speech recognition on Apple Silicon with support for 30+ languages and speeds up to 50x real-time on M4 Max.
Run transcription with automatic language detection:
```bash
# Using default model (1.7B)
cargo run --release --example transcribe -- audio.wav

# Using 0.6B model
cargo run --release --example transcribe -- ~/.OminiX/models/qwen3-asr-0.6b audio.wav

# Specify language explicitly
cargo run --release --example transcribe -- audio.wav --language English

# Non-WAV formats (requires ffmpeg)
cargo run --release --example transcribe -- meeting.m4a
```
## 3. Use as a library
Integrate into your Rust application:
```rust
use qwen3_asr_mlx::{Qwen3ASR, default_model_path};

let mut model = Qwen3ASR::load(default_model_path())?;

// Simple transcription (default: Chinese)
let text = model.transcribe("audio.wav")?;

// With language specification
let text = model.transcribe_with_language("audio.wav", "English")?;

// From raw samples (16kHz mono f32)
let text = model.transcribe_samples(&samples, "Chinese")?;
```
```rust
use qwen3_asr_mlx::{Qwen3ASR, default_model_path};

// Load from default path (~/.OminiX/models/qwen3-asr-1.7b)
let mut model = Qwen3ASR::load(default_model_path())?;

// Load from custom path
let mut model = Qwen3ASR::load("~/.OminiX/models/qwen3-asr-0.6b")?;
```
```rust
// Transcribe WAV file (default: Chinese)
let text = model.transcribe("audio.wav")?;

// Transcribe with language specification
let text = model.transcribe_with_language("audio.wav", "English")?;

// Transcribe raw 16kHz f32 samples
let text = model.transcribe_samples(&samples, "Japanese")?;
```
```rust
let mut model = Qwen3ASR::load(default_model_path())?;

for audio_file in audio_files {
    let text = model.transcribe(audio_file)?;
    println!("{}\n{}", audio_file, text);
}
```
model.* - Text decoder (8-bit affine quantized, group_size=64)
The audio encoder is not quantized to preserve audio feature quality, while the text decoder uses 8-bit quantization to reduce memory usage for the larger LLM component.
Complete transcription example from examples/transcribe.rs:
```rust
use qwen3_asr_mlx::{Qwen3ASR, default_model_path};
use qwen3_asr_mlx::audio;
use std::time::Instant;

fn main() {
    // Load model
    println!("Loading model...");
    let start = Instant::now();
    let mut model = Qwen3ASR::load(default_model_path())
        .expect("Failed to load model");
    println!("Model loaded in {:.2}s", start.elapsed().as_secs_f32());

    // Load and resample audio
    let (samples, sample_rate) = audio::load_wav("audio.wav")
        .expect("Failed to load audio");
    let duration_secs = samples.len() as f32 / sample_rate as f32;
    let samples = audio::resample(&samples, sample_rate, 16000)
        .expect("Resample failed");

    // Transcribe
    let start = Instant::now();
    let text = model.transcribe_samples(&samples, "English")
        .expect("Transcription failed");
    let elapsed = start.elapsed().as_secs_f32();
    println!("Transcription ({:.2}s, {:.1}x realtime):", elapsed, duration_secs / elapsed);
    println!("{}", text);
}
```