Code Examples
This guide contains complete examples demonstrating real-time audio transcription for various use cases in different programming languages.
- Python Example: Shows how to use websockets to transcribe a pre-recorded file in chunks.
- Node.js Example: Imitates real WebSocket usage by chunking a pre-recorded file in Node.js.
- JavaScript Example: Shows a browser-based example using JavaScript.
- Streaming from Microphone: Shows real-time transcription from microphone audio.
Prerequisites
Python
$ pip install websockets
Node.js
$ npm install ws
Python Example
This example shows how to stream audio from a file and receive real-time transcriptions:
import asyncio
import websockets
import json
import os
import pathlib
from urllib.parse import urlencode

BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text"
params = {
    "language": "en",
    "encoding": "linear16",
    "sample_rate": "16000",
    "word_timestamps": "true",
}
WS_URL = f"{BASE_WS_URL}?{urlencode(params)}"

API_KEY = os.getenv("SMALLEST_API_KEY")
AUDIO_FILE = "path/to/audio.wav"


async def stream_audio():
    """Stream a local audio file to the STT WebSocket and print transcripts.

    Sends the file in 4096-byte chunks (paced at 50ms to roughly simulate
    real-time capture), then a ``finalize`` message, while concurrently
    printing partial and final transcripts as they arrive.

    Raises:
        RuntimeError: if SMALLEST_API_KEY is not set in the environment.
    """
    # Fail fast with a clear message instead of connecting with "Bearer None".
    if not API_KEY:
        raise RuntimeError("SMALLEST_API_KEY environment variable is not set")

    headers = {"Authorization": f"Bearer {API_KEY}"}

    async with websockets.connect(WS_URL, additional_headers=headers) as ws:
        print("Connected to STT WebSocket")

        audio_bytes = pathlib.Path(AUDIO_FILE).read_bytes()
        chunk_size = 4096
        offset = 0

        print(f"Streaming {len(audio_bytes)} bytes from {os.path.basename(AUDIO_FILE)}")

        async def send_chunks():
            # Upload the audio in fixed-size chunks, then signal end-of-stream.
            nonlocal offset
            while offset < len(audio_bytes):
                await ws.send(audio_bytes[offset: offset + chunk_size])
                offset += chunk_size
                await asyncio.sleep(0.05)  # 50ms delay between chunks

            print("Finished sending audio, sending end signal...")
            await ws.send(json.dumps({"type": "finalize"}))

        sender = asyncio.create_task(send_chunks())

        try:
            async for message in ws:
                try:
                    data = json.loads(message)
                    print("Received:", json.dumps(data, indent=2))

                    # is_final=False marks a partial/interim transcript.
                    if not data.get("is_final"):
                        print(f"Partial: {data.get('transcript')}")
                    else:
                        print(f"Final: {data.get('transcript')}")
                        print(f"Full transcript: {data.get('full_transcript')}")

                    # is_last marks the end of the session.
                    if data.get("is_last"):
                        print("Transcription complete!")
                        break
                except json.JSONDecodeError:
                    print("Received raw:", message)
        except websockets.ConnectionClosed as e:
            print(f"Connection closed: {e.code} - {e.reason}")
        finally:
            # Always clean up the sender task, even if the receive loop
            # exits early (e.g. on a connection error) — otherwise it can
            # be left dangling or raise after the socket is gone.
            if not sender.done():
                sender.cancel()
            try:
                await sender
            except asyncio.CancelledError:
                pass


if __name__ == "__main__":
    asyncio.run(stream_audio())
Node.js Example
This example demonstrates real-time transcription using the ws library:
const WebSocket = require("ws");
const fs = require("fs");

const API_KEY = process.env.SMALLEST_API_KEY;
const AUDIO_FILE = "path/to/audio.wav";

// Fail fast instead of connecting with "Bearer undefined".
if (!API_KEY) {
  throw new Error("SMALLEST_API_KEY environment variable is not set");
}

// Build the endpoint URL; searchParams handles query-string encoding.
const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
url.searchParams.append("language", "en");
url.searchParams.append("encoding", "linear16");
url.searchParams.append("sample_rate", "16000");
url.searchParams.append("word_timestamps", "true");

const ws = new WebSocket(url.toString(), {
  headers: {
    Authorization: `Bearer ${API_KEY}`,
  },
});

ws.on("open", () => {
  console.log("Connected to STT WebSocket");

  const audioBuffer = fs.readFileSync(AUDIO_FILE);
  const chunkSize = 4096;
  let offset = 0;

  // Send the file in fixed-size chunks, paced at 50ms to roughly
  // simulate real-time capture.
  const sendChunk = () => {
    if (offset >= audioBuffer.length) {
      console.log("Finished sending audio, sending end signal...");
      ws.send(JSON.stringify({ type: "finalize" }));
      return;
    }

    // subarray() is the non-deprecated Buffer view API (slice is legacy).
    const chunk = audioBuffer.subarray(offset, offset + chunkSize);
    ws.send(chunk);
    offset += chunkSize;

    setTimeout(sendChunk, 50); // 50ms delay between chunks
  };

  sendChunk();
});

ws.on("message", (data) => {
  try {
    const message = JSON.parse(data.toString());
    console.log("Received:", JSON.stringify(message, null, 2));

    // Handle partial transcripts
    if (!message.is_final) {
      console.log(`Partial: ${message.transcript}`);
    } else {
      console.log(`Final: ${message.transcript}`);
      console.log(`Full transcript: ${message.full_transcript}`);
    }

    // Check is_last on every message (not only inside the is_final branch)
    // so the session always terminates — consistent with the Python and
    // browser examples.
    if (message.is_last) {
      console.log("Transcription complete!");
      ws.close();
    }
  } catch (error) {
    console.error("Error parsing message:", error);
  }
});

ws.on("error", (error) => {
  console.error("WebSocket error:", error.message);
});

ws.on("close", (code, reason) => {
  console.log(`Connection closed: ${code} - ${reason.toString()}`);
});
Browser JavaScript Example
This example shows how to stream audio from a file input in the browser:
const API_KEY = "SMALLEST_API_KEY";

/**
 * Stream a user-selected audio file over the STT WebSocket and update the UI
 * with partial and final transcripts.
 *
 * NOTE(review): API_KEY is unused here — browsers cannot attach custom
 * headers to a WebSocket handshake; confirm how this endpoint expects
 * browser clients to authenticate (e.g. query param or subprotocol).
 *
 * @param {File} audioFile - Raw audio file selected by the user.
 */
async function transcribeAudio(audioFile) {
  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  const queryParams = {
    language: "en",
    encoding: "linear16",
    sample_rate: "16000",
    word_timestamps: "true",
  };
  for (const [key, value] of Object.entries(queryParams)) {
    url.searchParams.append(key, value);
  }

  const ws = new WebSocket(url.toString());

  ws.onopen = async () => {
    console.log("Connected to STT WebSocket");

    const arrayBuffer = await audioFile.arrayBuffer();
    const CHUNK_SIZE = 4096;
    let position = 0;

    // Push one chunk, then schedule the next; a finalize message ends the stream.
    const pump = () => {
      if (position >= arrayBuffer.byteLength) {
        console.log("Finished sending audio");
        ws.send(JSON.stringify({ type: "finalize" }));
        return;
      }

      ws.send(arrayBuffer.slice(position, position + CHUNK_SIZE));
      position += CHUNK_SIZE;

      setTimeout(pump, 50); // 50ms delay between chunks
    };

    pump();
  };

  ws.onmessage = (event) => {
    try {
      const message = JSON.parse(event.data);
      console.log("Received:", message);

      // Update UI with transcript
      if (message.is_final) {
        updateTranscript(message.full_transcript);
      } else {
        updatePartialTranscript(message.transcript);
      }

      if (message.is_last) {
        console.log("Transcription complete!");
        ws.close();
      }
    } catch (error) {
      console.error("Error parsing message:", error);
    }
  };

  ws.onerror = (error) => {
    console.error("WebSocket error:", error);
  };

  ws.onclose = (event) => {
    console.log(`Connection closed: ${event.code}`);
  };
}

// Example usage with file input
const fileInput = document.getElementById("audioFile");
fileInput.addEventListener("change", (e) => {
  const file = e.target.files[0];
  if (file) {
    transcribeAudio(file);
  }
});
Streaming from Microphone
Here’s an example of streaming live audio from a microphone in the browser:
const API_KEY = "SMALLEST_API_KEY";

// NOTE(review): API_KEY is unused — browsers cannot set custom headers on a
// WebSocket handshake; confirm how this endpoint authenticates browser clients.

/**
 * Capture live microphone audio, convert it to 16-bit linear PCM, and stream
 * it to the STT WebSocket, logging final transcripts. Stops after 30 seconds.
 */
async function streamMicrophone() {
  // Get microphone access
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const audioContext = new AudioContext({ sampleRate: 16000 });
  const source = audioContext.createMediaStreamSource(stream);

  // ScriptProcessorNode is deprecated (prefer AudioWorklet in production),
  // but it keeps this example simple: 4096-sample buffers, mono in/out.
  const processor = audioContext.createScriptProcessor(4096, 1, 1);

  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  url.searchParams.append("language", "en");
  url.searchParams.append("encoding", "linear16");
  url.searchParams.append("sample_rate", "16000");

  const ws = new WebSocket(url.toString());

  ws.onopen = () => {
    console.log("Connected, starting microphone stream");

    processor.onaudioprocess = (e) => {
      // The processor can fire once more while teardown is in progress;
      // sending on a closing/closed socket throws, so guard readyState.
      if (ws.readyState !== WebSocket.OPEN) return;

      const inputData = e.inputBuffer.getChannelData(0);
      // Convert Float32 samples in [-1, 1] to 16-bit PCM, clamping and
      // rounding (plain Int16Array assignment would truncate toward zero).
      const int16Data = new Int16Array(inputData.length);
      for (let i = 0; i < inputData.length; i++) {
        int16Data[i] = Math.round(
          Math.max(-32768, Math.min(32767, inputData[i] * 32768))
        );
      }
      ws.send(int16Data.buffer);
    };

    source.connect(processor);
    processor.connect(audioContext.destination);
  };

  ws.onmessage = (event) => {
    const message = JSON.parse(event.data);
    if (message.is_final) {
      console.log("Transcript:", message.full_transcript);
    }
  };

  // Stop streaming after 30 seconds (example)
  setTimeout(() => {
    processor.disconnect();
    source.disconnect();
    stream.getTracks().forEach((track) => track.stop());
    ws.send(JSON.stringify({ type: "finalize" }));
    ws.close();
  }, 30000);
}

// Start streaming
streamMicrophone().catch(console.error);
Handling Responses
The WebSocket API sends JSON messages with the following structure:
1 { 2 "session_id": "sess_12345abcde", 3 "transcript": "Hello, how are you?", 4 "full_transcript": "Hello, how are you?", 5 "is_final": true, 6 "is_last": false, 7 "language": "en", 8 "word_timestamps": [ 9 { 10 "word": "Hello", 11 "start": 0.0, 12 "end": 0.5 13 } 14 ] 15 }
Key Response Fields
- is_final: false indicates a partial/interim transcript; true indicates a final transcript
- is_last: true when the session is complete
- transcript: Current segment text
- full_transcript: Accumulated text from the entire session
- word_timestamps: Only included when word_timestamps=true is set in the query params
Browser
No additional dependencies required — the browser's native WebSocket API is used.
Error Handling
Always implement proper error handling for production use:
// Surface transport-level failures to the console; hook retry or
// user-notification logic in here for production.
const handleSocketError = (error) => {
  console.error("WebSocket error:", error);
  // Implement retry logic or user notification
};

// Code 1000 is a normal closure; treat anything else as unexpected.
const handleSocketClose = (event) => {
  if (event.code !== 1000) {
    console.error(`Unexpected closure: ${event.code} - ${event.reason}`);
    // Implement reconnection logic
  }
};

ws.onerror = handleSocketError;
ws.onclose = handleSocketClose;

