Code Examples
This guide contains complete examples demonstrating real-time audio transcription for various use cases in different programming languages.
- Python Example: Shows how to use websockets to transcribe a pre-recorded file in chunks.
- Node.js Example: Imitates real websocket usage by chunking a pre-recorded file in Node.js.
- JavaScript Example: Shows a browser example using JavaScript.
- Streaming from Microphone: Shows real-time transcription from microphone audio.
Prerequisites
Python
$ pip install websockets
Node.js
$ npm install ws
Python Example
This example shows how to stream audio from a file and receive real-time transcriptions:
import asyncio
import websockets
import json
import os
import requests
from urllib.parse import urlencode

BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text"
SAMPLE_URL = (
    "https://github.com/smallest-inc/cookbook/raw/main/"
    "speech-to-text/getting-started/samples/audio.wav"
)
params = {
    "language": "en",
    "encoding": "linear16",
    "sample_rate": "24000",  # must match the source audio sample rate
    "word_timestamps": "true",
}
WS_URL = f"{BASE_WS_URL}?{urlencode(params)}"

API_KEY = os.environ["SMALLEST_API_KEY"]


async def stream_audio():
    """Stream a sample WAV file to the STT WebSocket and print transcripts.

    Sends the audio in paced 4 KiB chunks to imitate real-time capture,
    then signals end-of-stream and collects partial/final transcripts
    until the server marks the session complete (``is_last``).
    """
    headers = {"Authorization": f"Bearer {API_KEY}"}

    # Download the sample audio BEFORE connecting, and in a worker thread:
    # requests.get() is blocking, and running it inside the event loop after
    # the socket is open would stall heartbeats and can trip idle timeouts.
    # (Replace with your own bytes.)
    response = await asyncio.to_thread(requests.get, SAMPLE_URL)
    response.raise_for_status()
    audio_bytes = response.content

    async with websockets.connect(WS_URL, additional_headers=headers) as ws:
        print("Connected to STT WebSocket")

        chunk_size = 4096
        offset = 0

        print(f"Streaming {len(audio_bytes)} bytes")

        async def send_chunks():
            # Pace the upload so the server receives audio roughly in
            # real time, then tell it no more audio is coming.
            nonlocal offset
            while offset < len(audio_bytes):
                chunk = audio_bytes[offset: offset + chunk_size]
                await ws.send(chunk)
                offset += chunk_size
                await asyncio.sleep(0.05)  # 50ms delay between chunks

            print("Finished sending audio, closing stream...")
            await ws.send(json.dumps({"type": "close_stream"}))

        sender = asyncio.create_task(send_chunks())

        full_transcript = ""
        try:
            async for message in ws:
                try:
                    data = json.loads(message)
                    print("Received:", json.dumps(data, indent=2))

                    # is_final=False marks an interim transcript;
                    # only final segments are concatenated.
                    if not data.get("is_final"):
                        print(f"Partial: {data.get('transcript')}")
                    else:
                        print(f"Final: {data.get('transcript')}")
                        full_transcript += data.get("transcript", "") or ""

                    # is_last=True means the session is complete.
                    if data.get("is_last"):
                        print("Transcription complete!")
                        print(f"Full Transcript: {full_transcript}")
                        break
                except json.JSONDecodeError:
                    print("Received raw:", message)
        except websockets.ConnectionClosed as e:
            print(f"Connection closed: {e.code} - {e.reason}")

        # Ensure the sender task has finished before leaving the context.
        await sender


if __name__ == "__main__":
    asyncio.run(stream_audio())
Node.js Example
This example demonstrates real-time transcription using the ws library:
const WebSocket = require("ws");
const fs = require("fs");

const API_KEY = process.env.SMALLEST_API_KEY;
const AUDIO_FILE = "path/to/audio.wav";

// Fail fast with a clear message instead of connecting with
// "Authorization: Bearer undefined" and getting a server-side rejection.
if (!API_KEY) {
  throw new Error("SMALLEST_API_KEY environment variable is not set");
}

// Build the WebSocket URL; the query parameters describe the audio format.
const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
url.searchParams.append("language", "en");
url.searchParams.append("encoding", "linear16");
url.searchParams.append("sample_rate", "16000"); // must match the source audio
url.searchParams.append("word_timestamps", "true");

const ws = new WebSocket(url.toString(), {
  headers: {
    Authorization: `Bearer ${API_KEY}`,
  },
});

ws.on("open", () => {
  console.log("Connected to STT WebSocket");

  const audioBuffer = fs.readFileSync(AUDIO_FILE);
  const chunkSize = 4096;
  let offset = 0;

  // Send the file in fixed-size chunks, paced to imitate real-time capture.
  const sendChunk = () => {
    if (offset >= audioBuffer.length) {
      console.log("Finished sending audio, closing stream...");
      ws.send(JSON.stringify({ type: "close_stream" }));
      return;
    }

    // Buffer#subarray replaces the deprecated Buffer#slice
    // (same zero-copy view of the underlying memory).
    const chunk = audioBuffer.subarray(offset, offset + chunkSize);
    ws.send(chunk);
    offset += chunkSize;

    setTimeout(sendChunk, 50); // 50ms delay between chunks
  };

  sendChunk();
});

let fullTranscript = "";

ws.on("message", (data) => {
  try {
    const message = JSON.parse(data.toString());
    console.log("Received:", JSON.stringify(message, null, 2));

    // is_final=false marks an interim transcript; true marks a final segment.
    if (!message.is_final) {
      console.log(`Partial: ${message.transcript}`);
    } else {
      console.log(`Final: ${message.transcript}`);
      fullTranscript += message.transcript ?? "";

      // is_last=true signals the end of the session.
      if (message.is_last) {
        console.log("Transcription complete!");
        console.log(`Full Transcript: ${fullTranscript}`);
        ws.close();
      }
    }
  } catch (error) {
    console.error("Error parsing message:", error);
  }
});

ws.on("error", (error) => {
  console.error("WebSocket error:", error.message);
});

ws.on("close", (code, reason) => {
  console.log(`Connection closed: ${code} - ${reason.toString()}`);
});
Browser JavaScript Example
This example shows how to stream audio from a file input in the browser:
const API_KEY = "SMALLEST_API_KEY";

/**
 * Stream an audio File/Blob to the STT WebSocket and update the UI with
 * partial and final transcripts.
 *
 * NOTE(review): browsers cannot attach an Authorization header to a
 * WebSocket, so the API_KEY constant above is never actually sent by this
 * example. Supply credentials via the API's browser-compatible mechanism
 * (e.g. a query parameter or a short-lived token minted by your backend) —
 * confirm against the service documentation before shipping.
 *
 * @param {File|Blob} audioFile - linear16 audio matching the sample_rate below.
 */
async function transcribeAudio(audioFile) {
  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  url.searchParams.append("language", "en");
  url.searchParams.append("encoding", "linear16");
  url.searchParams.append("sample_rate", "16000"); // must match the source audio
  url.searchParams.append("word_timestamps", "true");

  const ws = new WebSocket(url.toString());

  ws.onopen = async () => {
    console.log("Connected to STT WebSocket");

    const arrayBuffer = await audioFile.arrayBuffer();
    const chunkSize = 4096;
    let offset = 0;

    // Send the file in fixed-size chunks, paced to imitate real-time capture.
    const sendChunk = () => {
      if (offset >= arrayBuffer.byteLength) {
        console.log("Finished sending audio, closing stream...");
        ws.send(JSON.stringify({ type: "close_stream" }));
        return;
      }

      const chunk = arrayBuffer.slice(offset, offset + chunkSize);
      ws.send(chunk);
      offset += chunkSize;

      setTimeout(sendChunk, 50); // 50ms delay between chunks
    };

    sendChunk();
  };

  let fullTranscript = "";

  ws.onmessage = (event) => {
    try {
      const message = JSON.parse(event.data);
      console.log("Received:", message);

      // is_final distinguishes final segments from interim transcripts;
      // only final segments are accumulated.
      if (message.is_final) {
        updateTranscript(message.transcript);
        fullTranscript += message.transcript ?? "";
      } else {
        updatePartialTranscript(message.transcript);
      }

      // is_last=true signals the end of the session.
      if (message.is_last) {
        console.log("Transcription complete!");
        console.log("Full Transcript:", fullTranscript);
        ws.close();
      }
    } catch (error) {
      console.error("Error parsing message:", error);
    }
  };

  ws.onerror = (error) => {
    console.error("WebSocket error:", error);
  };

  ws.onclose = (event) => {
    console.log(`Connection closed: ${event.code}`);
  };
}

// Example usage with file input
const fileInput = document.getElementById("audioFile");
fileInput.addEventListener("change", (e) => {
  const file = e.target.files[0];
  if (file) {
    transcribeAudio(file);
  }
});
Streaming from Microphone
Here’s an example of streaming live audio from a microphone in the browser:
const API_KEY = "SMALLEST_API_KEY";

// Convert Web Audio float samples (nominally [-1, 1]) to 16-bit signed PCM,
// clamping out-of-range samples to avoid integer overflow.
function floatTo16BitPCM(floatSamples) {
  const int16Data = new Int16Array(floatSamples.length);
  for (let i = 0; i < floatSamples.length; i++) {
    int16Data[i] = Math.max(-32768, Math.min(32767, floatSamples[i] * 32768));
  }
  return int16Data;
}

/**
 * Capture microphone audio and stream it to the STT WebSocket for roughly
 * 30 seconds, logging final transcripts as they arrive.
 */
async function streamMicrophone() {
  // Get microphone access
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const audioContext = new AudioContext({ sampleRate: 16000 });
  const source = audioContext.createMediaStreamSource(stream);

  // NOTE: ScriptProcessorNode is deprecated; prefer AudioWorkletNode in
  // production code. It is used here only for brevity.
  const processor = audioContext.createScriptProcessor(4096, 1, 1);

  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  url.searchParams.append("language", "en");
  url.searchParams.append("encoding", "linear16");
  url.searchParams.append("sample_rate", "16000"); // must match AudioContext rate

  const ws = new WebSocket(url.toString());

  ws.onopen = () => {
    console.log("Connected, starting microphone stream");

    processor.onaudioprocess = (e) => {
      const int16Data = floatTo16BitPCM(e.inputBuffer.getChannelData(0));
      // Audio callbacks can outlive the socket; only send while it is open.
      if (ws.readyState === WebSocket.OPEN) {
        ws.send(int16Data.buffer);
      }
    };

    source.connect(processor);
    processor.connect(audioContext.destination);
  };

  let fullTranscript = "";

  ws.onmessage = (event) => {
    // Guard the parse like the other examples: a malformed frame should
    // not throw out of the message handler.
    try {
      const message = JSON.parse(event.data);
      if (message.is_final) {
        console.log("Transcript:", message.transcript);
        fullTranscript += message.transcript ?? "";
      }
      if (message.is_last) {
        console.log("Full Transcript:", fullTranscript);
        ws.close();
      }
    } catch (error) {
      console.error("Error parsing message:", error);
    }
  };

  ws.onerror = (error) => {
    console.error("WebSocket error:", error);
  };

  // Stop streaming after 30 seconds (example)
  setTimeout(() => {
    processor.disconnect();
    source.disconnect();
    stream.getTracks().forEach((track) => track.stop());
    // The connection may already have failed or closed by now.
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type: "close_stream" }));
    }
  }, 30000);
}

// Start streaming
streamMicrophone().catch(console.error);
Handling Responses
The WebSocket API sends JSON messages with the following structure:
1 { 2 "session_id": "sess_12345abcde", 3 "transcript": "Hello, how are you?", 4 "is_final": true, 5 "is_last": false, 6 "language": "en", 7 "word_timestamps": [ 8 { 9 "word": "Hello", 10 "start": 0.0, 11 "end": 0.5 12 } 13 ] 14 }
Key Response Fields
- is_final: false indicates a partial/interim transcript; true indicates a final transcript
- is_last: true when the session is complete
- transcript: Current segment text. Concatenate each is_final=true value to build a session-level transcript on the client.
- word_timestamps: Only included when word_timestamps=true in query params
Browser
No additional dependencies required - uses native WebSocket API.
Error Handling
Always implement proper error handling for production use:
// Always wire up both error and close handlers in production.
ws.onerror = (err) => {
  console.error("WebSocket error:", err);
  // Implement retry logic or user notification
};

ws.onclose = (evt) => {
  // Code 1000 is a normal closure; anything else ended the session unexpectedly.
  if (evt.code === 1000) return;
  console.error(`Unexpected closure: ${evt.code} - ${evt.reason}`);
  // Implement reconnection logic
};

