Code Examples
This guide contains complete examples demonstrating real-time audio transcription for various use cases in different programming languages.
- Python Example: Shows how to use websockets to transcribe a pre-recorded file in chunks.
- Node.js Example: Imitates real websocket usage by chunking a pre-recorded file in Node.js.
- JavaScript Example: Shows a browser example using JavaScript.
- Streaming from Microphone: Shows real-time transcription from microphone audio.
Prerequisites
Python
$ pip install websockets
Node.js
$ npm install ws
Python Example
This example shows how to stream audio from a file and receive real-time transcriptions:
import asyncio
import websockets
import json
import os
import requests
from urllib.parse import urlencode

BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text"
SAMPLE_URL = (
    "https://github.com/smallest-inc/cookbook/raw/main/"
    "speech-to-text/getting-started/samples/audio.wav"
)
params = {
    "language": "en",
    "encoding": "linear16",
    "sample_rate": "24000",  # must match the source audio sample rate
    "word_timestamps": "true",
}
WS_URL = f"{BASE_WS_URL}?{urlencode(params)}"

API_KEY = os.environ["SMALLEST_API_KEY"]


async def stream_audio():
    """Stream a sample WAV file to the STT WebSocket and print transcripts.

    Sends the audio in paced 4 KiB chunks to imitate real-time capture,
    then signals end-of-stream and collects partial/final transcripts
    until the server marks the session complete (``is_last``).
    """
    headers = {"Authorization": f"Bearer {API_KEY}"}

    # Download the sample audio BEFORE connecting, and in a worker thread:
    # requests.get() is blocking, and running it inside the event loop after
    # the socket is open would stall heartbeats and can trip idle timeouts.
    # (Replace with your own bytes.)
    response = await asyncio.to_thread(requests.get, SAMPLE_URL)
    response.raise_for_status()
    audio_bytes = response.content

    async with websockets.connect(WS_URL, additional_headers=headers) as ws:
        print("Connected to STT WebSocket")

        chunk_size = 4096
        offset = 0

        print(f"Streaming {len(audio_bytes)} bytes")

        async def send_chunks():
            # Pace the upload so the server receives audio roughly in
            # real time, then tell it no more audio is coming.
            nonlocal offset
            while offset < len(audio_bytes):
                chunk = audio_bytes[offset: offset + chunk_size]
                await ws.send(chunk)
                offset += chunk_size
                await asyncio.sleep(0.05)  # 50ms delay between chunks

            print("Finished sending audio, closing stream...")
            await ws.send(json.dumps({"type": "close_stream"}))

        sender = asyncio.create_task(send_chunks())

        full_transcript = ""
        try:
            async for message in ws:
                try:
                    data = json.loads(message)
                    print("Received:", json.dumps(data, indent=2))

                    # is_final=False marks an interim transcript;
                    # only final segments are concatenated.
                    if not data.get("is_final"):
                        print(f"Partial: {data.get('transcript')}")
                    else:
                        print(f"Final: {data.get('transcript')}")
                        full_transcript += data.get("transcript", "") or ""

                    # is_last=True means the session is complete.
                    if data.get("is_last"):
                        print("Transcription complete!")
                        print(f"Full Transcript: {full_transcript}")
                        break
                except json.JSONDecodeError:
                    print("Received raw:", message)
        except websockets.ConnectionClosed as e:
            print(f"Connection closed: {e.code} - {e.reason}")

        # Ensure the sender task has finished before leaving the context.
        await sender


if __name__ == "__main__":
    asyncio.run(stream_audio())
Node.js Example
This example demonstrates real-time transcription using the ws library:
const WebSocket = require("ws");
const fs = require("fs");

const API_KEY = process.env.SMALLEST_API_KEY;
const AUDIO_FILE = "path/to/audio.wav";

// Fail fast with a clear message instead of connecting with
// "Authorization: Bearer undefined" and getting a server-side rejection.
if (!API_KEY) {
  throw new Error("SMALLEST_API_KEY environment variable is not set");
}

// Build the WebSocket URL; the query parameters describe the audio format.
const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
url.searchParams.append("language", "en");
url.searchParams.append("encoding", "linear16");
url.searchParams.append("sample_rate", "16000"); // must match the source audio
url.searchParams.append("word_timestamps", "true");

const ws = new WebSocket(url.toString(), {
  headers: {
    Authorization: `Bearer ${API_KEY}`,
  },
});

ws.on("open", () => {
  console.log("Connected to STT WebSocket");

  const audioBuffer = fs.readFileSync(AUDIO_FILE);
  const chunkSize = 4096;
  let offset = 0;

  // Send the file in fixed-size chunks, paced to imitate real-time capture.
  const sendChunk = () => {
    if (offset >= audioBuffer.length) {
      console.log("Finished sending audio, closing stream...");
      ws.send(JSON.stringify({ type: "close_stream" }));
      return;
    }

    // Buffer#subarray replaces the deprecated Buffer#slice
    // (same zero-copy view of the underlying memory).
    const chunk = audioBuffer.subarray(offset, offset + chunkSize);
    ws.send(chunk);
    offset += chunkSize;

    setTimeout(sendChunk, 50); // 50ms delay between chunks
  };

  sendChunk();
});

let fullTranscript = "";

ws.on("message", (data) => {
  try {
    const message = JSON.parse(data.toString());
    console.log("Received:", JSON.stringify(message, null, 2));

    // is_final=false marks an interim transcript; true marks a final segment.
    if (!message.is_final) {
      console.log(`Partial: ${message.transcript}`);
    } else {
      console.log(`Final: ${message.transcript}`);
      fullTranscript += message.transcript ?? "";

      // is_last=true signals the end of the session.
      if (message.is_last) {
        console.log("Transcription complete!");
        console.log(`Full Transcript: ${fullTranscript}`);
        ws.close();
      }
    }
  } catch (error) {
    console.error("Error parsing message:", error);
  }
});

ws.on("error", (error) => {
  console.error("WebSocket error:", error.message);
});

ws.on("close", (code, reason) => {
  console.log(`Connection closed: ${code} - ${reason.toString()}`);
});
Browser JavaScript Example
This example shows how to stream audio from a file input in the browser:
const API_KEY = "SMALLEST_API_KEY";

/**
 * Stream an audio File/Blob to the STT WebSocket and update the UI with
 * partial and final transcripts.
 *
 * NOTE(review): browsers cannot attach an Authorization header to a
 * WebSocket, so the API_KEY constant above is never actually sent by this
 * example. Supply credentials via the API's browser-compatible mechanism
 * (e.g. a query parameter or a short-lived token minted by your backend) —
 * confirm against the service documentation before shipping.
 *
 * @param {File|Blob} audioFile - linear16 audio matching the sample_rate below.
 */
async function transcribeAudio(audioFile) {
  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  url.searchParams.append("language", "en");
  url.searchParams.append("encoding", "linear16");
  url.searchParams.append("sample_rate", "16000"); // must match the source audio
  url.searchParams.append("word_timestamps", "true");

  const ws = new WebSocket(url.toString());

  ws.onopen = async () => {
    console.log("Connected to STT WebSocket");

    const arrayBuffer = await audioFile.arrayBuffer();
    const chunkSize = 4096;
    let offset = 0;

    // Send the file in fixed-size chunks, paced to imitate real-time capture.
    const sendChunk = () => {
      if (offset >= arrayBuffer.byteLength) {
        console.log("Finished sending audio, closing stream...");
        ws.send(JSON.stringify({ type: "close_stream" }));
        return;
      }

      const chunk = arrayBuffer.slice(offset, offset + chunkSize);
      ws.send(chunk);
      offset += chunkSize;

      setTimeout(sendChunk, 50); // 50ms delay between chunks
    };

    sendChunk();
  };

  let fullTranscript = "";

  ws.onmessage = (event) => {
    try {
      const message = JSON.parse(event.data);
      console.log("Received:", message);

      // is_final distinguishes final segments from interim transcripts;
      // only final segments are accumulated.
      if (message.is_final) {
        updateTranscript(message.transcript);
        fullTranscript += message.transcript ?? "";
      } else {
        updatePartialTranscript(message.transcript);
      }

      // is_last=true signals the end of the session.
      if (message.is_last) {
        console.log("Transcription complete!");
        console.log("Full Transcript:", fullTranscript);
        ws.close();
      }
    } catch (error) {
      console.error("Error parsing message:", error);
    }
  };

  ws.onerror = (error) => {
    console.error("WebSocket error:", error);
  };

  ws.onclose = (event) => {
    console.log(`Connection closed: ${event.code}`);
  };
}

// Example usage with file input
const fileInput = document.getElementById("audioFile");
fileInput.addEventListener("change", (e) => {
  const file = e.target.files[0];
  if (file) {
    transcribeAudio(file);
  }
});
Streaming from Microphone
Here’s an example of streaming live audio from a microphone in the browser:
const API_KEY = "SMALLEST_API_KEY";

// Convert Web Audio float samples (nominally [-1, 1]) to 16-bit signed PCM,
// clamping out-of-range samples to avoid integer overflow.
function floatTo16BitPCM(floatSamples) {
  const int16Data = new Int16Array(floatSamples.length);
  for (let i = 0; i < floatSamples.length; i++) {
    int16Data[i] = Math.max(-32768, Math.min(32767, floatSamples[i] * 32768));
  }
  return int16Data;
}

/**
 * Capture microphone audio and stream it to the STT WebSocket for roughly
 * 30 seconds, logging final transcripts as they arrive.
 */
async function streamMicrophone() {
  // Get microphone access
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const audioContext = new AudioContext({ sampleRate: 16000 });
  const source = audioContext.createMediaStreamSource(stream);

  // NOTE: ScriptProcessorNode is deprecated; prefer AudioWorkletNode in
  // production code. It is used here only for brevity.
  const processor = audioContext.createScriptProcessor(4096, 1, 1);

  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  url.searchParams.append("language", "en");
  url.searchParams.append("encoding", "linear16");
  url.searchParams.append("sample_rate", "16000"); // must match AudioContext rate

  const ws = new WebSocket(url.toString());

  ws.onopen = () => {
    console.log("Connected, starting microphone stream");

    processor.onaudioprocess = (e) => {
      const int16Data = floatTo16BitPCM(e.inputBuffer.getChannelData(0));
      // Audio callbacks can outlive the socket; only send while it is open.
      if (ws.readyState === WebSocket.OPEN) {
        ws.send(int16Data.buffer);
      }
    };

    source.connect(processor);
    processor.connect(audioContext.destination);
  };

  let fullTranscript = "";

  ws.onmessage = (event) => {
    // Guard the parse like the other examples: a malformed frame should
    // not throw out of the message handler.
    try {
      const message = JSON.parse(event.data);
      if (message.is_final) {
        console.log("Transcript:", message.transcript);
        fullTranscript += message.transcript ?? "";
      }
      if (message.is_last) {
        console.log("Full Transcript:", fullTranscript);
        ws.close();
      }
    } catch (error) {
      console.error("Error parsing message:", error);
    }
  };

  ws.onerror = (error) => {
    console.error("WebSocket error:", error);
  };

  // Stop streaming after 30 seconds (example)
  setTimeout(() => {
    processor.disconnect();
    source.disconnect();
    stream.getTracks().forEach((track) => track.stop());
    // The connection may already have failed or closed by now.
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type: "close_stream" }));
    }
  }, 30000);
}

// Start streaming
streamMicrophone().catch(console.error);
Handling Responses
The WebSocket API sends JSON messages with the following structure:
1 { 2 "session_id": "sess_12345abcde", 3 "transcript": "Hello, how are you?", 4 "is_final": true, 5 "is_last": false, 6 "language": "en", 7 "word_timestamps": [ 8 { 9 "word": "Hello", 10 "start": 0.0, 11 "end": 0.5 12 } 13 ] 14 }
Key Response Fields
- is_final: false indicates a partial/interim transcript; true indicates a final transcript
- is_last: true when the session is complete
- transcript: Current segment text. Concatenate each is_final=true value to build a session-level transcript on the client.
- word_timestamps: Only included when word_timestamps=true in query params
Browser
No additional dependencies required - uses native WebSocket API.
Error Handling
Always implement proper error handling for production use:
// Always wire up both error and close handlers in production.
ws.onerror = (err) => {
  console.error("WebSocket error:", err);
  // Implement retry logic or user notification
};

ws.onclose = (evt) => {
  // Code 1000 is a normal closure; anything else ended the session unexpectedly.
  if (evt.code === 1000) return;
  console.error(`Unexpected closure: ${evt.code} - ${evt.reason}`);
  // Implement reconnection logic
};

