Code Examples

View as Markdown

This guide contains complete examples demonstrating real-time audio transcription for various use cases in different programming languages.

Prerequisites

Python

$ pip install websockets

Node.js

$ npm install ws

Python Example

This example shows how to stream audio from a file and receive real-time transcriptions:

import asyncio
import json
import os
import pathlib
from urllib.parse import urlencode

import websockets

BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text"
params = {
    "language": "en",
    "encoding": "linear16",
    "sample_rate": "16000",
    "word_timestamps": "true",
}
WS_URL = f"{BASE_WS_URL}?{urlencode(params)}"

API_KEY = os.getenv("SMALLEST_API_KEY")
AUDIO_FILE = "path/to/audio.wav"
CHUNK_SIZE = 4096  # bytes per binary WebSocket message
CHUNK_DELAY_SECONDS = 0.05  # 50ms delay between chunks


async def stream_audio():
    """Stream AUDIO_FILE to the STT WebSocket and print transcripts as they arrive.

    Audio is sent in CHUNK_SIZE-byte binary messages from a background task
    while this coroutine reads responses. A {"type": "finalize"} message is
    sent after the last chunk. The receive loop ends when a response has
    "is_last" set or the connection closes.

    Raises:
        RuntimeError: if the SMALLEST_API_KEY environment variable is not set.
    """
    if not API_KEY:
        # Fail early instead of connecting with "Bearer None".
        raise RuntimeError("SMALLEST_API_KEY environment variable is not set")

    headers = {"Authorization": f"Bearer {API_KEY}"}

    # `additional_headers` is the keyword of the current asyncio client in
    # `websockets`; the legacy client named this argument `extra_headers`.
    async with websockets.connect(WS_URL, additional_headers=headers) as ws:
        print("Connected to STT WebSocket")

        audio_bytes = pathlib.Path(AUDIO_FILE).read_bytes()
        print(f"Streaming {len(audio_bytes)} bytes from {os.path.basename(AUDIO_FILE)}")

        async def send_chunks():
            """Send the audio in fixed-size chunks, then signal end of stream."""
            for offset in range(0, len(audio_bytes), CHUNK_SIZE):
                await ws.send(audio_bytes[offset:offset + CHUNK_SIZE])
                await asyncio.sleep(CHUNK_DELAY_SECONDS)

            print("Finished sending audio, sending end signal...")
            await ws.send(json.dumps({"type": "finalize"}))

        # Send concurrently so responses can be read while audio is streaming.
        sender = asyncio.create_task(send_chunks())

        try:
            async for message in ws:
                try:
                    data = json.loads(message)
                except json.JSONDecodeError:
                    print("Received raw:", message)
                    continue

                print("Received:", json.dumps(data, indent=2))

                # Handle partial transcripts
                if not data.get("is_final"):
                    print(f"Partial: {data.get('transcript')}")
                else:
                    print(f"Final: {data.get('transcript')}")
                    print(f"Full transcript: {data.get('full_transcript')}")

                # Checked independently of is_final so the loop always ends
                # when the server marks the session complete.
                if data.get("is_last"):
                    print("Transcription complete!")
                    break
        except websockets.ConnectionClosed as e:
            print(f"Connection closed: {e.code} - {e.reason}")
        finally:
            # Stop the sender once receiving is over (no-op if it already
            # finished). gather(..., return_exceptions=True) collects the
            # sender's outcome without re-raising it here, so a send that
            # failed on a closed socket does not escape as an unhandled error.
            sender.cancel()
            (outcome,) = await asyncio.gather(sender, return_exceptions=True)
            if isinstance(outcome, Exception):
                print(f"Sender stopped with error: {outcome!r}")


if __name__ == "__main__":
    asyncio.run(stream_audio())

Node.js Example

This example demonstrates real-time transcription using the ws library:

const WebSocket = require("ws");
const fs = require("fs");

const API_KEY = process.env.SMALLEST_API_KEY;
const AUDIO_FILE = "path/to/audio.wav";
const CHUNK_SIZE = 4096; // bytes per binary WebSocket message
const CHUNK_DELAY_MS = 50; // 50ms delay between chunks

if (!API_KEY) {
  // Fail early instead of connecting with "Bearer undefined".
  throw new Error("SMALLEST_API_KEY environment variable is not set");
}

// Read the audio before connecting so a bad path fails before a session opens.
const audioBuffer = fs.readFileSync(AUDIO_FILE);

const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
url.searchParams.append("language", "en");
url.searchParams.append("encoding", "linear16");
url.searchParams.append("sample_rate", "16000");
url.searchParams.append("word_timestamps", "true");

const ws = new WebSocket(url.toString(), {
  headers: {
    Authorization: `Bearer ${API_KEY}`,
  },
});

ws.on("open", () => {
  console.log("Connected to STT WebSocket");

  let offset = 0;

  // Sends one chunk, then re-schedules itself until the buffer is exhausted.
  const sendChunk = () => {
    // The socket may have closed between timer ticks; stop the loop if so.
    if (ws.readyState !== WebSocket.OPEN) {
      return;
    }

    if (offset >= audioBuffer.length) {
      console.log("Finished sending audio, sending end signal...");
      ws.send(JSON.stringify({ type: "finalize" }));
      return;
    }

    // subarray() is the non-deprecated equivalent of Buffer#slice().
    ws.send(audioBuffer.subarray(offset, offset + CHUNK_SIZE));
    offset += CHUNK_SIZE;

    setTimeout(sendChunk, CHUNK_DELAY_MS);
  };

  sendChunk();
});

ws.on("message", (data) => {
  let message;
  try {
    message = JSON.parse(data.toString());
  } catch (error) {
    console.error("Error parsing message:", error);
    return;
  }

  console.log("Received:", JSON.stringify(message, null, 2));

  // Handle partial transcripts
  if (!message.is_final) {
    console.log(`Partial: ${message.transcript}`);
  } else {
    console.log(`Final: ${message.transcript}`);
    console.log(`Full transcript: ${message.full_transcript}`);
  }

  // Checked independently of is_final so the socket is always closed
  // when the server marks the session complete.
  if (message.is_last) {
    console.log("Transcription complete!");
    ws.close();
  }
});

ws.on("error", (error) => {
  console.error("WebSocket error:", error.message);
});

ws.on("close", (code, reason) => {
  console.log(`Connection closed: ${code} - ${reason.toString()}`);
});

Browser JavaScript Example

This example shows how to stream audio from a file input in the browser:

// Placeholder value.
// NOTE(review): this constant is never used below. The browser WebSocket
// constructor cannot set an Authorization header, so the connection is
// unauthenticated as written. Confirm the supported browser auth mechanism,
// and avoid shipping a long-lived API key in client-side code.
const API_KEY = "SMALLEST_API_KEY";

const CHUNK_SIZE = 4096; // bytes per binary WebSocket message
const CHUNK_DELAY_MS = 50; // 50ms delay between chunks

/**
 * Streams an audio file to the STT WebSocket and forwards transcripts to the
 * page through updateTranscript() / updatePartialTranscript(). Those two
 * functions are not defined in this snippet; the page must provide them.
 *
 * The returned promise resolves as soon as the socket is created, not when
 * transcription finishes.
 *
 * @param {Blob} audioFile - File (or Blob) selected by the user.
 * @returns {Promise<void>}
 */
async function transcribeAudio(audioFile) {
  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  url.searchParams.append("language", "en");
  url.searchParams.append("encoding", "linear16");
  url.searchParams.append("sample_rate", "16000");
  url.searchParams.append("word_timestamps", "true");

  const ws = new WebSocket(url.toString());

  ws.onopen = async () => {
    console.log("Connected to STT WebSocket");

    const arrayBuffer = await audioFile.arrayBuffer();
    let offset = 0;

    // Sends one chunk, then re-schedules itself until the file is exhausted.
    const sendChunk = () => {
      // The socket may have closed while the file was loading or between
      // timer ticks; stop the loop instead of queueing data on a dead socket.
      if (ws.readyState !== WebSocket.OPEN) {
        return;
      }

      if (offset >= arrayBuffer.byteLength) {
        console.log("Finished sending audio");
        ws.send(JSON.stringify({ type: "finalize" }));
        return;
      }

      ws.send(arrayBuffer.slice(offset, offset + CHUNK_SIZE));
      offset += CHUNK_SIZE;

      setTimeout(sendChunk, CHUNK_DELAY_MS);
    };

    sendChunk();
  };

  ws.onmessage = (event) => {
    try {
      const message = JSON.parse(event.data);
      console.log("Received:", message);

      // Update UI with transcript
      if (message.is_final) {
        updateTranscript(message.full_transcript);
      } else {
        updatePartialTranscript(message.transcript);
      }

      if (message.is_last) {
        console.log("Transcription complete!");
        ws.close();
      }
    } catch (error) {
      console.error("Error parsing message:", error);
    }
  };

  ws.onerror = (error) => {
    console.error("WebSocket error:", error);
  };

  ws.onclose = (event) => {
    console.log(`Connection closed: ${event.code}`);
  };
}

// Example usage with file input
const fileInput = document.getElementById("audioFile");
fileInput.addEventListener("change", (e) => {
  const file = e.target.files[0];
  if (file) {
    transcribeAudio(file);
  }
});

Streaming from Microphone

Here’s an example of streaming live audio from a microphone in the browser:

// Placeholder value.
// NOTE(review): this constant is never used below. The browser WebSocket
// constructor cannot set an Authorization header, so the connection is
// unauthenticated as written. Confirm the supported browser auth mechanism,
// and avoid shipping a long-lived API key in client-side code.
const API_KEY = "SMALLEST_API_KEY";

const STREAM_DURATION_MS = 30000; // Stop streaming after 30 seconds (example)
const FINAL_RESULT_TIMEOUT_MS = 5000; // Max wait for is_last after finalize

/**
 * Converts Web Audio float samples (nominally -1.0..1.0) to 16-bit PCM.
 * Out-of-range samples are clamped to the Int16 limits.
 *
 * @param {Float32Array} samples
 * @returns {Int16Array}
 */
function floatTo16BitPcm(samples) {
  const pcm = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    pcm[i] = Math.max(-32768, Math.min(32767, samples[i] * 32768));
  }
  return pcm;
}

/**
 * Captures microphone audio, streams it to the STT WebSocket as 16-bit PCM,
 * and logs final transcripts. After STREAM_DURATION_MS it stops capturing,
 * sends "finalize", and keeps the socket open until the server reports
 * is_last (or FINAL_RESULT_TIMEOUT_MS elapses) so the last transcript is
 * not lost.
 *
 * @returns {Promise<void>} Resolves once capture and the socket are set up.
 */
async function streamMicrophone() {
  // Get microphone access
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const audioContext = new AudioContext({ sampleRate: 16000 });
  const source = audioContext.createMediaStreamSource(stream);

  // Create script processor for audio chunks.
  // createScriptProcessor is deprecated in favour of AudioWorklet; it is kept
  // here to keep the example short.
  const processor = audioContext.createScriptProcessor(4096, 1, 1);

  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  url.searchParams.append("language", "en");
  url.searchParams.append("encoding", "linear16");
  url.searchParams.append("sample_rate", "16000");

  const ws = new WebSocket(url.toString());

  let stopTimer;
  let closeTimer;
  let captureStopped = false;

  // Releases the microphone and audio graph; safe to call more than once.
  const stopCapture = () => {
    if (captureStopped) {
      return;
    }
    captureStopped = true;
    processor.onaudioprocess = null;
    processor.disconnect();
    source.disconnect();
    stream.getTracks().forEach((track) => track.stop());
    audioContext.close().catch(console.error);
  };

  ws.onopen = () => {
    console.log("Connected, starting microphone stream");

    processor.onaudioprocess = (e) => {
      // Skip audio callbacks that fire after the socket has closed.
      if (ws.readyState !== WebSocket.OPEN) {
        return;
      }
      ws.send(floatTo16BitPcm(e.inputBuffer.getChannelData(0)).buffer);
    };

    source.connect(processor);
    processor.connect(audioContext.destination);
  };

  ws.onmessage = (event) => {
    let message;
    try {
      message = JSON.parse(event.data);
    } catch (error) {
      console.error("Error parsing message:", error);
      return;
    }

    if (message.is_final) {
      console.log("Transcript:", message.full_transcript);
    }

    // The server has delivered everything; now it is safe to close.
    if (message.is_last) {
      ws.close();
    }
  };

  ws.onerror = (error) => {
    console.error("WebSocket error:", error);
  };

  ws.onclose = () => {
    // Release the microphone even when the socket closes before the timer.
    clearTimeout(stopTimer);
    clearTimeout(closeTimer);
    stopCapture();
  };

  stopTimer = setTimeout(() => {
    stopCapture();

    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ type: "finalize" }));
      // Do not close yet: wait for is_last, with a fallback timeout in case
      // it never arrives.
      closeTimer = setTimeout(() => ws.close(), FINAL_RESULT_TIMEOUT_MS);
    } else {
      // send() would throw while still CONNECTING; just tear the socket down.
      ws.close();
    }
  }, STREAM_DURATION_MS);
}

// Start streaming
streamMicrophone().catch(console.error);

Handling Responses

The WebSocket API sends JSON messages with the following structure:

{
  "session_id": "sess_12345abcde",
  "transcript": "Hello, how are you?",
  "full_transcript": "Hello, how are you?",
  "is_final": true,
  "is_last": false,
  "language": "en",
  "word_timestamps": [
    {
      "word": "Hello",
      "start": 0.0,
      "end": 0.5
    }
  ]
}

Key Response Fields

  • is_final: false indicates a partial/interim transcript; true indicates a final transcript
  • is_last: true when the session is complete
  • transcript: Current segment text
  • full_transcript: Accumulated text from the entire session
  • word_timestamps: Only included when word_timestamps=true in query params

Browser Prerequisites

The browser examples require no additional dependencies; they use the browser's native WebSocket API.

Error Handling

Always implement proper error handling for production use:

// Log transport-level failures reported by the socket.
ws.onerror = (err) => {
  console.error("WebSocket error:", err);
  // Implement retry logic or user notification
};

// Report any closure whose code is not 1000 (normal closure).
ws.onclose = ({ code, reason }) => {
  if (code === 1000) {
    return;
  }
  console.error(`Unexpected closure: ${code} - ${reason}`);
  // Implement reconnection logic
};
11};