Code Examples

View as Markdown

This guide contains complete examples demonstrating real-time audio transcription for various use cases in different programming languages.

Prerequisites

Python

$ pip install websockets

Node.js

$ npm install ws

Python Example

This example shows how to stream audio from a file and receive real-time transcriptions:

import asyncio
import websockets
import json
import os
import requests
from urllib.parse import urlencode

BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text"
SAMPLE_URL = (
    "https://github.com/smallest-inc/cookbook/raw/main/"
    "speech-to-text/getting-started/samples/audio.wav"
)
params = {
    "language": "en",
    "encoding": "linear16",
    "sample_rate": "24000",  # must match the source audio sample rate
    "word_timestamps": "true",
}
WS_URL = f"{BASE_WS_URL}?{urlencode(params)}"

API_KEY = os.environ["SMALLEST_API_KEY"]


async def stream_audio():
    """Stream a WAV file to the STT WebSocket and print live transcripts.

    Uploads the audio in 4 KiB chunks, 50 ms apart to approximate real
    time, then sends a ``close_stream`` control message. Concurrently
    reads partial and final transcript messages until the server marks
    the session complete (``is_last``).
    """
    headers = {"Authorization": f"Bearer {API_KEY}"}

    async with websockets.connect(WS_URL, additional_headers=headers) as ws:
        print("Connected to STT WebSocket")

        # Download sample audio (or replace with your own bytes).
        # Fail fast on a bad download instead of streaming an HTML error
        # page to the transcription service.
        resp = requests.get(SAMPLE_URL, timeout=30)
        resp.raise_for_status()
        audio_bytes = resp.content
        chunk_size = 4096
        offset = 0

        print(f"Streaming {len(audio_bytes)} bytes")

        async def send_chunks():
            # Feed the socket chunk by chunk, then signal end-of-stream.
            nonlocal offset
            while offset < len(audio_bytes):
                chunk = audio_bytes[offset: offset + chunk_size]
                await ws.send(chunk)
                offset += chunk_size
                await asyncio.sleep(0.05)  # 50ms delay between chunks

            print("Finished sending audio, closing stream...")
            await ws.send(json.dumps({"type": "close_stream"}))

        sender = asyncio.create_task(send_chunks())

        full_transcript = ""
        try:
            async for message in ws:
                try:
                    data = json.loads(message)
                    print("Received:", json.dumps(data, indent=2))

                    # is_final=False messages are interim results and are
                    # superseded by a later final segment.
                    if not data.get("is_final"):
                        print(f"Partial: {data.get('transcript')}")
                    else:
                        print(f"Final: {data.get('transcript')}")
                        full_transcript += data.get("transcript", "") or ""

                    if data.get("is_last"):
                        print("Transcription complete!")
                        print(f"Full Transcript: {full_transcript}")
                        break
                except json.JSONDecodeError:
                    # Non-JSON frames are logged verbatim.
                    print("Received raw:", message)
        except websockets.ConnectionClosed as e:
            print(f"Connection closed: {e.code} - {e.reason}")

        await sender


if __name__ == "__main__":
    asyncio.run(stream_audio())

Node.js Example

This example demonstrates real-time transcription using the ws library:

const WebSocket = require("ws");
const fs = require("fs");

const API_KEY = process.env.SMALLEST_API_KEY;
const AUDIO_FILE = "path/to/audio.wav";

// Build the WebSocket endpoint with the transcription options as query
// parameters.
const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
url.searchParams.append("language", "en");
url.searchParams.append("encoding", "linear16");
url.searchParams.append("sample_rate", "16000");
url.searchParams.append("word_timestamps", "true");

const ws = new WebSocket(url.toString(), {
  headers: {
    Authorization: `Bearer ${API_KEY}`,
  },
});

ws.on("open", () => {
  console.log("Connected to STT WebSocket");

  const audioBuffer = fs.readFileSync(AUDIO_FILE);
  const chunkSize = 4096;
  let position = 0;

  // Pace the upload at one 4 KiB chunk every 50 ms, then tell the server
  // the stream is finished.
  const pumpNextChunk = () => {
    if (position >= audioBuffer.length) {
      console.log("Finished sending audio, closing stream...");
      ws.send(JSON.stringify({ type: "close_stream" }));
      return;
    }

    ws.send(audioBuffer.slice(position, position + chunkSize));
    position += chunkSize;

    setTimeout(pumpNextChunk, 50); // 50ms delay between chunks
  };

  pumpNextChunk();
});

let fullTranscript = "";

ws.on("message", (data) => {
  try {
    const message = JSON.parse(data.toString());
    console.log("Received:", JSON.stringify(message, null, 2));

    if (message.is_final) {
      // Final segment: log it and fold it into the session transcript.
      console.log(`Final: ${message.transcript}`);
      fullTranscript += message.transcript ?? "";

      if (message.is_last) {
        console.log("Transcription complete!");
        console.log(`Full Transcript: ${fullTranscript}`);
        ws.close();
      }
    } else {
      // Interim result; a later is_final message supersedes it.
      console.log(`Partial: ${message.transcript}`);
    }
  } catch (error) {
    console.error("Error parsing message:", error);
  }
});

ws.on("error", (error) => {
  console.error("WebSocket error:", error.message);
});

ws.on("close", (code, reason) => {
  console.log(`Connection closed: ${code} - ${reason.toString()}`);
});

Browser JavaScript Example

This example shows how to stream audio from a file input in the browser:

const API_KEY = "SMALLEST_API_KEY";

// Stream an audio File/Blob to the STT WebSocket and route transcripts to
// the UI.
// NOTE(review): API_KEY is declared but never sent — the browser WebSocket
// API cannot attach custom headers; confirm how this endpoint expects
// browser clients to authenticate.
async function transcribeAudio(audioFile) {
  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  url.searchParams.append("language", "en");
  url.searchParams.append("encoding", "linear16");
  url.searchParams.append("sample_rate", "16000");
  url.searchParams.append("word_timestamps", "true");

  const ws = new WebSocket(url.toString());

  ws.onopen = async () => {
    console.log("Connected to STT WebSocket");

    const arrayBuffer = await audioFile.arrayBuffer();
    const chunkSize = 4096;
    let cursor = 0;

    // Send one 4 KiB slice every 50 ms, then signal end-of-stream.
    const pushSlice = () => {
      if (cursor >= arrayBuffer.byteLength) {
        console.log("Finished sending audio, closing stream...");
        ws.send(JSON.stringify({ type: "close_stream" }));
        return;
      }

      ws.send(arrayBuffer.slice(cursor, cursor + chunkSize));
      cursor += chunkSize;

      setTimeout(pushSlice, 50); // 50ms delay between chunks
    };

    pushSlice();
  };

  let fullTranscript = "";

  ws.onmessage = (event) => {
    try {
      const message = JSON.parse(event.data);
      console.log("Received:", message);

      // Final segments update the committed transcript; interim ones only
      // refresh the provisional display.
      if (message.is_final) {
        updateTranscript(message.transcript);
        fullTranscript += message.transcript ?? "";
      } else {
        updatePartialTranscript(message.transcript);
      }

      if (message.is_last) {
        console.log("Transcription complete!");
        console.log("Full Transcript:", fullTranscript);
        ws.close();
      }
    } catch (error) {
      console.error("Error parsing message:", error);
    }
  };

  ws.onerror = (error) => {
    console.error("WebSocket error:", error);
  };

  ws.onclose = (event) => {
    console.log(`Connection closed: ${event.code}`);
  };
}

// Example usage with file input
const fileInput = document.getElementById("audioFile");
fileInput.addEventListener("change", (e) => {
  const file = e.target.files[0];
  if (file) {
    transcribeAudio(file);
  }
});

Streaming from Microphone

Here’s an example of streaming live audio from a microphone in the browser:

const API_KEY = "SMALLEST_API_KEY";

// Capture microphone audio, convert it to 16-bit PCM, and stream it to the
// STT WebSocket for roughly 30 seconds.
async function streamMicrophone() {
  // Ask the user for microphone access.
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const audioContext = new AudioContext({ sampleRate: 16000 });
  const source = audioContext.createMediaStreamSource(stream);

  // ScriptProcessorNode delivers raw Float32 PCM in 4096-sample buffers.
  // NOTE(review): ScriptProcessorNode is deprecated in favor of
  // AudioWorklet, but it keeps this example short.
  const processor = audioContext.createScriptProcessor(4096, 1, 1);

  const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
  url.searchParams.append("language", "en");
  url.searchParams.append("encoding", "linear16");
  url.searchParams.append("sample_rate", "16000");

  const ws = new WebSocket(url.toString());

  ws.onopen = () => {
    console.log("Connected, starting microphone stream");

    processor.onaudioprocess = (e) => {
      const samples = e.inputBuffer.getChannelData(0);
      // Convert Float32 [-1, 1] samples to linear16, clamping overshoot.
      const pcm = new Int16Array(samples.length);
      for (let i = 0; i < samples.length; i++) {
        pcm[i] = Math.max(-32768, Math.min(32767, samples[i] * 32768));
      }
      ws.send(pcm.buffer);
    };

    source.connect(processor);
    processor.connect(audioContext.destination);
  };

  let fullTranscript = "";

  ws.onmessage = (event) => {
    const message = JSON.parse(event.data);
    if (message.is_final) {
      console.log("Transcript:", message.transcript);
      fullTranscript += message.transcript ?? "";
    }
    if (message.is_last) {
      console.log("Full Transcript:", fullTranscript);
      ws.close();
    }
  };

  // Stop streaming after 30 seconds (example)
  setTimeout(() => {
    processor.disconnect();
    source.disconnect();
    stream.getTracks().forEach((track) => track.stop());
    ws.send(JSON.stringify({ type: "close_stream" }));
  }, 30000);
}

// Start streaming
streamMicrophone().catch(console.error);

Handling Responses

The WebSocket API sends JSON messages with the following structure:

{
  "session_id": "sess_12345abcde",
  "transcript": "Hello, how are you?",
  "is_final": true,
  "is_last": false,
  "language": "en",
  "word_timestamps": [
    {
      "word": "Hello",
      "start": 0.0,
      "end": 0.5
    }
  ]
}

Key Response Fields

  • is_final: false indicates a partial/interim transcript; true indicates a final transcript
  • is_last: true when the session is complete
  • transcript: Current segment text. Concatenate each is_final=true value to build a session-level transcript on the client.
  • word_timestamps: Only included when word_timestamps=true in query params

Browser

No additional dependencies required - uses native WebSocket API.

Error Handling

Always implement proper error handling for production use:

ws.onerror = (error) => {
  console.error("WebSocket error:", error);
  // Implement retry logic or user notification
};

ws.onclose = (event) => {
  // Code 1000 is a normal, expected closure; anything else needs attention.
  if (event.code === 1000) return;
  console.error(`Unexpected closure: ${event.code} - ${event.reason}`);
  // Implement reconnection logic
};