Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions fern/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -358,9 +358,6 @@ navigation:
href: /docs/medical-scribe-best-practices
- link: Build a contact center application
href: /docs/contact-center-best-practices
- page: Apply LLMs to pre-recorded audio
path: pages/lemur/apply-llms-to-audio-files.mdx
slug: /apply-llms-to-audio-files
- page: Overview
path: pages/guides/pre-recorded.mdx
slug: /pre-recorded-audio
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ Next, we'll create a promise that will submit the file path for transcription. M
const getTranscript = (filePath) => new Promise((resolve, reject) => {
client.transcripts.transcribe({
audio: filePath,
language_detection: true
language_detection: true,
speech_models: ["universal-3-pro", "universal-2"]
})
.then(result => resolve(result))
.catch(error => reject(error));
Expand Down Expand Up @@ -124,6 +125,7 @@ const getTranscript = (filePath) => new Promise((resolve, reject) => {
client.transcripts.transcribe({
audio: filePath,
language_detection: true,
speech_models: ["universal-3-pro", "universal-2"],
})
.then(result => resolve(result))
.catch(error => reject(error));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import assemblyai as aai
import subprocess

aai.settings.api_key = "YOUR_API_KEY"
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()

def get_duration_ffprobe(file_path):
Expand Down Expand Up @@ -66,7 +67,7 @@ def check_audio_durations(file_path):

def transcribe(file):
print("Executing transcription as audio durations are consistent.")
transcript = transcriber.transcribe(file)
transcript = transcriber.transcribe(file, config)
print(transcript.text)

def transcode(input_file, output_file):
Expand Down Expand Up @@ -146,6 +147,7 @@ import assemblyai as aai
import subprocess

aai.settings.api_key = "YOUR_API_KEY"
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()
```

Expand Down Expand Up @@ -225,7 +227,7 @@ Define the `transcribe` function. This will run only when the duration is consis
```python
def transcribe(file):
print("Executing transcription as audio durations are consistent.")
transcript = transcriber.transcribe(file)
transcript = transcriber.transcribe(file, config)
print(transcript.text)
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,15 @@ const params = {
audio: audioUrl,
language_detection: true,
language_confidence_threshold: 0.8,
speech_models: ["universal-3-pro", "universal-2"],
// Add any other params
};
```

```python
transcriber = aai.Transcriber()
audio_url = ("https://example.org/audio.mp3")
config = aai.TranscriptionConfig(language_detection=True, language_confidence_threshold=0.8)
config = aai.TranscriptionConfig(language_detection=True, language_confidence_threshold=0.8, speech_models=["universal-3-pro", "universal-2"])
transcript = transcriber.transcribe(audio_url, config)
```

Expand Down Expand Up @@ -151,7 +152,7 @@ run(params);
if transcript.error:
if "below the requested confidence threshold value" in transcript.error:
print(f"{transcript.error}. Running transcript again with language set to '{default_language}'.")
new_config = aai.TranscriptionConfig(language_code=default_language)
new_config = aai.TranscriptionConfig(language_code=default_language, speech_models=["universal-3-pro", "universal-2"])
transcript = transcriber.transcribe(audio_url, new_config)
print(f"Transcript ID: {transcript.id}")
print(transcript.text)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ aai.settings.api_key = "YOUR_API_KEY"
batch_folder = "audio"
transcription_result_folder = "transcripts"

config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()

def transcribe_audio(audio_file):
transcriber = aai.Transcriber()
transcript = transcriber.transcribe(os.path.join(batch_folder, audio_file))
transcript = transcriber.transcribe(os.path.join(batch_folder, audio_file), config)
if transcript.status == "completed":
with open(f"{transcription_result_folder}/{audio_file}.txt", "w") as f:
f.write(transcript.text)
Expand Down Expand Up @@ -77,8 +78,9 @@ Function to transcribe an audio file. Once the transcript is complete, a .txt fi

```python
def transcribe_audio(audio_file):
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()
transcript = transcriber.transcribe(os.path.join(batch_folder, audio_file))
transcript = transcriber.transcribe(os.path.join(batch_folder, audio_file), config)
if transcript.status == "completed":
with open(f"{transcription_result_folder}/{audio_file}.txt", "w") as f:
f.write(transcript.text)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Next create the transcript with your audio file, either via local audio file or
```javascript
const transcript = await client.transcripts.transcribe({
audio_url: "./sample.mp4",
speech_models: ["universal-3-pro", "universal-2"],
});
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ assemblyai.settings.api_key = "API_KEY_HERE"


def transcribe(audio_path):
config = assemblyai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = assemblyai.Transcriber()
transcript = transcriber.transcribe(audio_path)
transcript = transcriber.transcribe(audio_path, config)

if transcript.status == assemblyai.TranscriptStatus.error:
print(f"Transcription failed: {transcript.error}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,14 @@ updated_path = convert_to_mono_if_duplicate(original_path)
# Check if we modified the file and thus appended _mono to it.
if updated_path != original_path:
# Use the default configuration, which will treat the file as mono, which it now is.
transcriber = aai.Transcriber()
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber(config=config)

# Transcribe our new mono file.
print(transcriber.transcribe(updated_path).text)
else:
# Submit the file as Multi Channel if the content wasn't the same.
config = aai.TranscriptionConfig(multichannel=True)
config = aai.TranscriptionConfig(multichannel=True, speech_models=["universal-3-pro", "universal-2"])

# Load the config into our Transcriber.
transcriber = aai.Transcriber(config=config)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def handle_error_transcription(audio_url, transcriber, config, retries=1, wait_t

audio_url = "YOUR_AUDIO_URL"
transcriber = aai.Transcriber()
config = aai.TranscriptionConfig()
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])

transcript = handle_error_transcription(audio_url, transcriber, config, retries=1, wait_time=5)
if transcript:
Expand Down Expand Up @@ -99,7 +99,7 @@ Create a `Transcriber` object and specify features in `TranscriptionConfig`.

```python
transcriber = aai.Transcriber()
config = aai.TranscriptionConfig()
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
```

Call the function to handle transcription with error handling. Specify number of retries and wait time. Return the transcribed text if transcription is successful.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def transcribe_with_upload_retry(file_path, retries=3, delay=5):
for attempt in range(retries):
try:
# Attempt to transcribe the file
config = aai.TranscriptionConfig(speaker_labels=True)
config = aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"])
transcript = transcriber.transcribe(file_path, config)
return transcript

Expand Down Expand Up @@ -76,7 +76,7 @@ def transcribe_with_upload_retry(file_path, retries=3, delay=5):
for attempt in range(retries):
try:
# Attempt to transcribe the file
config = aai.TranscriptionConfig(speaker_labels=True)
config = aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"])
transcript = transcriber.transcribe(file_path, config)
return transcript

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ aai.settings.api_key = "YOUR_ASSEMBLYAI_API_KEY"
EASYCRON_API_TOKEN = "YOUR_EASYCRON_API_KEY"

# Create transcriber instance and transcribe audio
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()
transcript = transcriber.transcribe('https://assembly.ai/sports_injuries.mp3')
transcript = transcriber.transcribe('https://assembly.ai/sports_injuries.mp3', config)

# Get the transcript ID
transcript_id = transcript.id
Expand Down Expand Up @@ -80,10 +81,11 @@ aai.settings.api_key = f"{YOUR_API_KEY}"
```

```python
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()

# this is just an example file
transcript = transcriber.transcribe('https://assembly.ai/sports_injuries.mp3')
transcript = transcriber.transcribe('https://assembly.ai/sports_injuries.mp3', config)
```

Store the transcript ID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import assemblyai as aai
# SETTINGS
aai.settings.api_key = "YOUR-API-KEY"
filename = "YOUR-FILE-NAME"
transcriber = aai.Transcriber(config=aai.TranscriptionConfig(speaker_labels=True))
transcriber = aai.Transcriber(config=aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"]))
transcript = transcriber.transcribe(filename)

# Maximum number of words per subtitle
Expand Down Expand Up @@ -97,7 +97,7 @@ import assemblyai as aai
# SETTINGS
aai.settings.api_key = "YOUR-API-KEY"
filename = "YOUR-FILE-NAME"
transcriber = aai.Transcriber(config=aai.TranscriptionConfig(speaker_labels=True))
transcriber = aai.Transcriber(config=aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"]))
transcript = transcriber.transcribe(filename)

# Maximum number of words per subtitle
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt

aai.settings.api_key = "YOUR_API_KEY"

config = aai.TranscriptionConfig(speaker_labels=True)
config = aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()
transcript = transcriber.transcribe("./my-audio.mp3", config)
utterances = transcript.utterances
Expand Down Expand Up @@ -65,7 +65,7 @@ aai.settings.api_key = "YOUR_API_KEY"
Create a `TranscriptionConfig` object and set speaker labels to `True`.

```python
config = aai.TranscriptionConfig(speaker_labels=True)
config = aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"])
```

Create a `Transcriber` object.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ import assemblyai as aai

aai.settings.api_key = "YOUR-API-KEY"

config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()

transcript = transcriber.transcribe("./my-audio.mp3")
transcript = transcriber.transcribe("./my-audio.mp3", config)

def second_to_timecode(x: float) -> str:
hour, x = divmod(x, 3600)
Expand Down Expand Up @@ -75,19 +76,20 @@ aai.settings.api_key = "YOUR-API-KEY"
Create a Transcriber object.

```python
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()
```

Use the Transcriber object's transcribe method and pass in the audio file's path as a parameter. The transcribe method saves the results of the transcription to the Transcriber object's transcript attribute.

```python
transcript = transcriber.transcribe("./my-audio.mp3")
transcript = transcriber.transcribe("./my-audio.mp3", config)
```

Alternatively, you can pass in the URL of the publicly accessible audio file on the internet.

```python
transcript = transcriber.transcribe("https://storage.googleapis.com/aai-docs-samples/espn.m4a")
transcript = transcriber.transcribe("https://storage.googleapis.com/aai-docs-samples/espn.m4a", config)
```

Define a function that converts seconds to timecodes
Expand Down
5 changes: 3 additions & 2 deletions fern/pages/guides/cookbooks/core-transcription/subtitles.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,20 @@ aai.settings.api_key = "YOUR_API_KEY"
Create a `Transcriber` object.

```python
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()
```

Use the `Transcriber` object's `transcribe` method and pass in the audio file's path as a parameter. The `transcribe` method saves the results of the transcription to the `Transcriber` object's `transcript` attribute.

```python
transcript = transcriber.transcribe("./my-audio.mp3")
transcript = transcriber.transcribe("./my-audio.mp3", config)
```

Alternatively, you can pass in the URL of the publicly accessible audio file on the internet.

```python
transcript = transcriber.transcribe("https://example.org/audio.mp3")
transcript = transcriber.transcribe("https://example.org/audio.mp3", config)
```

Export SRT subtitles with the `export_subtitles_srt` method.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def calculate_talk_listen_ratios(transcript):

transcriber = aai.Transcriber()
audio_url = ("YOUR_AUDIO_URL")
config = aai.TranscriptionConfig(speaker_labels=True)
config = aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"])
transcript = transcriber.transcribe(audio_url, config)

talk_listen_stats = calculate_talk_listen_ratios(transcript)
Expand Down Expand Up @@ -128,7 +128,7 @@ Define a `transcriber`, an `audio_url` set to a link to the audio file (replace
```python
transcriber = aai.Transcriber()
audio_url = ("https://api.assemblyai-solutions.com/storage/v1/object/public/dual-channel-phone-data/Fisher_Call_Centre/audio05851.wav")
config = aai.TranscriptionConfig(speaker_labels=True)
config = aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"])
transcript = transcriber.transcribe(audio_url, config)

talk_listen_stats = calculate_talk_listen_ratios(transcript)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ pc = Pinecone(api_key="PINECONE_KEY_HERE")
aai.settings.api_key = "AAI_KEY_HERE"

def transcribe(file_url):
config = aai.TranscriptionConfig(speaker_labels=True) # Speaker labels must be enabled for this Cookbook.
config = aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"]) # Speaker labels must be enabled for this Cookbook.

transcriber = aai.Transcriber(config=config)

Expand Down Expand Up @@ -348,7 +348,7 @@ aai.settings.api_key = "AAI_KEY_HERE"


def transcribe(file_url):
config = aai.TranscriptionConfig(speaker_labels=True) # Speaker labels must be enabled for this Cookbook.
config = aai.TranscriptionConfig(speaker_labels=True, speech_models=["universal-3-pro", "universal-2"]) # Speaker labels must be enabled for this Cookbook.

transcriber = aai.Transcriber(config=config)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ aai.settings.api_key = "<YOUR-API-KEY>"

audio_file = "https://assembly.ai/chinese-interview.mp4"

config = aai.TranscriptionConfig(language_code="zh")
config = aai.TranscriptionConfig(language_code="zh", speech_models=["universal-3-pro", "universal-2"])

transcript = aai.Transcriber(config=config).transcribe(audio_file)

Expand Down Expand Up @@ -59,7 +59,7 @@ Specify your audio source and create a configuration for Chinese language transc
```python
audio_file = "https://assembly.ai/chinese-interview.mp4"

config = aai.TranscriptionConfig(language_code="zh")
config = aai.TranscriptionConfig(language_code="zh", speech_models=["universal-3-pro", "universal-2"])

transcript = aai.Transcriber(config=config).transcribe(audio_file)
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ headers = {"authorization": assembly_key, "content-type": "application/json"}
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"

# Use the presigned URL as the `audio_url` in the POST request.
json = {"audio_url": p_url}
json = {"audio_url": p_url, "speech_models": ["universal-3-pro", "universal-2"]}

# Queue the audio file for transcription with a POST request.
post_response = requests.post(transcript_endpoint, json=json, headers=headers)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ def transcribe_youtube_video(video_url: str, api_key: str) -> str:
aai.settings.api_key = api_key

# Transcribe the downloaded audio file
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()
transcript = transcriber.transcribe(f"{video_id}.m4a")
transcript = transcriber.transcribe(f"{video_id}.m4a", config)

return transcript.text

Expand Down Expand Up @@ -101,8 +102,9 @@ import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()
transcript = transcriber.transcribe("wtolixa9XTg.m4a")
transcript = transcriber.transcribe("wtolixa9XTg.m4a", config)
print(transcript.text)
```

Expand Down Expand Up @@ -138,6 +140,7 @@ import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()
transcript = transcriber.transcribe("wtolixa9XTg.m4a")
transcript = transcriber.transcribe("wtolixa9XTg.m4a", config)
```
Loading
Loading