Hi @Thomas Bauer,
Thank you for reaching out to the Microsoft Q&A forum!
To achieve sentence-by-sentence transcription with the Azure Speech SDK, you can use a small wrapper class
(SpeechRecognizerFromFile, defined in the example below) with the following setup:
- Segmentation Strategy: Set
  `speech_config.set_property(speechsdk.PropertyId.Speech_SegmentationStrategy, "Semantic")`
  to leverage semantic segmentation.
- Sentence-by-Sentence Output: A custom `recognized_handler` splits each final result into sentences
  based on punctuation (periods, question marks, exclamation marks), so each sentence is printed once it has been fully recognized.
Here’s a complete code example implementing this approach:
import azure.cognitiveservices.speech as speechsdk
import time
import re
class SpeechRecognizerFromFile:
    """Transcribe an audio file with the Azure Speech SDK, one sentence per line.

    The recognizer is configured with the "Semantic" segmentation strategy and
    runs in continuous-recognition mode. Every final recognition result is
    split on sentence-ending punctuation and each sentence is printed on its
    own line.

    Args:
        subscription_key: Key passed to ``speechsdk.SpeechConfig``.
        region: Region passed to ``speechsdk.SpeechConfig`` (e.g. "eastus").
        audio_file: Path of the audio file passed to ``speechsdk.audio.AudioConfig``.
        language: Recognition language assigned to
            ``speech_config.speech_recognition_language``. Defaults to "en-US".
    """

    # Split point: a run of whitespace that directly follows '.', '!' or '?'.
    # The look-behind keeps the punctuation attached to its sentence.
    _SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')

    def __init__(self, subscription_key, region, audio_file, language="en-US"):
        # Initialize speech configuration
        self.speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=region)
        self.speech_config.speech_recognition_language = language
        # Set segmentation strategy to 'Semantic'
        self.speech_config.set_property(speechsdk.PropertyId.Speech_SegmentationStrategy, "Semantic")
        # Create audio configuration using the provided audio file
        audio_config = speechsdk.audio.AudioConfig(filename=audio_file)
        # Initialize the speech recognizer with the audio configuration
        self.speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.speech_config, audio_config=audio_config
        )
        # Set to True by stop_cb once recognition has been stopped;
        # start_recognition polls this flag.
        self.done = False
        # Set as soon as a stop has been requested, so that a second stop
        # callback does not request another stop.
        self._stopping = False

    @staticmethod
    def split_sentences(text):
        """Split *text* into stripped, non-empty sentences.

        A sentence boundary is whitespace that follows '.', '!' or '?'.

        Args:
            text: The text to split; may be empty or ``None``.

        Returns:
            A list of sentences with surrounding whitespace removed. Empty
            fragments (e.g. from empty input or trailing whitespace) are
            dropped, so the list may be empty.
        """
        if not text:
            return []
        fragments = SpeechRecognizerFromFile._SENTENCE_BOUNDARY.split(text)
        stripped = (fragment.strip() for fragment in fragments)
        return [sentence for sentence in stripped if sentence]

    def stop_cb(self, evt):
        """Callback function to stop continuous recognition.

        Connected to both the ``session_stopped`` and ``canceled`` events, so
        it can be invoked more than once for one session; only the first
        invocation stops the recognizer.

        Args:
            evt: The event object delivered by the SDK (only printed).
        """
        print(f"CLOSING on {evt}")
        if self._stopping:
            return
        self._stopping = True
        self.speech_recognizer.stop_continuous_recognition()
        # Flip the flag only after the stop call has returned, so the polling
        # loop in start_recognition does not exit before the stop completes.
        self.done = True

    def recognized_handler(self, evt):
        """Callback for final recognition results.

        Prints each sentence of a recognized result on its own line, or a
        diagnostic message when nothing was recognized or the result was
        canceled.

        Args:
            evt: Event whose ``result`` carries ``reason`` and ``text``.
        """
        reason = evt.result.reason
        if reason == speechsdk.ResultReason.RecognizedSpeech:
            # Empty fragments are skipped so no blank lines are printed.
            for sentence in self.split_sentences(evt.result.text):
                print(sentence)
        elif reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized.")
        elif reason == speechsdk.ResultReason.Canceled:
            # NOTE(review): cancellations presumably arrive through the
            # `canceled` event rather than `recognized`; branch kept as a
            # safety net -- confirm against the SDK documentation.
            cancellation_details = evt.result.cancellation_details
            print(f"Speech Recognition canceled: {cancellation_details.reason}")
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print(f"Error details: {cancellation_details.error_details}")

    def start_recognition(self):
        """Start continuous speech recognition and block until it has stopped.

        Connects the event handlers, starts continuous recognition, and then
        polls ``self.done`` every 0.5 seconds until ``stop_cb`` sets it.
        """
        recognizer = self.speech_recognizer
        # Connect events to handlers
        recognizer.recognized.connect(self.recognized_handler)  # Only handle final recognized results
        recognizer.session_started.connect(lambda evt: print(f"SESSION STARTED: {evt}"))
        recognizer.session_stopped.connect(lambda evt: print(f"SESSION STOPPED: {evt}"))
        recognizer.canceled.connect(lambda evt: print(f"CANCELED: {evt}"))
        # Connect the stop callback to stop recognition when needed
        recognizer.session_stopped.connect(self.stop_cb)
        recognizer.canceled.connect(self.stop_cb)
        # Start continuous recognition
        print("Starting continuous recognition...")
        recognizer.start_continuous_recognition()
        # Keep the program running until 'done' is set to True
        while not self.done:
            time.sleep(0.5)
# Example usage: replace the placeholder values below with your own.
if __name__ == "__main__":
    # Speech resource key and region (region e.g. "eastus").
    speech_key, speech_region = "SPEECH_KEY", "SPEECH_REGION"
    # Path to the audio file to transcribe.
    wav_path = r"C:\Users\XXXXXXXXXX\Downloads\Untitled.wav"
    # Build the recognizer and block until the whole file has been processed.
    SpeechRecognizerFromFile(
        subscription_key=speech_key,
        region=speech_region,
        audio_file=wav_path,
    ).start_recognition()
Output:
I hope this helps. If you still face any errors, please let us know and we will try to figure out the issue.
If this answers your query, please click "Accept Answer" and select "Yes" for "Was this answer helpful?".