Speech-to-text with Twilio: the Telugu transcript is always empty, and the system does not respond initially

MR. JACK!! 0 Reputation points
2024-11-04T06:03:08.5633333+00:00
  async def receive_json(self, text_data):
        """Handle one decoded Twilio Media Streams WebSocket message.

        Channels' JSON consumer delivers an already-parsed dict. Twilio
        sends events named 'connected', 'start', 'media' (base64 mu-law
        audio under media.payload) and 'stop' when the stream ends.

        Args:
            text_data: Parsed JSON message (dict) from the websocket.
        """
        try:
            event = text_data.get('event')
            if event == 'connected':
                logger.info("WebSocket connected event received")
            elif event == 'start':
                logger.info("Start event received, initializing stream")
            elif event == 'media':
                # Twilio payload is base64-encoded 8 kHz mono mu-law audio.
                audio_data = base64.b64decode(text_data['media']['payload'])
                self.speech_recognizer.push_audio(audio_data)
            elif event in ('stop', 'close'):
                # BUG FIX: Twilio's end-of-stream event is 'stop', not
                # 'close'; the original only matched 'close', so the socket
                # was never shut down when a call ended. 'close' is kept for
                # backward compatibility.
                logger.info(f"{event} event received")
                await self.close()
        except json.JSONDecodeError:
            # Defensive: text_data is normally already a dict here, so this
            # branch should not fire for Channels JSON consumers.
            logger.error("Error decoding JSON message")
        except KeyError:
            logger.error("Missing key in JSON message")
        except Exception as e:
            logger.error(f"Error processing message: {e}")



class AzureSpeechRecognizer:
    """Continuously recognize Twilio call audio (8 kHz, mono, mu-law)
    with Azure Speech via a push input stream.

    Fixes over the previous revision:
      * ``SpeechConfig.endpoint_id`` expects a Custom Speech deployment id
        (a GUID), not a URL. Assigning a hand-built ``wss://`` endpoint URL
        to it makes the service resolve a non-existent custom model, which
        is a likely cause of persistently empty transcripts. Silence
        timeouts are now applied through the documented ``PropertyId``
        settings instead.
      * The previous 0 ms timeouts are outside the service's accepted
        range; sane, tunable defaults are used.
      * ``canceled_cb`` now logs ``cancellation_details`` so the real
        service error (bad key, bad region, quota) is visible.
      * The recognition thread is a daemon so a hung session cannot keep
        the process alive at shutdown.
    """

    def __init__(self):
        # SECURITY: a hard-coded subscription key is leaked with this
        # source. Rotate this key and load it from an environment variable
        # or a secrets vault instead of committing it.
        self.key = "F9ybMmlThSNVFodKolaXoot9AESnd53g6hTujoG9HoLi7yolSGBoJQQJ99AJACYeBjFXJ3w3AAAYACOGPTJo"
        self.service_region = "eastus"

        # Twilio Media Streams deliver 8 kHz mono mu-law audio.
        # NOTE(review): mu-law is normally 8 bits per sample on the wire;
        # confirm whether the SDK wants the decoded width (16) or 8 here.
        audio_format = speechsdk.audio.AudioStreamFormat(
            samples_per_second=8000,
            bits_per_sample=16,
            channels=1,
            wave_stream_format=speechsdk.AudioStreamWaveFormat.MULAW,
        )
        self.stream = speechsdk.audio.PushAudioInputStream(stream_format=audio_format)
        self.audio_config = speechsdk.audio.AudioConfig(stream=self.stream)

        # Configure the recognizer against the regional endpoint. Do NOT
        # assign an endpoint URL to endpoint_id — that field is a Custom
        # Speech deployment id and silently breaks recognition.
        self.speech_config = speechsdk.SpeechConfig(subscription=self.key, region=self.service_region)

        # Silence timeouts, set through the supported properties (values in
        # milliseconds, passed as strings). Tune to taste: the initial
        # timeout bounds how long the service waits for the first speech,
        # the end timeout decides when an utterance is finalized.
        self.speech_config.set_property(
            speechsdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, "15000")
        self.speech_config.set_property(
            speechsdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, "2000")

        # SDK-side logging for diagnosing empty-transcript sessions.
        self.speech_config.set_property(
            speechsdk.PropertyId.Speech_LogFilename,
            "/home/vassar/Desktop/Telugu/myproject/anil.log")
        self.speech_config.enable_audio_logging()
        self.speech_config.speech_recognition_language = 'te-IN'
        self.is_audio_logging_enabled = self.speech_config.get_property(
            property_id=speechsdk.PropertyId.SpeechServiceConnection_EnableAudioLogging)

        self.speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.speech_config, audio_config=self.audio_config)

        self.speech_recognizer.recognizing.connect(self.recognizing_cb)
        self.speech_recognizer.recognized.connect(self.recognized_cb)
        self.speech_recognizer.session_stopped.connect(self.session_stopped_cb)
        self.speech_recognizer.canceled.connect(self.canceled_cb)

        # Run recognition on a daemon thread so process exit is never
        # blocked by a stuck recognition session.
        self.recognition_done = threading.Event()
        self.recognize_thread = threading.Thread(target=self.recognize_audio, daemon=True)
        self.recognize_thread.start()

    def session_stopped_cb(self, evt):
        """Signal the recognition thread that the session has ended."""
        logger.info("Session stopped")
        self.recognition_done.set()

    def canceled_cb(self, evt):
        """Log the cancellation reason AND details, then unblock the thread.

        The details carry the actual service error (invalid key, wrong
        region, quota exceeded) — without them cancellations are opaque.
        """
        logger.error(f"Recognition canceled: {evt.reason}")
        details = getattr(evt, "cancellation_details", None)
        if details is not None:
            logger.error(f"Cancellation details: {details}")
        self.recognition_done.set()

    def recognizing_cb(self, evt):
        """Log partial (interim) recognition hypotheses."""
        logger.debug(f"Recognizing: {evt.result.text}")

    def recognized_cb(self, evt):
        """Print and log each finalized utterance transcript."""
        transcription = evt.result.text
        print(f"RECOGNIZED: {transcription}")
        logger.info(f"Transcription: {transcription}")

    def push_audio(self, audio_data):
        """Append raw mu-law bytes (from Twilio media frames) to the stream.

        Args:
            audio_data: Decoded bytes of one Twilio media payload.
        """
        self.stream.write(audio_data)

    def recognize_audio(self):
        """Run continuous recognition until session_stopped/canceled fires."""
        if not self.speech_recognizer:
            logger.error("Speech recognizer is not initialized.")
            return

        logger.info("Starting continuous recognition")
        try:
            self.speech_recognizer.start_continuous_recognition()
            # Block until a terminal callback sets the event.
            self.recognition_done.wait()
            self.speech_recognizer.stop_continuous_recognition()
        except Exception as e:
            logger.error(f"Error during continuous recognition: {e}")

I do get a transcript when I tried with a microphone: for Telugu streaming it gave me a valid transcript.

So Telugu is available for real-time streaming.

Now I am trying to send Twilio audio to the STT service.

Initially the user is speaking, but for about 15 seconds the system does not produce a transcript.

For the English version, after about 15 seconds it gives a valid transcript.

For Telugu, however, I always get no transcript — an empty transcript — and the system is not responding.

I need Telugu to produce a valid transcript, and the system should respond properly from the beginning of the call to the end.

Azure AI Speech
Azure AI Speech
An Azure service that integrates speech processing into apps and services.
1,772 questions
Azure Functions
Azure Functions
An Azure service that provides an event-driven serverless compute platform.
5,095 questions
SAP HANA on Azure Large Instances
SAP HANA on Azure Large Instances
Microsoft branding terminology for an Azure offer to run HANA instances on SAP HANA hardware deployed in Large Instance stamps in different Azure regions.
124 questions
{count} votes

Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.