"InternalServerError: Error code: 500 - {'error': {'code': 'InternalServerError', 'message': 'Backend returned unexpected response. Please contact Microsoft for help.'}}"
I am trying to connect to Llama-3-8B-instruct.
Different authentication methods produce different errors, but none of my requests succeed. Here is my code.
import os

import requests
from dotenv import load_dotenv

# Load environment variables from a local .env file.
load_dotenv()

# Azure endpoint, deployment name, and API key come from the environment.
endpoint_url = os.getenv("ENDPOINT_URL")
deployment_name = os.getenv("DEPLOYMENT_NAME")  # e.g. "Meta-Llama-3-8B-Instruct"
api_key = os.getenv("API_KEY")
api_version = "2024-05-01-preview"

# Fail fast with a clear message if any required setting is missing —
# a None here would otherwise surface later as a confusing HTTP/auth error.
_missing = [name for name, value in (
    ("ENDPOINT_URL", endpoint_url),
    ("DEPLOYMENT_NAME", deployment_name),
    ("API_KEY", api_key),
) if not value]
if _missing:
    raise SystemExit(f"Missing required environment variables: {', '.join(_missing)}")

# Build the chat-completions URL.
# NOTE(review): this assumes ENDPOINT_URL ends with a trailing "/" — confirm,
# otherwise the concatenation mangles the path (e.g. "...azure.comopenai/...").
endpoint = (
    f"{endpoint_url}openai/deployments/{deployment_name}"
    f"/chat/completions?api-version={api_version}"
)

# Request headers: Azure-style key auth uses the "api-key" header.
headers = {
    "Content-Type": "application/json",
    "api-key": api_key,
}

# The conversation to send.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello, how are you?"},
]

# Request payload.
payload = {
    "model": deployment_name,  # Include the model name in the payload
    "messages": conversation,
    "temperature": 0.7,
    "top_p": 0.95,
    "max_tokens": 800,
}

# Send the request. A timeout prevents the script from hanging indefinitely
# if the backend never responds.
try:
    print(f"Sending request to endpoint: {endpoint}")
    response = requests.post(endpoint, headers=headers, json=payload, timeout=60)
    response.raise_for_status()  # Raise error for bad HTTP status codes
    # Extract and print the assistant's response.
    model_response = response.json()
    assistant_content = model_response["choices"][0]["message"]["content"]
    print(f"Assistant: {assistant_content}")
except requests.RequestException as e:
    print(f"Failed to communicate with the LLM. Error: {e}")
    if hasattr(e, 'response') and e.response is not None:
        print(f"Response Content: {e.response.content}")