Azure Document Intelligence - Mismatch Between Trained API Version and Response API Version
Hello,
I have trained a Custom Neural Model under Document Intelligence API Version 2023-07-31.
When performing inference via the API, the "apiVersion" field in the response does not match the API version the model was trained with: inference returns a response with API version 2024-02-29-preview. The behavior is the same for custom neural models and for prebuilt-layout, and it persists even when the API version is specified directly in the HTTP request.
Can anyone reproduce this behavior, and could Microsoft please advise on or fix this issue?
Test Case 1: Custom Neural Model
Model:
Code:
import os
import yaml
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
def load_azure_config(directory="."):
    """Load the Azure settings stored in ``azure_config.yaml``.

    Args:
        directory: Folder that contains ``azure_config.yaml``. Defaults to
            the current working directory.

    Returns:
        The parsed YAML document, exactly as produced by ``yaml.safe_load``.

    Raises:
        OSError: If the configuration file cannot be opened.
        yaml.YAMLError: If the file content is not valid YAML.
    """
    filepath = os.path.join(directory, "azure_config.yaml")
    with open(filepath, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Only printing here used to let execution fall through to
            # `return azure_config`, which failed with UnboundLocalError and
            # hid the real cause. Keep the message, then propagate the error.
            print(exc)
            raise
def get_doc_client(AzureKeys):
    """Build a ``DocumentIntelligenceClient`` from the loaded key settings.

    Args:
        AzureKeys: Mapping with a ``"DocumentIntelligence"`` entry that holds
            the ``"KEY_1"`` and ``"AZURE_ENDPOINT"`` values.

    Returns:
        A client constructed from the endpoint and an ``AzureKeyCredential``
        wrapping the key.
    """
    api_key = AzureKeys["DocumentIntelligence"]["KEY_1"]
    service_url = AzureKeys["DocumentIntelligence"]["AZURE_ENDPOINT"]
    credential = AzureKeyCredential(api_key)
    return DocumentIntelligenceClient(service_url, credential)
def analyze_document(filepath, doc_client, model_id):
    """Send a local file to a model and return the finished analysis.

    Args:
        filepath: Path of the document to analyze; it is opened in binary mode.
        doc_client: Client object exposing ``begin_analyze_document``.
        model_id: Identifier of the model that should analyze the document.

    Returns:
        Whatever ``result()`` of the poller returned by
        ``begin_analyze_document`` yields.
    """
    request_options = {
        "model_id": model_id,
        "content_type": "application/octet-stream",
    }
    with open(filepath, "rb") as document:
        operation = doc_client.begin_analyze_document(
            analyze_request=document,
            **request_options,
        )
        return operation.result()
# Script entry point: analyze one document with the custom model
if __name__ == "__main__":
    # Document to analyze and the model that should process it
    input_file = r"C:\Users\xxxxxx\Downloads\AIAI2021 (1).pdf"
    model_id = "test-response"
    # Keys and endpoint are read from azure_config.yaml
    AzureKeys = load_azure_config()['AzureKeys']
    # Client used for every Document Intelligence call below
    doc_client = get_doc_client(AzureKeys)
    # Run the analysis and block until it has finished
    result = analyze_document(
        filepath=input_file,
        doc_client=doc_client,
        model_id=model_id,
    )
    # Dump the full response; the "apiVersion" field is part of this output
    print(result.as_dict())
Response:
{'apiVersion': '2024-02-29-preview', 'modelId': 'test-response', 'stringIndexType': 'textElements', 'content': 'Goldsmiths Research Online\nCitation\nGoldsmiths Research Online (GRO) is the institutional research repository for Goldsmiths, University of London\nOlaniyan, Rapheal; Stamate, Daniel and Pu, Ida. 2021. \'A Two-Step Optimised BERT-Based NLP Algorithm for Extracting Sentiment from Financial News\' ... }
Test Case 2: Prebuilt Layout Model
Model:
prebuilt-layout version 2024-07-31
Code:
import os
import yaml
import requests
import time
def load_azure_config(directory="."):
    """Load Azure configuration from the ``azure_config.yaml`` file.

    Args:
        directory: Folder that contains ``azure_config.yaml``. Defaults to
            the current working directory.

    Returns:
        The parsed YAML document, exactly as produced by ``yaml.safe_load``.

    Raises:
        OSError: If the configuration file cannot be opened.
        yaml.YAMLError: If the file content is not valid YAML.
    """
    filepath = os.path.join(directory, "azure_config.yaml")
    with open(filepath, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Only printing here used to let execution fall through to
            # `return azure_config`, which failed with UnboundLocalError and
            # hid the real cause. Keep the message, then propagate the error.
            print(exc)
            raise
def analyze_document_rest(filepath, azure_keys, model_id, api_version="2023-07-31",
                          *, poll_interval=5, request_timeout=60, max_wait=None):
    """Submit a document to the Document Intelligence REST API and poll for the result.

    Args:
        filepath (str): Path to the document file to analyze.
        azure_keys (dict): Mapping with a "DocumentIntelligence" entry holding
            "AZURE_ENDPOINT" and "KEY_1".
        model_id (str): Document model ID (e.g., 'prebuilt-layout').
        api_version (str): Value sent as the ``api-version`` query parameter.
        poll_interval (int | float): Seconds to sleep between status polls.
        request_timeout (int | float | tuple | None): Timeout handed to every
            ``requests`` call so a stalled connection cannot block forever.
        max_wait (int | float | None): Upper bound in seconds for the polling
            phase. ``None`` (the default) polls until a terminal status arrives.

    Returns:
        dict: The final polling payload when the status is "succeeded",
        or None if any error occurred (the error is printed).

    Raises:
        KeyError: If ``azure_keys`` lacks the expected entries.
    """
    endpoint = azure_keys["DocumentIntelligence"]["AZURE_ENDPOINT"]
    key = azure_keys["DocumentIntelligence"]["KEY_1"]

    # An endpoint written with a trailing "/" would otherwise produce a
    # "//formrecognizer/..." path.
    base_url = str(endpoint).rstrip("/")
    url = (
        f"{base_url}/formrecognizer/documentModels/"
        f"{model_id}:analyze?api-version={api_version}"
    )
    auth_header = {"Ocp-Apim-Subscription-Key": key}
    headers = {**auth_header, "Content-Type": "application/octet-stream"}

    try:
        # Read the file and send the request
        with open(filepath, "rb") as file_data:
            response = requests.post(
                url, headers=headers, data=file_data, timeout=request_timeout
            )

        # Handle initial response; the code below treats only 202 as accepted
        if response.status_code == 404:
            print(
                "Error 404: Endpoint or resource not found. Check your endpoint, model ID, or API version.")
            return None
        if response.status_code != 202:
            print(f"Error: {response.status_code}")
            try:
                print(response.json())  # Print error details if available
            except Exception:
                print(response.text)  # Fallback if JSON decoding fails
            return None

        # The URL to poll is taken from the Operation-Location response header
        operation_location = response.headers.get("Operation-Location")
        if not operation_location:
            print("Error: Operation-Location header not found.")
            return None
        print("Document analysis submitted. Polling for results...")

        deadline = None if max_wait is None else time.monotonic() + max_wait
        while True:
            poll_response = requests.get(
                operation_location, headers=auth_header, timeout=request_timeout
            )
            if poll_response.status_code != 200:
                print(f"Polling Error: {poll_response.status_code}")
                print(poll_response.text)
                return None

            poll_data = poll_response.json()
            status = poll_data.get("status")
            if status == "succeeded":
                print("Analysis succeeded.")
                return poll_data
            if status == "failed":
                print("Analysis failed.")
                print(poll_data)
                return None

            # Stop instead of looping forever when the caller set a bound
            if deadline is not None and time.monotonic() >= deadline:
                print(f"Polling timed out after {max_wait} seconds.")
                return None

            # Wait and retry polling
            print(f"Waiting for {poll_interval} seconds before retrying...")
            time.sleep(poll_interval)
    except Exception as e:
        # Deliberate catch-all: the documented contract is "None on any error"
        print(f"An error occurred: {e}")
        return None
# Inputs for this run: the document, the model and the API version to request
input_file = r"C:\Users\xxxxxxx\Downloads\AIAI2021 (1).pdf"
model_id = "prebuilt-layout"
api_version = "2023-07-31"
# Keys and endpoint are read from azure_config.yaml
azure_keys = load_azure_config()["AzureKeys"]
# Submit the document through the REST API and wait for the outcome
result = analyze_document_rest(
    filepath=input_file,
    azure_keys=azure_keys,
    model_id=model_id,
    api_version=api_version,
)
Response:
{'apiVersion': '2024-02-29-preview',
'modelId': 'prebuilt-layout',
'stringIndexType': 'textElements',
'content': 'Goldsmiths Research Online\nGoldsmiths '...}
1 answer
Sort by: Most helpful
-
Deleted
This answer has been deleted due to a violation of our Code of Conduct. The answer was manually reported or identified through automated detection before action was taken. Please refer to our Code of Conduct for more information.
Comments have been turned off. Learn more