I'm trying to extract figures from a PDF document. Below is the code that I'm running:
import os
from dotenv import load_dotenv
import base64
import json
# Azure packages to import
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeOutputOption, AnalyzeResult, AnalyzeDocumentRequest
# Load variables from a local .env file into the process environment.
load_dotenv()

# NOTE(review): `endpoint` and `key` are not defined in this snippet;
# presumably they are set elsewhere (e.g. read from the environment) - confirm.
document_intelligence_client = DocumentIntelligenceClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key),
)

filepath = "input_data/zomato-annual-report-2024-short.pdf"

# Send the PDF as a raw byte stream to the layout model and request figure
# output alongside the regular analysis result.
with open(filepath, "rb") as pdf_stream:
    poller = document_intelligence_client.begin_analyze_document(
        "prebuilt-layout",
        analyze_request=pdf_stream,
        output=[AnalyzeOutputOption.FIGURES],
        content_type="application/octet-stream",
    )

# Fetch the analysis result, and keep the operation id: it is passed as
# `result_id` when requesting each figure below.
result: AnalyzeResult = poller.result()
operation_id = poller.details["operation_id"]

if not result.figures:
    print("No figures found.")
else:
    for figure in result.figures:
        # Figures without an id cannot be requested individually; skip them.
        if not figure.id:
            continue
        response = document_intelligence_client.get_analyze_result_figure(
            model_id=result.model_id,
            result_id=operation_id,
            figure_id=figure.id,
        )
        # Write the returned chunks to "<figure id>.png" in the working directory.
        with open(f"{figure.id}.png", "wb") as png_file:
            png_file.writelines(response)
I'm getting the error below. I'm not sure where I'm going wrong, since I'm following the documentation as is: https://learn.microsoft.com/en-us/python/api/overview/azure/ai-documentintelligence-readme?view=azure-python-preview
I'm using the Azure AI Document Intelligence client library for Python, version 1.0.0b4.
---------------------------------------------------------------------------
ResourceNotFoundError Traceback (most recent call last)
Cell In[66], line 4
2 for figure in result.figures:
3 if figure.id:
----> 4 response = document_intelligence_client.get_analyze_result_figure(
5 model_id=result.model_id, result_id=operation_id, figure_id=figure.id
6 )
7 with open(f"{figure.id}.png", "wb") as writer:
8 writer.writelines(response)
File c:\Users\meraj\anaconda3\envs\jupyter_env\lib\site-packages\azure\core\tracing\decorator.py:105, in distributed_trace.<locals>.decorator.<locals>.wrapper_use_tracer(*args, **kwargs)
103 span_impl_type = settings.tracing_implementation()
104 if span_impl_type is None:
--> 105 return func(*args, **kwargs)
107 # Merge span is parameter is set, but only if no explicit parent are passed
108 if merge_span and not passed_in_parent:
File c:\Users\meraj\anaconda3\envs\jupyter_env\lib\site-packages\azure\ai\documentintelligence\_operations\_operations.py:1470, in DocumentIntelligenceClientOperationsMixin.get_analyze_result_figure(self, model_id, result_id, figure_id, **kwargs)
1468 except (StreamConsumedError, StreamClosedError):
1469 pass
-> 1470 map_error(status_code=response.status_code, response=response, error_map=error_map)
1471 error = _deserialize(_models.ErrorResponse, response.json())
1472 raise HttpResponseError(response=response, model=error)
File c:\Users\meraj\anaconda3\envs\jupyter_env\lib\site-packages\azure\core\exceptions.py:163, in map_error(status_code, response, error_map)
161 return
162 error = error_type(response=response)
--> 163 raise error
ResourceNotFoundError: (NotFound) Figures analyze result does not exist.
Code: NotFound
Message: Figures analyze result does not exist.