I have an issue: uploading documents without vector fields works well, but as soon as I include the vector fields (the vector embeddings), the upload fails with this error:
**azure.search.documents._generated.operations._documents_operations.DocumentsOperations.index() got multiple values for keyword argument 'error_map'**
This is my current code:
def upload_documents_to_search_client(df, chunk_size=32000):
    """Send every row of ``df`` to the search index as a text-only document.

    Each row becomes a dict whose values are all strings. A column that is
    absent from the row is sent as an empty string. The ``aiid`` value is
    passed through ``map_aiid_to_label`` before being stringified, and the
    ``version`` column is stored under the ``os_version`` key.

    Args:
        df: Object exposing ``iterrows()`` that yields ``(index, row)``
            pairs (presumably a pandas DataFrame -- confirm with caller).
        chunk_size: Forwarded to ``chunk_data`` to size each upload batch.

    Returns:
        None. A failing batch is reported with ``print`` and the remaining
        batches are still attempted.
    """
    # (document key, source column) pairs, in the order the keys are emitted.
    field_map = (
        ("hardware_id", "hardware_id"),
        ("text_feedback", "text_feedback"),
        ("uninstall_text_feedback", "uninstall_text_feedback"),
        ("os", "os"),
        ("date_ymd", "date_ymd"),
        ("Feature_Category", "Feature_Category"),
        ("Sentiment", "Sentiment"),
        ("country", "country"),
        ("aiid", "aiid"),
        ("version_app", "version_app"),
        ("os_version", "version"),
        ("architecture", "architecture"),
        ("score", "score"),
        ("region", "region"),
        ("city", "city"),
    )

    documents = []
    for _, record in df.iterrows():
        doc = {"@search.action": "mergeOrUpload"}
        for key, column in field_map:
            if column not in record:
                doc[key] = ""
                continue
            value = record[column]
            if key == "aiid":
                # The index stores the label, not the raw aiid.
                value = map_aiid_to_label(value)
            doc[key] = str(value)
        documents.append(doc)

    # chunk_data is defined elsewhere; presumably it yields slices of at most
    # chunk_size documents -- TODO confirm.
    for batch in chunk_data(documents, chunk_size):
        try:
            search_client.upload_documents(documents=batch)
            print(f"Uploaded {len(batch)} documents successfully.")
        except Exception as exc:
            print(f"An error occurred during document upload: {exc}")
    # NOTE(review): the source was pasted without indentation, so it is not
    # certain whether this return sat here or inside the except branch above
    # (which would stop at the first failing batch). Confirm the intent.
    return None
def upload_documents_with_embeddings(df, embeddings_dict, chunk_size=32000):
    """Upload the rows of ``df`` that have embeddings, including the vectors.

    Only rows whose stringified ``hardware_id`` appears in
    ``embeddings_dict`` are uploaded. Each document carries the same
    string-valued text fields as the text-only upload plus the
    ``vector_text_feedback`` and ``vector_uninstall_feedback`` lists taken
    from the matching embeddings record (an empty list when a vector is
    missing from that record).

    Args:
        df: Object exposing ``iterrows()`` that yields ``(index, row)``
            pairs; every row must provide ``hardware_id``.
        embeddings_dict: Iterable of records shaped like
            ``{"hardware_id": ..., "embeddings": {...}}``. Despite the
            name, the code iterates it as a sequence of records. When
            several records share a ``hardware_id`` the first one is used.
        chunk_size: Forwarded to ``chunk_data`` to size each upload batch.

    Returns:
        None. A failing batch is reported with ``print`` and the remaining
        batches are still attempted.

    NOTE(review): documents that carry embedding vectors are far larger
    than the text-only ones, so a ``chunk_size`` that works for text may
    exceed the service's per-request size limit here. Callers should
    probably pass a much smaller ``chunk_size`` -- confirm against the
    service limits.
    """
    # Index the embedding records once so each row costs a single dict
    # lookup instead of repeated scans of the whole list. setdefault keeps
    # the first record per id, matching a first-match linear search.
    record_by_id = {}
    for item in embeddings_dict:
        record_by_id.setdefault(item["hardware_id"], item)

    # (document key, source column) pairs for the optional text fields, in
    # the order the keys are emitted after "hardware_id".
    field_map = (
        ("text_feedback", "text_feedback"),
        ("uninstall_text_feedback", "uninstall_text_feedback"),
        ("os", "os"),
        ("date_ymd", "date_ymd"),
        ("Feature_Category", "Feature_Category"),
        ("Sentiment", "Sentiment"),
        ("country", "country"),
        ("aiid", "aiid"),
        ("version_app", "version_app"),
        ("os_version", "version"),
        ("architecture", "architecture"),
        ("score", "score"),
        ("region", "region"),
        ("city", "city"),
    )

    data = []
    for _, row in df.iterrows():
        hardware_id = str(row["hardware_id"])
        if hardware_id not in record_by_id:
            continue

        document = {
            "@search.action": "mergeOrUpload",
            "hardware_id": hardware_id,
        }
        for key, column in field_map:
            if column not in row:
                document[key] = ""
                continue
            value = row[column]
            if key == "aiid":
                # The index stores the label, not the raw aiid.
                value = map_aiid_to_label(value)
            document[key] = str(value)

        vectors = record_by_id[hardware_id]["embeddings"]
        document["vector_text_feedback"] = vectors.get("vector_text_feedback", [])
        document["vector_uninstall_feedback"] = vectors.get(
            "vector_uninstall_feedback", []
        )
        data.append(document)

    # chunk_data is defined elsewhere; presumably it yields slices of at most
    # chunk_size documents -- TODO confirm.
    for chunk in chunk_data(data, chunk_size):
        try:
            search_client.upload_documents(documents=chunk)
            print(f"Uploaded {len(chunk)} documents with embeddings successfully.")
        except Exception as e:
            print(f"An error occurred during embeddings upload: {e}")
    # NOTE(review): the source was pasted without indentation, so it is not
    # certain whether this return sat here or inside the except branch above
    # (which would stop at the first failing batch). Confirm the intent.
    return None
# Driver: load the precomputed embeddings, then upload the text-only
# documents followed by the documents that carry vectors.
try:
    output_json_path = os.path.join(downloads_folder, "feedback_embeddings.json")
    # Explicit encoding so the file reads the same regardless of the
    # platform's default locale encoding.
    with open(output_json_path, "r", encoding="utf-8") as json_file:
        embeddings_dict = json.load(json_file)

    upload_documents_to_search_client(df)

    # Documents with embedding vectors are much larger than text-only ones.
    # The reported "multiple values for keyword argument 'error_map'" failure
    # appears to come from the SDK's split-and-retry path after the service
    # rejects an oversized request -- TODO confirm for the installed SDK
    # version. A small batch keeps each request below the size limit; 100 is
    # a conservative starting point, tune it to the vector dimensions.
    upload_documents_with_embeddings(df, embeddings_dict, chunk_size=100)

    # Both upload functions report per-batch failures themselves and do not
    # raise, so reaching this line does not prove every batch succeeded.
    print("Upload run finished; check the per-batch messages above for any failures.")
except Exception as e:
    print(f"An error occurred: {e}")
And I get this as my output:
Uploaded 32000 documents successfully.
Uploaded 32000 documents successfully.
Uploaded 32000 documents successfully.
Uploaded 28345 documents successfully.
An error occurred during embeddings upload: azure.search.documents._generated.operations._documents_operations.DocumentsOperations.index() got multiple values for keyword argument 'error_map'
All documents and embeddings have been uploaded successfully.
Basically, the first function works but the second one doesn't. Can anyone help with this?
This is also the format of the json file:
[
{
"hardware_id": "example",
"embeddings": {
"vector_text_feedback": [],
"vector_uninstall_feedback": []
}
},
{
"hardware_id": "C286E4952A934E3782D02259E4620AD899F33263848199AB062BD00A2DD2F9AE",
"embeddings": {
"vector_text_feedback": [],
"vector_uninstall_feedback": []
}
}
]