Using Tags in Semantic Search
Hi, we're trying to use Azure Open AI search on our own sharepoint index to search for our organizational data. We have used the default mapping for the fields and have created one new field called "tags" to improve the searchability of certain documents. However, after implementing the tags field, and calling the azure open AI chat completions api, the results are the same as they would be without this new field. However, if we search directly on the index the search results are different and do in fact seem to make use of the tags field. Are there any other properties that we need to set when calling the Azure Open AI search API? Below is the index definition:
{
"@odata.context": "https://url/$metadata#indexes/$entity",
"@odata.etag": "\"0x8DD052B245C13A2\"",
"name": "sharepoint-index-cis",
"defaultScoringProfile": null,
"fields": [
{
"name": "id",
"type": "Edm.String",
"searchable": false,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": true,
"facetable": true,
"key": true,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_spo_item_name",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_spo_item_path",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_spo_item_weburi",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_spo_item_content_type",
"type": "Edm.String",
"searchable": false,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": true,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_spo_item_last_modified",
"type": "Edm.DateTimeOffset",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": true,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "metadata_spo_item_size",
"type": "Edm.Int64",
"searchable": false,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "content",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "tags",
"type": "Collection(Edm.String)",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "standard.lucene",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
}
],
"scoringProfiles": [],
"corsOptions": null,
"suggesters": [],
"analyzers": [],
"normalizers": [],
"tokenizers": [],
"tokenFilters": [],
"charFilters": [],
"encryptionKey": null,
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"k1": null,
"b": null
},
"semantic": {
"defaultConfiguration": null,
"configurations": [
{
"name": "sp-semantic",
"prioritizedFields": {
"titleField": {
"fieldName": "metadata_spo_item_name"
},
"prioritizedContentFields": [
{
"fieldName": "content"
}
],
"prioritizedKeywordsFields": [
{
"fieldName": "tags"
}
]
}
}
]
},
"vectorSearch": null
}
When we call the https://url/openai/deployments/oai-gpt4o/chat/completions?api-version=2024-06-01 end point, and include the all_retrieved_documents, it uses the search query "authorisation limits for bad debt write-off" and returns Document A.
However, if we search directly on the index using the https://url/indexes/sharepoint-index-cis/docs/search?api-version=2024-05-01-preview end point, for the same search query "authorisation limits for bad debt write-off", it returns Document B (which is correct because it has the authorisation limits tag)
Why does the Azure Open AI chat completions endpoint also not return Document B?