Created
December 7, 2023 23:51
-
-
Save mahiya/fa0e47b70cc2383fa6a5fb8185777682 to your computer and use it in GitHub Desktop.
C# で Azure AI Search にインデックスを作成して、そこにドキュメントを登録するコード
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
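// dotnet add package Azure.Search.Documents
// NOTE(review): the preview API surface used below (e.g. ServiceVersion.V2023_10_01_Preview) presumably requires a prerelease package version (likely 11.5.0-beta.5) — confirm and pin with --version.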
using Azure; | |
using Azure.Search.Documents; | |
using Azure.Search.Documents.Indexes; | |
using Azure.Search.Documents.Indexes.Models; | |
using Azure.Search.Documents.Models; | |
namespace SearchIndexCreator | |
{ | |
class CognitiveSearchClient | |
{ | |
readonly CognitiveSearchSettings _settings; | |
readonly SearchIndexClient indexClient; | |
public CognitiveSearchClient(CognitiveSearchSettings settings) | |
{ | |
_settings = settings; | |
var credential = new AzureKeyCredential(settings.CognitiveSearchKey); | |
var serviceEndpoint = new Uri($"https://{settings.CognitiveSearchName}.search.windows.net/"); | |
var options = new SearchClientOptions(SearchClientOptions.ServiceVersion.V2023_10_01_Preview); | |
indexClient = new SearchIndexClient(serviceEndpoint, credential, options); | |
} | |
/// <summary> | |
/// インデックスを作成する | |
/// </summary> | |
public async Task CreateIndexAsync(bool deleteExistedIndex = true) | |
{ | |
// 既存のインデックスを削除する | |
if(deleteExistedIndex) | |
await indexClient.DeleteIndexAsync(_settings.IndexName); | |
// 検索フィールドを追加する | |
var builder = new FieldBuilder(); | |
var fields = builder.Build(typeof(SearchDocumentModel)); | |
const string VectorSearchProfile = "vectorConfig"; | |
const string VectorSearchHnswConfig = "hnsw"; | |
// セマンティック検索用の設定を追加する | |
var semanticConfig = new PrioritizedFields(); | |
if(!string.IsNullOrWhiteSpace(_settings.SemanticSearchTitleField)) | |
semanticConfig.TitleField = new SemanticField { FieldName = _settings.SemanticSearchTitleField }; | |
_settings.SemanticSearchContentFields.ForEach(f => semanticConfig.ContentFields.Add(new SemanticField { FieldName = f })); | |
_settings.SemanticSearchKeywordFields.ForEach(f => semanticConfig.KeywordFields.Add(new SemanticField { FieldName = f })); | |
// インデックスの設定を行う | |
var index = new SearchIndex(_settings.IndexName, fields) | |
{ | |
SemanticSettings = new() | |
{ | |
Configurations = { new SemanticConfiguration(_settings.SemanticSearchConfigName, semanticConfig) } | |
}, | |
VectorSearch = new() | |
{ | |
Profiles = { new VectorSearchProfile(VectorSearchProfile, VectorSearchHnswConfig) }, | |
Algorithms = { new HnswVectorSearchAlgorithmConfiguration(VectorSearchHnswConfig) } | |
}, | |
}; | |
// CORS 設定を追加する | |
index.CorsOptions = new CorsOptions(new[] { "*" }); | |
// インデックスを作成する | |
await indexClient.CreateOrUpdateIndexAsync(index); | |
} | |
/// <summary> | |
/// インデックスにドキュメントを登録する | |
/// </summary> | |
public async Task UploadDocumentsAsync(List<SearchDocumentModel> docs) | |
{ | |
// インデックスにドキュメントを読み込む | |
foreach (var chunk in docs.Chunk(1000)) | |
{ | |
var batch = IndexDocumentsBatch.Create(chunk.Select(doc => IndexDocumentsAction.Upload(doc)).ToArray()); | |
var searchClient = indexClient.GetSearchClient(_settings.IndexName); | |
await searchClient.IndexDocumentsAsync(batch); | |
} | |
} | |
} | |
class SearchDocumentModel | |
{ | |
[SimpleField(IsKey = true)] | |
public string id { get; set; } = Guid.NewGuid().ToString(); | |
[SimpleField(IsFilterable = true)] | |
public string url { get; set; } | |
[SearchableField(AnalyzerName = "ja.microsoft")] | |
public string title { get; set; } | |
[SearchableField(AnalyzerName = "ja.microsoft")] | |
public string description { get; set; } | |
[SearchableField(AnalyzerName = "ja.microsoft")] | |
public string[] contentTitles { get; set; } | |
[SearchableField(AnalyzerName = "ja.microsoft")] | |
public string content { get; set; } | |
[SearchableField(VectorSearchDimensions = "1536", VectorSearchProfile = "vectorConfig")] | |
public float[] vector { get; set; } | |
} | |
class CognitiveSearchSettings | |
{ | |
public string CognitiveSearchName { get; set; } | |
public string CognitiveSearchKey { get; set; } | |
public string IndexName { get; set; } | |
public string SemanticSearchConfigName { get; set; } | |
public string SemanticSearchTitleField { get; set; } | |
public List<string> SemanticSearchContentFields { get; set; } | |
public List<string> SemanticSearchKeywordFields { get; set; } | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment