From 56516551c004c3604f79b391ed8dc32370debc81 Mon Sep 17 00:00:00 2001 From: Sebastian Stehle Date: Sat, 6 Nov 2021 16:34:45 +0100 Subject: [PATCH] Feature/azure search (#787) * Azure cognitive search. * Azure search finalized. * More fixes. * Remove api key. * Document configuration options. * Reverts the schema-id field. --- .../Text/Azure/AzureIndexDefinition.cs | 8 +- .../Text/Azure/AzureTextIndex.cs | 100 ++-- .../ElasticSearch/ElasticSearchTextIndex.cs | 24 +- .../FullText/MongoTextIndex.cs | 38 +- backend/src/Squidex/appsettings.json | 16 +- .../Contents/Text/IIndexerFactory.cs | 19 - .../Contents/Text/TextIndexerTestsBase.cs | 428 ++++++++---------- .../Contents/Text/TextIndexerTests_Azure.cs | 52 +++ .../Contents/Text/TextIndexerTests_Elastic.cs | 48 +- .../Contents/Text/TextIndexerTests_Mongo.cs | 57 +-- 10 files changed, 395 insertions(+), 395 deletions(-) delete mode 100644 backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/IIndexerFactory.cs create mode 100644 backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Azure.cs diff --git a/backend/extensions/Squidex.Extensions/Text/Azure/AzureIndexDefinition.cs b/backend/extensions/Squidex.Extensions/Text/Azure/AzureIndexDefinition.cs index e8348badf..4ea7ddc2d 100644 --- a/backend/extensions/Squidex.Extensions/Text/Azure/AzureIndexDefinition.cs +++ b/backend/extensions/Squidex.Extensions/Text/Azure/AzureIndexDefinition.cs @@ -17,8 +17,8 @@ namespace Squidex.Extensions.Text.Azure { private static readonly Dictionary AllowedLanguages = new Dictionary(StringComparer.OrdinalIgnoreCase) { - ["iv"] = ("text_iv", LexicalAnalyzerName.StandardLucene.ToString()), - ["zh"] = ("text_zh", LexicalAnalyzerName.ZhHansLucene.ToString()) + ["iv"] = ("iv", LexicalAnalyzerName.StandardLucene.ToString()), + ["zh"] = ("zh", LexicalAnalyzerName.ZhHansLucene.ToString()) }; static AzureIndexDefinition() @@ -47,7 +47,7 @@ namespace Squidex.Extensions.Text.Azure if (isValidLanguage && 
addedLanguage.Add(language)) { - var fieldName = $"text_{language.Replace('-', '_')}"; + var fieldName = language.Replace('-', '_'); AllowedLanguages[language] = (fieldName, analyzer); } @@ -90,7 +90,7 @@ namespace Squidex.Extensions.Text.Azure { IsFilterable = false }, - new SearchableField("schemaId") + new SimpleField("schemaId", SearchFieldDataType.String) { IsFilterable = true }, diff --git a/backend/extensions/Squidex.Extensions/Text/Azure/AzureTextIndex.cs b/backend/extensions/Squidex.Extensions/Text/Azure/AzureTextIndex.cs index 5f3df056e..bc745ecff 100644 --- a/backend/extensions/Squidex.Extensions/Text/Azure/AzureTextIndex.cs +++ b/backend/extensions/Squidex.Extensions/Text/Azure/AzureTextIndex.cs @@ -8,7 +8,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Text; using System.Threading; using System.Threading.Tasks; using Azure; @@ -27,15 +26,19 @@ namespace Squidex.Extensions.Text.Azure { private readonly SearchIndexClient indexClient; private readonly SearchClient searchClient; + private readonly int waitAfterUpdate; public AzureTextIndex( string serviceEndpoint, string serviceApiKey, - string indexName) + string indexName, + int waitAfterUpdate = 0) { indexClient = new SearchIndexClient(new Uri(serviceEndpoint), new AzureKeyCredential(serviceApiKey)); searchClient = indexClient.GetSearchClient(indexName); + + this.waitAfterUpdate = waitAfterUpdate; } public async Task InitializeAsync( @@ -67,7 +70,17 @@ namespace Squidex.Extensions.Text.Azure commands.Foreach(x => CommandFactory.CreateCommands(x, batch.Actions)); + if (batch.Actions.Count == 0) + { + return; + } + await searchClient.IndexDocumentsAsync(batch, cancellationToken: ct); + + if (waitAfterUpdate > 0) + { + await Task.Delay(waitAfterUpdate, ct); + } } public Task> SearchAsync(IAppEntity app, GeoQuery query, SearchScope scope, @@ -90,51 +103,76 @@ namespace Squidex.Extensions.Text.Azure return null; } - var searchOptions = new SearchOptions - { - Filter = 
BuildFilter(app, query, scope) - }; + List<(DomainId, double)> documents; - searchOptions.Select.Add("contentId"); - searchOptions.Size = 2000; + if (query.RequiredSchemaIds?.Count > 0) + { + documents = await SearchBySchemaAsync(query.Text, query.RequiredSchemaIds, scope, query.Take, 1, ct); + } + else if (query.PreferredSchemaId == null) + { + documents = await SearchByAppAsync(query.Text, app, scope, query.Take, 1, ct); + } + else + { + var halfBucket = query.Take / 2; - var results = await searchClient.SearchAsync("*", searchOptions, ct); + var schemaIds = Enumerable.Repeat(query.PreferredSchemaId.Value, 1); - var ids = new List(); + documents = await SearchBySchemaAsync( + query.Text, + schemaIds, + scope, + halfBucket, 1, + ct); - await foreach (var item in results.Value.GetResultsAsync().WithCancellation(ct)) - { - if (item != null) - { - ids.Add(DomainId.Create(item.Document["contentId"].ToString())); - } + documents.AddRange(await SearchByAppAsync(query.Text, app, scope, halfBucket, 1, ct)); } - return ids; + return documents.OrderByDescending(x => x.Item2).Select(x => x.Item1).Distinct().ToList(); } - private static string BuildFilter(IAppEntity app, TextQuery query, SearchScope scope) + private Task> SearchBySchemaAsync(string search, IEnumerable schemaIds, SearchScope scope, int limit, double factor, + CancellationToken ct = default) { - var sb = new StringBuilder(); + var filter = $"{string.Join(" or ", schemaIds.Select(x => $"schemaId eq '{x}'"))} and {GetServeField(scope)} eq true"; - sb.Append($"appId eq '{app.Id}' and {GetServeField(scope)} eq true"); + return SearchAsync(search, filter, limit, factor, ct); + } - if (query.RequiredSchemaIds?.Count > 0) - { - var schemaIds = string.Join(" or ", query.RequiredSchemaIds.Select(x => $"schemaId eq '{x}'")); + private Task> SearchByAppAsync(string search, IAppEntity app, SearchScope scope, int limit, double factor, + CancellationToken ct = default) + { + var filter = $"appId eq '{app.Id}' and 
{GetServeField(scope)} eq true"; - sb.Append($" and ({schemaIds}) and search.ismatchscoring('{query.Text}')"); - } - else if (query.PreferredSchemaId.HasValue) + return SearchAsync(search, filter, limit, factor, ct); + } + + private async Task> SearchAsync(string search, string filter, int size, double factor, + CancellationToken ct = default) + { + var searchOptions = new SearchOptions { - sb.Append($" and ((search.ismatchscoring('{query.Text}') and search.ismatchscoring('{query.PreferredSchemaId}', 'schemaId')) or search.ismatchscoring('{query.Text}'))"); - } - else + Filter = filter + }; + + searchOptions.Select.Add("contentId"); + searchOptions.Size = size; + searchOptions.QueryType = SearchQueryType.Full; + + var results = await searchClient.SearchAsync(search, searchOptions, ct); + + var ids = new List<(DomainId, double)>(); + + await foreach (var item in results.Value.GetResultsAsync().WithCancellation(ct)) { - sb.Append($" and search.ismatchscoring('{query.Text}')"); + if (item != null) + { + ids.Add((DomainId.Create(item.Document["contentId"].ToString()), factor * item.Score ?? 
0)); + } } - return sb.ToString(); + return ids; } private static string GetServeField(SearchScope scope) diff --git a/backend/extensions/Squidex.Extensions/Text/ElasticSearch/ElasticSearchTextIndex.cs b/backend/extensions/Squidex.Extensions/Text/ElasticSearch/ElasticSearchTextIndex.cs index afd819a27..d03d02df3 100644 --- a/backend/extensions/Squidex.Extensions/Text/ElasticSearch/ElasticSearchTextIndex.cs +++ b/backend/extensions/Squidex.Extensions/Text/ElasticSearch/ElasticSearchTextIndex.cs @@ -23,9 +23,9 @@ namespace Squidex.Extensions.Text.ElasticSearch { private readonly ElasticLowLevelClient client; private readonly string indexName; - private readonly bool waitForTesting; + private readonly int waitAfterUpdate; - public ElasticSearchTextIndex(string configurationString, string indexName, bool waitForTesting = false) + public ElasticSearchTextIndex(string configurationString, string indexName, int waitAfterUpdate = 0) { var config = new ConnectionConfiguration(new Uri(configurationString)); @@ -33,7 +33,7 @@ namespace Squidex.Extensions.Text.ElasticSearch this.indexName = indexName; - this.waitForTesting = waitForTesting; + this.waitAfterUpdate = waitAfterUpdate; } public Task InitializeAsync( @@ -58,19 +58,21 @@ namespace Squidex.Extensions.Text.ElasticSearch CommandFactory.CreateCommands(command, args, indexName); } - if (args.Count > 0) + if (args.Count == 0) { - var result = await client.BulkAsync(PostData.MultiJson(args), ctx: ct); + return; + } - if (!result.Success) - { - throw new InvalidOperationException($"Failed with ${result.Body}", result.OriginalException); - } + var result = await client.BulkAsync(PostData.MultiJson(args), ctx: ct); + + if (!result.Success) + { + throw new InvalidOperationException($"Failed with ${result.Body}", result.OriginalException); } - if (waitForTesting) + if (waitAfterUpdate > 0) { - await Task.Delay(1000, ct); + await Task.Delay(waitAfterUpdate, ct); } } diff --git 
a/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoTextIndex.cs b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoTextIndex.cs index 439805af1..28cbcea2c 100644 --- a/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoTextIndex.cs +++ b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoTextIndex.cs @@ -130,15 +130,15 @@ namespace Squidex.Domain.Apps.Entities.MongoDb.FullText return null; } - List documents; + List<(DomainId, double)> documents; if (query.RequiredSchemaIds?.Count > 0) { - documents = await SearchBySchemaAsync(query.Text, app, query.RequiredSchemaIds, scope, query.Take, ct); + documents = await SearchBySchemaAsync(query.Text, app, query.RequiredSchemaIds, scope, query.Take, 1, ct); } else if (query.PreferredSchemaId == null) { - documents = await SearchByAppAsync(query.Text, app, scope, query.Take, ct); + documents = await SearchByAppAsync(query.Text, app, scope, query.Take, 1, ct); } else { @@ -146,15 +146,21 @@ namespace Squidex.Domain.Apps.Entities.MongoDb.FullText var schemaIds = Enumerable.Repeat(query.PreferredSchemaId.Value, 1); - documents = new List(); - documents.AddRange(await SearchBySchemaAsync(query.Text, app, schemaIds, scope, halfBucket, ct)); - documents.AddRange(await SearchByAppAsync(query.Text, app, scope, halfBucket, ct)); + documents = await SearchBySchemaAsync( + query.Text, + app, + schemaIds, + scope, + halfBucket, 1, + ct); + + documents.AddRange(await SearchByAppAsync(query.Text, app, scope, halfBucket, 1, ct)); } - return documents.OrderByDescending(x => x.Score).Select(x => x.ContentId).Distinct().ToList(); + return documents.OrderByDescending(x => x.Item2).Select(x => x.Item1).Distinct().ToList(); } - private Task> SearchBySchemaAsync(string queryText, IAppEntity app, IEnumerable schemaIds, SearchScope scope, int limit, + private Task> SearchBySchemaAsync(string text, IAppEntity app, IEnumerable schemaIds, SearchScope scope, int limit, double factor, CancellationToken 
ct = default) { var filter = @@ -162,12 +168,12 @@ namespace Squidex.Domain.Apps.Entities.MongoDb.FullText Filter.Eq(x => x.AppId, app.Id), Filter.In(x => x.SchemaId, schemaIds), Filter_ByScope(scope), - Filter.Text(queryText, "none")); + Filter.Text(text, "none")); - return SearchAsync(filter, scope, limit, ct); + return SearchAsync(filter, scope, limit, factor, ct); } - private Task> SearchByAppAsync(string queryText, IAppEntity app, SearchScope scope, int limit, + private Task> SearchByAppAsync(string text, IAppEntity app, SearchScope scope, int limit, double factor, CancellationToken ct = default) { var filter = @@ -175,12 +181,12 @@ namespace Squidex.Domain.Apps.Entities.MongoDb.FullText Filter.Eq(x => x.AppId, app.Id), Filter.Exists(x => x.SchemaId), Filter_ByScope(scope), - Filter.Text(queryText, "none")); + Filter.Text(text, "none")); - return SearchAsync(filter, scope, limit, ct); + return SearchAsync(filter, scope, limit, factor, ct); } - private Task> SearchAsync(FilterDefinition filter, SearchScope scope, int limit, + private async Task> SearchAsync(FilterDefinition filter, SearchScope scope, int limit, double factor, CancellationToken ct = default) { var collection = GetCollection(scope); @@ -189,7 +195,9 @@ namespace Squidex.Domain.Apps.Entities.MongoDb.FullText collection.Find(filter).Limit(limit) .Project(searchTextProjection).Sort(Sort.MetaTextScore("score")); - return find.ToListAsync(ct); + var documents = await find.ToListAsync(ct); + + return documents.Select(x => (x.ContentId, x.Score * factor)).ToList(); } private static FilterDefinition Filter_ByScope(SearchScope scope) diff --git a/backend/src/Squidex/appsettings.json b/backend/src/Squidex/appsettings.json index 8c2fa8835..2c8dab3b0 100644 --- a/backend/src/Squidex/appsettings.json +++ b/backend/src/Squidex/appsettings.json @@ -30,7 +30,7 @@ "fullText": { // Define the type of the full text store. // - // SUPPORTED: elastic (ElasticSearch), default. 
Default: default + // SUPPORTED: elastic (ElasticSearch), azure (Azure Cognitive Search), default. Default: default "type": "default", "elastic": { @@ -38,6 +38,20 @@ // // Read More: https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/client-configuration.html "configuration": "http://localhost:9200", + + // The name of the index. + "indexName": "squidex" + }, + + "azure": { + // The URL to your azure search instance. + // + // Read More: https://docs.microsoft.com/en-us/azure/search/search-create-service-portal#get-a-key-and-url-endpoint + "serviceEndpoint": "https://.search.windows.net", + + // The api key. See link above. + "apiKey": "", + // The name of the index. "indexName": "squidex" } diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/IIndexerFactory.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/IIndexerFactory.cs deleted file mode 100644 index 569467d58..000000000 --- a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/IIndexerFactory.cs +++ /dev/null @@ -1,19 +0,0 @@ -// ========================================================================== -// Squidex Headless CMS -// ========================================================================== -// Copyright (c) Squidex UG (haftungsbeschraenkt) -// All rights reserved. Licensed under the MIT license. 
-// ========================================================================== - -using System.Threading.Tasks; -using Squidex.Infrastructure; - -namespace Squidex.Domain.Apps.Entities.Contents.Text -{ - public interface IIndexerFactory - { - Task CreateAsync(DomainId schemaId); - - Task CleanupAsync(); - } -} diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTestsBase.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTestsBase.cs index 8ebf01a28..e6290d1f7 100644 --- a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTestsBase.cs +++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTestsBase.cs @@ -21,7 +21,6 @@ using Squidex.Infrastructure.Json.Objects; using Xunit; #pragma warning disable SA1401 // Fields should be private -#pragma warning disable SA1115 // Parameter should follow comma namespace Squidex.Domain.Apps.Entities.Contents.Text { @@ -33,18 +32,11 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private readonly NamedId appId = NamedId.Of(DomainId.NewGuid(), "my-app"); private readonly NamedId schemaId = NamedId.Of(DomainId.NewGuid(), "my-schema"); private readonly IAppEntity app; + private readonly TextIndexingProcess sut; - protected delegate Task IndexOperation(TextIndexingProcess process); + public virtual bool SupportsQuerySyntax => true; - public abstract IIndexerFactory Factory { get; } - - public virtual bool SupportsCleanup { get; set; } = false; - - public virtual bool SupportsQuerySyntax { get; set; } = true; - - public virtual bool SupportsGeo { get; set; } = false; - - public virtual InMemoryTextIndexerState State { get; } = new InMemoryTextIndexerState(); + public virtual bool SupportsGeo => false; protected TextIndexerTestsBase() { @@ -52,20 +44,32 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text Mocks.App(appId, Language.DE, Language.EN); + +#pragma warning disable MA0056 // Do not call overridable members 
in constructor + sut = new TextIndexingProcess(TestUtils.DefaultSerializer, CreateIndex(), new InMemoryTextIndexerState()); +#pragma warning restore MA0056 // Do not call overridable members in constructor } + public abstract ITextIndex CreateIndex(); + [SkippableFact] public async Task Should_index_invariant_content_and_retrieve_with_fuzzy() { Skip.IfNot(SupportsQuerySyntax); - await TestCombinations( - CreateText(ids1[0], "iv", "Hello"), - CreateText(ids2[0], "iv", "World"), + await CreateTextAsync(ids1[0], "iv", "Hello"); + + await SearchText(expected: ids1, text: "helo~"); + } + + [SkippableFact] + public async Task Should_index_invariant_content_and_retrieve_with_fuzzy_with_full_scope() + { + Skip.IfNot(SupportsQuerySyntax); + + await CreateTextAsync(ids2[0], "iv", "World"); - SearchText(expected: ids1, text: "helo~"), - SearchText(expected: ids2, text: "wold~", SearchScope.All) - ); + await SearchText(expected: ids2, text: "wold~", SearchScope.All); } [SkippableFact] @@ -73,13 +77,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { Skip.IfNot(SupportsQuerySyntax); - await TestCombinations( - CreateText(ids1[0], "en", "City"), - CreateText(ids2[0], "de", "Stadt"), + await CreateTextAsync(ids1[0], "en", "City"); - SearchText(expected: ids1, text: "en:city"), - SearchText(expected: ids2, text: "de:Stadt") - ); + await SearchText(expected: ids1, text: "en:city"); } [SkippableFact] @@ -87,394 +87,322 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { Skip.IfNot(SupportsGeo); - await TestCombinations( - CreateGeo(ids1[0], "geo", 51.343391192211506, 12.401476788622826), // Within radius - CreateGeo(ids2[0], "geo", 51.30765141427311, 12.379631713912486), // Not in radius + // Within radius + await CreateGeoAsync(ids1[0], "geo", 51.343391192211506, 12.401476788622826); + + // Not in radius + await CreateGeoAsync(ids2[0], "geo", 51.30765141427311, 12.379631713912486); - SearchGeo(expected: ids1, "geo.iv", 51.34641682574934, 12.401965298137707), - 
SearchGeo(expected: null, "abc.iv", 51.48596429889613, 12.102629469505713) // Wrong field - ); + await SearchGeo(expected: ids1, "geo.iv", 51.34641682574934, 12.401965298137707); + + // Wrong field + await SearchGeo(expected: null, "abc.iv", 51.48596429889613, 12.102629469505713); } [Fact] public async Task Should_index_invariant_content_and_retrieve() { - await TestCombinations( - CreateText(ids1[0], "iv", "Hello"), - CreateText(ids2[0], "iv", "World"), + await CreateTextAsync(ids1[0], "iv", "Hello"); + await CreateTextAsync(ids2[0], "iv", "World"); - SearchText(expected: ids1, text: "Hello"), - SearchText(expected: ids2, text: "World"), + await SearchText(expected: ids1, text: "Hello"); + await SearchText(expected: ids2, text: "World"); - SearchText(expected: null, text: "Hello", SearchScope.Published), - SearchText(expected: null, text: "World", SearchScope.Published) - ); + await SearchText(expected: null, text: "Hello", SearchScope.Published); + await SearchText(expected: null, text: "World", SearchScope.Published); } [Fact] public async Task Should_update_draft_only() { - await TestCombinations( - CreateText(ids1[0], "iv", "V1"), + await CreateTextAsync(ids1[0], "iv", "V1"); - UpdateText(ids1[0], "iv", "V2"), + await UpdateTextAsync(ids1[0], "iv", "V2"); - SearchText(expected: null, text: "V1", target: SearchScope.All), - SearchText(expected: null, text: "V1", target: SearchScope.Published), + await SearchText(expected: null, text: "V1", target: SearchScope.All); + await SearchText(expected: null, text: "V1", target: SearchScope.Published); - SearchText(expected: ids1, text: "V2", target: SearchScope.All), - SearchText(expected: null, text: "V2", target: SearchScope.Published) - ); + await SearchText(expected: ids1, text: "V2", target: SearchScope.All); + await SearchText(expected: null, text: "V2", target: SearchScope.Published); } [Fact] public async Task Should_update_draft_only_multiple_times() { - await TestCombinations( - CreateText(ids1[0], "iv", 
"V1"), + await CreateTextAsync(ids1[0], "iv", "V1"); - UpdateText(ids1[0], "iv", "V2"), - UpdateText(ids1[0], "iv", "V3"), + await UpdateTextAsync(ids1[0], "iv", "V2"); + await UpdateTextAsync(ids1[0], "iv", "V3"); - SearchText(expected: null, text: "V2", target: SearchScope.All), - SearchText(expected: null, text: "V2", target: SearchScope.Published), + await SearchText(expected: null, text: "V2", target: SearchScope.All); + await SearchText(expected: null, text: "V2", target: SearchScope.Published); - SearchText(expected: ids1, text: "V3", target: SearchScope.All), - SearchText(expected: null, text: "V3", target: SearchScope.Published) - ); + await SearchText(expected: ids1, text: "V3", target: SearchScope.All); + await SearchText(expected: null, text: "V3", target: SearchScope.Published); } [Fact] public async Task Should_also_serve_published_after_publish() { - await TestCombinations( - CreateText(ids1[0], "iv", "V1"), + await CreateTextAsync(ids1[0], "iv", "V1"); - Publish(ids1[0]), + await PublishAsync(ids1[0]); - SearchText(expected: ids1, text: "V1", target: SearchScope.All), - SearchText(expected: ids1, text: "V1", target: SearchScope.Published) - ); + await SearchText(expected: ids1, text: "V1", target: SearchScope.All); + await SearchText(expected: ids1, text: "V1", target: SearchScope.Published); } [Fact] public async Task Should_also_update_published_content() { - await TestCombinations( - CreateText(ids1[0], "iv", "V1"), + await CreateTextAsync(ids1[0], "iv", "V1"); - Publish(ids1[0]), + await PublishAsync(ids1[0]); - UpdateText(ids1[0], "iv", "V2"), + await UpdateTextAsync(ids1[0], "iv", "V2"); - SearchText(expected: null, text: "V1", target: SearchScope.All), - SearchText(expected: null, text: "V1", target: SearchScope.Published), + await SearchText(expected: null, text: "V1", target: SearchScope.All); + await SearchText(expected: null, text: "V1", target: SearchScope.Published); - SearchText(expected: ids1, text: "V2", target: SearchScope.All), - 
SearchText(expected: ids1, text: "V2", target: SearchScope.Published) - ); + await SearchText(expected: ids1, text: "V2", target: SearchScope.All); + await SearchText(expected: ids1, text: "V2", target: SearchScope.Published); } [Fact] public async Task Should_also_update_published_content_multiple_times() { - await TestCombinations( - CreateText(ids1[0], "iv", "V1"), + await CreateTextAsync(ids1[0], "iv", "V1"); - Publish(ids1[0]), + await PublishAsync(ids1[0]); - UpdateText(ids1[0], "iv", "V2"), - UpdateText(ids1[0], "iv", "V3"), + await UpdateTextAsync(ids1[0], "iv", "V2"); + await UpdateTextAsync(ids1[0], "iv", "V3"); - SearchText(expected: null, text: "V2", target: SearchScope.All), - SearchText(expected: null, text: "V2", target: SearchScope.Published), + await SearchText(expected: null, text: "V2", target: SearchScope.All); + await SearchText(expected: null, text: "V2", target: SearchScope.Published); - SearchText(expected: ids1, text: "V3", target: SearchScope.All), - SearchText(expected: ids1, text: "V3", target: SearchScope.Published) - ); - } + await SearchText(expected: ids1, text: "V3", target: SearchScope.All); + await SearchText(expected: ids1, text: "V3", target: SearchScope.Published); + } [Fact] public async Task Should_simulate_new_version() { - await TestCombinations(0, - CreateText(ids1[0], "iv", "V1"), + await CreateTextAsync(ids1[0], "iv", "V1"); - // Publish the content. - Publish(ids1[0]), + // Publish the content. + await PublishAsync(ids1[0]); - SearchText(expected: ids1, text: "V1", target: SearchScope.All), - SearchText(expected: ids1, text: "V1", target: SearchScope.Published), + await SearchText(expected: ids1, text: "V1", target: SearchScope.All); + await SearchText(expected: ids1, text: "V1", target: SearchScope.Published); - // Create a new version, the value is still the same as old version. - CreateDraft(ids1[0]), + // Create a new version, the value is still the same as old version. 
+ await CreateDraftAsync(ids1[0]); - SearchText(expected: ids1, text: "V1", target: SearchScope.All), - SearchText(expected: ids1, text: "V1", target: SearchScope.Published), + await SearchText(expected: ids1, text: "V1", target: SearchScope.All); + await SearchText(expected: ids1, text: "V1", target: SearchScope.Published); - // Make an update, this updates the new version only. - UpdateText(ids1[0], "iv", "V2"), + // Make an update, this updates the new version only. + await UpdateTextAsync(ids1[0], "iv", "V2"); - SearchText(expected: null, text: "V1", target: SearchScope.All), - SearchText(expected: ids1, text: "V1", target: SearchScope.Published), + await SearchText(expected: null, text: "V1", target: SearchScope.All); + await SearchText(expected: ids1, text: "V1", target: SearchScope.Published); - SearchText(expected: ids1, text: "V2", target: SearchScope.All), - SearchText(expected: null, text: "V2", target: SearchScope.Published), + await SearchText(expected: ids1, text: "V2", target: SearchScope.All); + await SearchText(expected: null, text: "V2", target: SearchScope.Published); - // Publish the new version to get rid of the "V1" version. - Publish(ids1[0]), + // Publish the new version to get rid of the "V1" version. 
+ await PublishAsync(ids1[0]); - SearchText(expected: null, text: "V1", target: SearchScope.All), - SearchText(expected: null, text: "V1", target: SearchScope.Published), + await SearchText(expected: null, text: "V1", target: SearchScope.All); + await SearchText(expected: null, text: "V1", target: SearchScope.Published); - SearchText(expected: ids1, text: "V2", target: SearchScope.All), - SearchText(expected: ids1, text: "V2", target: SearchScope.Published), + await SearchText(expected: ids1, text: "V2", target: SearchScope.All); + await SearchText(expected: ids1, text: "V2", target: SearchScope.Published); - // Unpublish the version - Unpublish(ids1[0]), + // Unpublish the version + await UnpublishAsync(ids1[0]); - SearchText(expected: ids1, text: "V2", target: SearchScope.All), - SearchText(expected: null, text: "V2", target: SearchScope.Published) - ); + await SearchText(expected: ids1, text: "V2", target: SearchScope.All); + await SearchText(expected: null, text: "V2", target: SearchScope.Published); } [Fact] public async Task Should_simulate_new_version_with_migration() { - await TestCombinations(0, - CreateText(ids1[0], "iv", "V1"), + await CreateTextAsync(ids1[0], "iv", "V1"); - // Publish the content. - Publish(ids1[0]), + // Publish the content. + await PublishAsync(ids1[0]); - SearchText(expected: ids1, text: "V1", target: SearchScope.All), - SearchText(expected: ids1, text: "V1", target: SearchScope.Published), + await SearchText(expected: ids1, text: "V1", target: SearchScope.All); + await SearchText(expected: ids1, text: "V1", target: SearchScope.Published); - // Create a new version, his updates the new version also. - CreateDraftWithText(ids1[0], "iv", "V2"), + // Create a new version, this updates the new version also. 
+ await CreateDraftWithTextAsync(ids1[0], "iv", "V2"); - SearchText(expected: null, text: "V1", target: SearchScope.All), - SearchText(expected: ids1, text: "V1", target: SearchScope.Published), + await SearchText(expected: null, text: "V1", target: SearchScope.All); + await SearchText(expected: ids1, text: "V1", target: SearchScope.Published); - SearchText(expected: ids1, text: "V2", target: SearchScope.All), - SearchText(expected: null, text: "V2", target: SearchScope.Published) - ); + await SearchText(expected: ids1, text: "V2", target: SearchScope.All); + await SearchText(expected: null, text: "V2", target: SearchScope.Published); } [Fact] public async Task Should_simulate_content_reversion() { - await TestCombinations( - CreateText(ids1[0], "iv", "V1"), + await CreateTextAsync(ids1[0], "iv", "V1"); - // Publish the content. - Publish(ids1[0]), + // Publish the content. + await PublishAsync(ids1[0]); - // Create a new version, the value is still the same as old version. - CreateDraft(ids1[0]), + // Create a new version, the value is still the same as old version. + await CreateDraftAsync(ids1[0]); - // Make an update, this updates the new version only. - UpdateText(ids1[0], "iv", "V2"), + // Make an update, this updates the new version only. + await UpdateTextAsync(ids1[0], "iv", "V2"); - // Make an update, this updates the new version only. - DeleteDraft(ids1[0]), + // Delete the draft, this discards the new version again. 
+ await DeleteDraftAsync(ids1[0]); - SearchText(expected: ids1, text: "V1", target: SearchScope.All), - SearchText(expected: ids1, text: "V1", target: SearchScope.Published), + await SearchText(expected: ids1, text: "V1", target: SearchScope.All); + await SearchText(expected: ids1, text: "V1", target: SearchScope.Published); - SearchText(expected: null, text: "V2", target: SearchScope.All), - SearchText(expected: null, text: "V2", target: SearchScope.Published), + await SearchText(expected: null, text: "V2", target: SearchScope.All); + await SearchText(expected: null, text: "V2", target: SearchScope.Published); - // Make an update, this updates the current version only. - UpdateText(ids1[0], "iv", "V3"), + // Make an update, this updates the current version only. + await UpdateTextAsync(ids1[0], "iv", "V3"); - SearchText(expected: ids1, text: "V3", target: SearchScope.All), - SearchText(expected: ids1, text: "V3", target: SearchScope.Published) - ); + await SearchText(expected: ids1, text: "V3", target: SearchScope.All); + await SearchText(expected: ids1, text: "V3", target: SearchScope.Published); } [Fact] public async Task Should_delete_documents_from_index() { - await TestCombinations( - CreateText(ids1[0], "iv", "V1_1"), - CreateText(ids2[0], "iv", "V2_1"), + await CreateTextAsync(ids1[0], "iv", "V1_1"); + await CreateTextAsync(ids2[0], "iv", "V2_1"); - SearchText(expected: ids1, text: "V1_1"), - SearchText(expected: ids2, text: "V2_1"), + await SearchText(expected: ids1, text: "V1_1"); + await SearchText(expected: ids2, text: "V2_1"); - Delete(ids1[0]), + await DeleteAsync(ids1[0]); - SearchText(expected: null, text: "V1_1"), - SearchText(expected: ids2, text: "V2_1") - ); + await SearchText(expected: null, text: "V1_1"); + await SearchText(expected: ids2, text: "V2_1"); } - protected IndexOperation CreateText(DomainId id, string language, string text) + protected Task CreateTextAsync(DomainId id, string language, string text) { var data = TextData(language, 
text); - return Op(id, new ContentCreated { Data = data }); + return UpdateAsync(id, new ContentCreated { Data = data }); } - protected IndexOperation CreateGeo(DomainId id, string field, double latitude, double longitude) + protected Task CreateGeoAsync(DomainId id, string field, double latitude, double longitude) { var data = GeoData(field, latitude, longitude); - return Op(id, new ContentCreated { Data = data }); + return UpdateAsync(id, new ContentCreated { Data = data }); } - protected IndexOperation UpdateText(DomainId id, string language, string text) + protected Task UpdateTextAsync(DomainId id, string language, string text) { var data = TextData(language, text); - return Op(id, new ContentUpdated { Data = data }); + return UpdateAsync(id, new ContentUpdated { Data = data }); } - protected IndexOperation CreateDraftWithText(DomainId id, string language, string text) + protected Task CreateDraftWithTextAsync(DomainId id, string language, string text) { var data = TextData(language, text); - return Op(id, new ContentDraftCreated { MigratedData = data }); + return UpdateAsync(id, new ContentDraftCreated { MigratedData = data }); } - private static ContentData TextData(string language, string text) + protected Task CreateDraftAsync(DomainId id) { - return new ContentData() - .AddField("text", - new ContentFieldData() - .AddLocalized(language, text)); + return UpdateAsync(id, new ContentDraftCreated()); } - private static ContentData GeoData(string field, double latitude, double longitude) + protected Task PublishAsync(DomainId id) { - return new ContentData() - .AddField(field, - new ContentFieldData() - .AddInvariant(JsonValue.Object().Add("latitude", latitude).Add("longitude", longitude))); + return UpdateAsync(id, new ContentStatusChanged { Status = Status.Published }); } - protected IndexOperation CreateDraft(DomainId id) + protected Task UnpublishAsync(DomainId id) { - return Op(id, new ContentDraftCreated()); + return UpdateAsync(id, new 
ContentStatusChanged { Status = Status.Draft }); } - protected IndexOperation Publish(DomainId id) + protected Task DeleteDraftAsync(DomainId id) { - return Op(id, new ContentStatusChanged { Status = Status.Published }); + return UpdateAsync(id, new ContentDraftDeleted()); } - protected IndexOperation Unpublish(DomainId id) + protected Task DeleteAsync(DomainId id) { - return Op(id, new ContentStatusChanged { Status = Status.Draft }); + return UpdateAsync(id, new ContentDeleted()); } - protected IndexOperation DeleteDraft(DomainId id) - { - return Op(id, new ContentDraftDeleted()); - } - - protected IndexOperation Delete(DomainId id) - { - return Op(id, new ContentDeleted()); - } - - private IndexOperation Op(DomainId id, ContentEvent contentEvent) + private Task UpdateAsync(DomainId id, ContentEvent contentEvent) { contentEvent.ContentId = id; contentEvent.AppId = appId; contentEvent.SchemaId = schemaId; - return p => p.On(Enumerable.Repeat(Envelope.Create(contentEvent), 1)); + return sut.On(Enumerable.Repeat(Envelope.Create(contentEvent), 1)); } - protected IndexOperation SearchGeo(List? expected, string field, double latitude, double longitude, SearchScope target = SearchScope.All) + private static ContentData TextData(string language, string text) { - return async p => - { - var query = new GeoQuery(schemaId.Id, field, latitude, longitude, 1000, 1000); - - var result = await p.TextIndex.SearchAsync(app, query, target); - - if (expected != null) - { - result.Should().BeEquivalentTo(expected.ToHashSet()); - } - else - { - result.Should().BeEmpty(); - } - }; + return new ContentData() + .AddField("text", + new ContentFieldData() + .AddLocalized(language, text)); } - protected IndexOperation SearchText(List? 
expected, string text, SearchScope target = SearchScope.All) + private static ContentData GeoData(string field, double latitude, double longitude) { - return async p => - { - var query = new TextQuery(text, 1000) - { - RequiredSchemaIds = new List { schemaId.Id } - }; - - var result = await p.TextIndex.SearchAsync(app, query, target); - - if (expected != null) - { - result.Should().BeEquivalentTo(expected.ToHashSet()); - } - else - { - result.Should().BeEmpty(); - } - }; + return new ContentData() + .AddField(field, + new ContentFieldData() + .AddInvariant(JsonValue.Object().Add("latitude", latitude).Add("longitude", longitude))); } - protected async Task TestCombinations(params IndexOperation[] actions) + protected async Task SearchGeo(List? expected, string field, double latitude, double longitude, SearchScope target = SearchScope.All) { - if (SupportsCleanup) + var query = new GeoQuery(schemaId.Id, field, latitude, longitude, 1000, 1000); + + var result = await sut.TextIndex.SearchAsync(app, query, target); + + if (expected != null) { - for (var i = 0; i < actions.Length; i++) - { - await TestCombinations(i, actions); - } + result.Should().BeEquivalentTo(expected.ToHashSet()); } else { - await TestCombinations(0, actions); + result.Should().BeEmpty(); } } - protected async Task TestCombinations(int firstSteps, params IndexOperation[] actions) + protected async Task SearchText(List? 
expected, string text, SearchScope target = SearchScope.All) { - await ExecuteAsync(async sut => + var query = new TextQuery(text, 1000) { - foreach (var action in actions.Take(firstSteps)) - { - await action(sut); - } - }); + RequiredSchemaIds = new List { schemaId.Id } + }; - await ExecuteAsync(async sut => - { - foreach (var action in actions.Skip(firstSteps)) - { - await action(sut); - } - }); - } + var result = await sut.TextIndex.SearchAsync(app, query, target); - private async Task ExecuteAsync(IndexOperation action) - { - var indexer = await Factory.CreateAsync(schemaId.Id); - try + if (expected != null) { - var sut = new TextIndexingProcess(TestUtils.DefaultSerializer, indexer, State); - - await action(sut); + result.Should().BeEquivalentTo(expected.ToHashSet()); } - finally + else { - await Factory.CleanupAsync(); + result.Should().BeEmpty(); } } } diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Azure.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Azure.cs new file mode 100644 index 000000000..edfcbdb57 --- /dev/null +++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Azure.cs @@ -0,0 +1,52 @@ +// ========================================================================== +// Squidex Headless CMS +// ========================================================================== +// Copyright (c) Squidex UG (haftungsbeschraenkt) +// All rights reserved. Licensed under the MIT license. 
+// ========================================================================== + +using System.Threading.Tasks; +using Squidex.Extensions.Text.Azure; +using Xunit; + +namespace Squidex.Domain.Apps.Entities.Contents.Text +{ + [Trait("Category", "Dependencies")] + public class TextIndexerTests_Azure : TextIndexerTestsBase + { + public override ITextIndex CreateIndex() + { + var index = new AzureTextIndex("https://squidex.search.windows.net", "API_KEY", "test", 2000); + + index.InitializeAsync(default).Wait(); + + return index; + } + + [Fact] + public async Task Should_retrieve_english_stopword_only_for_german_query() + { + await CreateTextAsync(ids1[0], "de", "and und"); + await CreateTextAsync(ids2[0], "en", "and und"); + + await SearchText(expected: ids2, text: "und"); + } + + [Fact] + public async Task Should_retrieve_german_stopword_only_for_english_query() + { + await CreateTextAsync(ids1[0], "de", "and und"); + await CreateTextAsync(ids2[0], "en", "and und"); + + await SearchText(expected: ids1, text: "and"); + } + + [Fact] + public async Task Should_index_cjk_content_and_retrieve() + { + await CreateTextAsync(ids1[0], "zh", "東京大学"); + + await SearchText(expected: ids1, text: "東京"); + } + } +} diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Elastic.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Elastic.cs index 0c5411085..d45db4694 100644 --- a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Elastic.cs +++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Elastic.cs @@ -7,62 +7,46 @@ using System.Threading.Tasks; using Squidex.Extensions.Text.ElasticSearch; -using Squidex.Infrastructure; using Xunit; -#pragma warning disable SA1115 // Parameter should follow comma - namespace Squidex.Domain.Apps.Entities.Contents.Text { [Trait("Category", "Dependencies")] public class TextIndexerTests_Elastic : TextIndexerTestsBase { 
- private sealed class ElasticFactory : IIndexerFactory + public override ITextIndex CreateIndex() { - public Task CleanupAsync() - { - return Task.CompletedTask; - } - - public async Task CreateAsync(DomainId schemaId) - { - var index = new ElasticSearchTextIndex("http://localhost:9200", "squidex", true); + var index = new ElasticSearchTextIndex("http://localhost:9200", "squidex", 1000); - await index.InitializeAsync(default); + index.InitializeAsync(default).Wait(); - return index; - } + return index; } - public override IIndexerFactory Factory { get; } = new ElasticFactory(); - - public TextIndexerTests_Elastic() + [Fact] + public async Task Should_retrieve_english_stopword_only_for_german_query() { -#pragma warning disable MA0056 // Do not call overridable members in constructor - SupportsQuerySyntax = true; -#pragma warning restore MA0056 // Do not call overridable members in constructor + await CreateTextAsync(ids1[0], "de", "and und"); + await CreateTextAsync(ids2[0], "en", "and und"); + + await SearchText(expected: ids2, text: "und"); } [Fact] - public async Task Should_index_localized_content_without_stop_words_and_retrieve() + public async Task Should_retrieve_german_stopword_only_for_english_query() { - await TestCombinations( - CreateText(ids1[0], "de", "and und"), - CreateText(ids2[0], "en", "and und"), + await CreateTextAsync(ids1[0], "de", "and und"); + await CreateTextAsync(ids2[0], "en", "and und"); - SearchText(expected: ids1, text: "and"), - SearchText(expected: ids2, text: "und") - ); + await SearchText(expected: ids1, text: "and"); } [Fact] public async Task Should_index_cjk_content_and_retrieve() { - await TestCombinations( - CreateText(ids1[0], "zh", "東京大学"), + await CreateTextAsync(ids1[0], "zh", "東京大学"); - SearchText(expected: ids1, text: "東京") - ); + await SearchText(expected: ids1, text: "東京"); } } } diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Mongo.cs 
b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Mongo.cs index d3a5f1148..93f81a8a3 100644 --- a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Mongo.cs +++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Mongo.cs @@ -11,64 +11,57 @@ using MongoDB.Driver; using Newtonsoft.Json; using Squidex.Domain.Apps.Core.TestHelpers; using Squidex.Domain.Apps.Entities.MongoDb.FullText; -using Squidex.Infrastructure; using Squidex.Infrastructure.MongoDb; using Xunit; -#pragma warning disable SA1115 // Parameter should follow comma - namespace Squidex.Domain.Apps.Entities.Contents.Text { [Trait("Category", "Dependencies")] public class TextIndexerTests_Mongo : TextIndexerTestsBase { - private sealed class MongoFactory : IIndexerFactory + public override bool SupportsQuerySyntax => false; + + public override bool SupportsGeo => true; + + static TextIndexerTests_Mongo() { - private readonly MongoClient mongoClient = new MongoClient("mongodb://localhost"); + BsonJsonConvention.Register(JsonSerializer.Create(TestUtils.CreateSerializerSettings())); - public Task CleanupAsync() - { - return Task.CompletedTask; - } + DomainIdSerializer.Register(); + } - public async Task CreateAsync(DomainId schemaId) - { - var database = mongoClient.GetDatabase("Squidex_Testing"); + public override ITextIndex CreateIndex() + { + var mongoClient = new MongoClient("mongodb://localhost"); + var mongoDatabase = mongoClient.GetDatabase("Squidex_Testing"); - var index = new MongoTextIndex(database, false); + var index = new MongoTextIndex(mongoDatabase, false); - await index.InitializeAsync(default); + index.InitializeAsync(default).Wait(); - return index; - } + return index; } - public override IIndexerFactory Factory { get; } = new MongoFactory(); - - public TextIndexerTests_Mongo() + [Fact] + public async Task Should_retrieve_all_stopwords_for_english_query() { - 
BsonJsonConvention.Register(JsonSerializer.Create(TestUtils.CreateSerializerSettings())); + var both = ids2.Union(ids1).ToList(); - DomainIdSerializer.Register(); + await CreateTextAsync(ids1[0], "de", "and und"); + await CreateTextAsync(ids2[0], "en", "and und"); -#pragma warning disable MA0056 // Do not call overridable members in constructor - SupportsQuerySyntax = false; - SupportsGeo = true; -#pragma warning restore MA0056 // Do not call overridable members in constructor + await SearchText(expected: both, text: "and"); } [Fact] - public async Task Should_index_localized_content_without_stop_words_and_retrieve() + public async Task Should_retrieve_all_stopwords_for_german_query() { var both = ids2.Union(ids1).ToList(); - await TestCombinations( - CreateText(ids1[0], "de", "and und"), - CreateText(ids2[0], "en", "and und"), + await CreateTextAsync(ids1[0], "de", "and und"); + await CreateTextAsync(ids2[0], "en", "and und"); - SearchText(expected: both, text: "and"), - SearchText(expected: both, text: "und") - ); + await SearchText(expected: both, text: "und"); } } }