From 7f5ff73ef9e9ffb8175b7aca185c4fcb03f2b475 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Wed, 27 Feb 2019 09:51:39 +0100 Subject: [PATCH] First working version of full text search. --- .../Contents/MongoContentRepository.cs | 16 ++-- .../Contents/Text/GrainTextIndexer.cs | 25 ++--- .../Contents/Text/ITextIndexer.cs | 2 +- .../Contents/Text/ITextIndexerGrain.cs | 2 +- .../Contents/Text/PersistenceHelper.cs | 3 +- .../Contents/Text/SearchContext.cs | 6 +- .../Contents/Text/TextIndexerGrain.cs | 93 ++++++++++++------- .../Squidex.Domain.Apps.Entities.csproj | 1 + src/Squidex/Config/Domain/StoreServices.cs | 3 +- src/Squidex/appsettings.json | 58 ++++++------ .../Contents/Text/TextIndexerGrainTests.cs | 53 ++++++----- .../AssetUserPictureStoreTests.cs | 2 +- 12 files changed, 149 insertions(+), 115 deletions(-) diff --git a/src/Squidex.Domain.Apps.Entities.MongoDb/Contents/MongoContentRepository.cs b/src/Squidex.Domain.Apps.Entities.MongoDb/Contents/MongoContentRepository.cs index ea8fd726b..405148b16 100644 --- a/src/Squidex.Domain.Apps.Entities.MongoDb/Contents/MongoContentRepository.cs +++ b/src/Squidex.Domain.Apps.Entities.MongoDb/Contents/MongoContentRepository.cs @@ -55,18 +55,16 @@ namespace Squidex.Domain.Apps.Entities.MongoDb.Contents { using (Profiler.TraceMethod("QueryAsyncByQuery")) { - if (RequiresPublished(status)) - { - var ids = await indexer.SearchAsync(query.FullText, app, schema); + var useDraft = RequiresPublished(status); - return await contents.QueryAsync(app, schema, query, ids); - } - else - { - var ids = await indexer.SearchAsync(query.FullText, app, schema, true); + var fullTextIds = await indexer.SearchAsync(query.FullText, app, schema, useDraft); - return await contents.QueryAsync(app, schema, query, ids, status, true); + if (fullTextIds?.Count == 0) + { + return ResultList.Create(0); } + + return await contents.QueryAsync(app, schema, query, fullTextIds, status, true); } } diff --git 
a/src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs b/src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs index aea939d31..d54cba803 100644 --- a/src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs +++ b/src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs @@ -14,6 +14,7 @@ using Squidex.Domain.Apps.Core.Contents; using Squidex.Domain.Apps.Entities.Apps; using Squidex.Domain.Apps.Entities.Schemas; using Squidex.Infrastructure; +using Squidex.Infrastructure.Log; namespace Squidex.Domain.Apps.Entities.Contents.Text { @@ -41,12 +42,12 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text if (data != null) { - await index.IndexAsync(id, new IndexData { }); + await index.IndexAsync(id, new IndexData { Data = data }); } if (dataDraft != null) { - await index.IndexAsync(id, new IndexData { IsDraft = true }); + await index.IndexAsync(id, new IndexData { Data = dataDraft, IsDraft = true }); } } @@ -59,17 +60,19 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text var index = grainFactory.GetGrain(schema.Id); - var languages = app.LanguagesConfig.Select(x => x.Key).ToList(); - - var context = new SearchContext + using (Profiler.TraceMethod("SearchAsync")) { - AppVersion = app.Version, - AppLanguages = languages, - SchemaVersion = schema.Version, - IsDraft = useDraft - }; + var context = CreateContext(app, useDraft); + + return await index.SearchAsync(queryText, context); + } + } + + private static SearchContext CreateContext(IAppEntity app, bool useDraft) + { + var languages = new HashSet(app.LanguagesConfig.Select(x => x.Key)); - return await index.SearchAsync(queryText, context); + return new SearchContext { Languages = languages, IsDraft = useDraft }; } } } diff --git a/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexer.cs b/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexer.cs index 27d552d20..51dcf82ed 100644 --- a/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexer.cs +++ 
b/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexer.cs @@ -20,6 +20,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text Task IndexAsync(Guid schemaId, Guid id, NamedContentData data, NamedContentData dataDraft); - Task> SearchAsync(string queryText, IAppEntity appEntity, ISchemaEntity schemaEntity, bool useDraft = false); + Task> SearchAsync(string queryText, IAppEntity app, ISchemaEntity schema, bool useDraft = false); } } diff --git a/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs b/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs index 8bd3884d1..dd1d4c5c8 100644 --- a/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs +++ b/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs @@ -19,6 +19,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text Task IndexAsync(Guid id, J data); - Task> SearchAsync(string queryText, J context); + Task> SearchAsync(string queryText, SearchContext context); } } \ No newline at end of file diff --git a/src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs b/src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs index 49424c856..621e141c9 100644 --- a/src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs +++ b/src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs @@ -34,7 +34,8 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { try { - if (!file.Name.Equals(ArchiveFile, StringComparison.OrdinalIgnoreCase) && !file.Name.Equals(LockFile, StringComparison.OrdinalIgnoreCase)) + if (!file.Name.Equals(ArchiveFile, StringComparison.OrdinalIgnoreCase) && + !file.Name.Equals(LockFile, StringComparison.OrdinalIgnoreCase)) { zipArchive.CreateEntryFromFile(file.FullName, file.Name); } diff --git a/src/Squidex.Domain.Apps.Entities/Contents/Text/SearchContext.cs b/src/Squidex.Domain.Apps.Entities/Contents/Text/SearchContext.cs index 9c6a1ddad..01bd8f78a 100644 --- 
a/src/Squidex.Domain.Apps.Entities/Contents/Text/SearchContext.cs +++ b/src/Squidex.Domain.Apps.Entities/Contents/Text/SearchContext.cs @@ -13,10 +13,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { public bool IsDraft { get; set; } - public long AppVersion { get; set; } - - public long SchemaVersion { get; set; } - - public List AppLanguages { get; set; } + public HashSet Languages { get; set; } } } diff --git a/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs b/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs index 58ea91fde..5335954ca 100644 --- a/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs +++ b/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs @@ -14,6 +14,7 @@ using System.Threading.Tasks; using Lucene.Net.Analysis; using Lucene.Net.Documents; using Lucene.Net.Index; +using Lucene.Net.Queries; using Lucene.Net.QueryParsers.Classic; using Lucene.Net.Search; using Lucene.Net.Store; @@ -30,15 +31,15 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private const LuceneVersion Version = LuceneVersion.LUCENE_48; private const int MaxResults = 2000; private const int MaxUpdates = 100; - private static readonly HashSet IdFields = new HashSet(); + private static readonly TimeSpan CommitDelay = TimeSpan.FromSeconds(30); private static readonly Analyzer Analyzer = new MultiLanguageAnalyzer(Version); private readonly IAssetStore assetStore; + private IDisposable timer; private DirectoryInfo directory; private IndexWriter indexWriter; private IndexReader indexReader; private QueryParser queryParser; - private long currentAppVersion; - private long currentSchemaVersion; + private HashSet currentLanguages; private long updates; public TextIndexerGrain(IAssetStore assetStore) @@ -48,13 +49,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text this.assetStore = assetStore; } - public override Task OnActivateAsync() - { - RegisterTimer(_ => FlushAsync(), null, TimeSpan.Zero, 
TimeSpan.FromMinutes(10)); - - return base.OnActivateAsync(); - } - public override async Task OnDeactivateAsync() { await DeactivateAsync(true); @@ -79,16 +73,13 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text public Task IndexAsync(Guid id, J data) { - string idString = id.ToString(), draft = data.Value.IsDraft.ToString(); + var docId = id.ToString(); + var docDraft = data.Value.IsDraft.ToString(); + var docKey = $"{docId}_{docDraft}"; - indexWriter.DeleteDocuments( - new Term("id", idString), - new Term("dd", draft)); + var query = new BooleanQuery(); - var document = new Document(); - - document.AddStringField("id", idString, Field.Store.YES); - document.AddStringField("dd", draft, Field.Store.YES); + indexWriter.DeleteDocuments(new Term("key", docKey)); var languages = new Dictionary(); @@ -117,12 +108,23 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text } } - foreach (var field in languages) + if (languages.Count > 0) { - document.AddTextField(field.Key, field.Value.ToString(), Field.Store.NO); - } + var document = new Document(); - indexWriter.AddDocument(document); + document.AddStringField("id", docId, Field.Store.YES); + document.AddStringField("key", docKey, Field.Store.YES); + document.AddStringField("draft", docDraft, Field.Store.YES); + + foreach (var field in languages) + { + var fieldName = BuildFieldName(field.Key); + + document.AddTextField(fieldName, field.Value.ToString(), Field.Store.NO); + } + + indexWriter.AddDocument(document); + } return TryFlushAsync(); } @@ -131,9 +133,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { if (value.Type == JsonValueType.String) { - var text = value.ToString(); - - appendText(text); + appendText(value.ToString()); } else if (value is JsonArray array) { @@ -151,7 +151,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text } } - public Task> SearchAsync(string queryText, J context) + public Task> SearchAsync(string queryText, SearchContext context) { var result = new HashSet(); @@ 
-161,9 +161,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text if (indexReader != null) { - var filter = new QueryWrapperFilter(new TermQuery(new Term("dd", context.Value.IsDraft.ToString()))); + var filter = new TermsFilter(new Term("draft", context.IsDraft.ToString())); - var hits = new IndexSearcher(indexReader).Search(query, MaxResults).ScoreDocs; + var hits = new IndexSearcher(indexReader).Search(query, filter, MaxResults).ScoreDocs; foreach (var hit in hits) { @@ -184,17 +184,25 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private Query BuildQuery(string query, SearchContext context) { - if (queryParser == null || currentAppVersion != context.AppVersion || currentSchemaVersion != context.SchemaVersion) + if (queryParser == null || !currentLanguages.SetEquals(context.Languages)) { - var fields = context.AppLanguages.Select(BuildFieldName).ToArray(); + var fields = + context.Languages.Select(BuildFieldName) + .Union(Enumerable.Repeat(BuildFieldName("iv"), 1)).ToArray(); queryParser = new MultiFieldQueryParser(Version, fields, Analyzer); - currentAppVersion = context.AppVersion; - currentSchemaVersion = context.SchemaVersion; + currentLanguages = context.Languages; } - return queryParser.Parse(query); + try + { + return queryParser.Parse(query); + } + catch (ParseException ex) + { + throw new ValidationException(ex.Message); + } } private async Task TryFlushAsync() @@ -205,6 +213,19 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { await FlushAsync(); } + else + { + timer?.Dispose(); + + try + { + timer = RegisterTimer(_ => FlushAsync(), null, CommitDelay, CommitDelay); + } + catch (InvalidOperationException) + { + return; + } + } } public async Task FlushAsync() @@ -221,6 +242,10 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text updates = 0; } + else + { + timer?.Dispose(); + } } public async Task DeactivateAsync(bool deleteFolder = false) @@ -241,7 +266,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private static 
string BuildFieldName(string language) { - return $"field_{language}"; + return $"{language}_field"; } } } diff --git a/src/Squidex.Domain.Apps.Entities/Squidex.Domain.Apps.Entities.csproj b/src/Squidex.Domain.Apps.Entities/Squidex.Domain.Apps.Entities.csproj index b076b00cc..7b7db6907 100644 --- a/src/Squidex.Domain.Apps.Entities/Squidex.Domain.Apps.Entities.csproj +++ b/src/Squidex.Domain.Apps.Entities/Squidex.Domain.Apps.Entities.csproj @@ -18,6 +18,7 @@ + all diff --git a/src/Squidex/Config/Domain/StoreServices.cs b/src/Squidex/Config/Domain/StoreServices.cs index b95d436ba..5f61cf9d4 100644 --- a/src/Squidex/Config/Domain/StoreServices.cs +++ b/src/Squidex/Config/Domain/StoreServices.cs @@ -18,6 +18,7 @@ using Squidex.Domain.Apps.Entities.Assets.Repositories; using Squidex.Domain.Apps.Entities.Assets.State; using Squidex.Domain.Apps.Entities.Contents.Repositories; using Squidex.Domain.Apps.Entities.Contents.State; +using Squidex.Domain.Apps.Entities.Contents.Text; using Squidex.Domain.Apps.Entities.History.Repositories; using Squidex.Domain.Apps.Entities.MongoDb.Assets; using Squidex.Domain.Apps.Entities.MongoDb.Contents; @@ -104,7 +105,7 @@ namespace Squidex.Config.Domain c.GetRequiredService().GetDatabase(mongoContentDatabaseName), c.GetRequiredService(), c.GetRequiredService(), - c.GetRequiredService>())) + c.GetRequiredService())) .AsOptional() .AsOptional>() .AsOptional(); diff --git a/src/Squidex/appsettings.json b/src/Squidex/appsettings.json index 3e5ba0b28..08771041a 100644 --- a/src/Squidex/appsettings.json +++ b/src/Squidex/appsettings.json @@ -283,7 +283,11 @@ /* * The database for all your other read collections. */ - "database": "Squidex" + "database": "Squidex", + /* + * Indicate whether the connection string is for cosmos db. + */ + "isCosmosDB": "false" } }, @@ -291,48 +295,48 @@ /* * Enable password auth. Set this to false if you want to disable local login, leaving only 3rd party login options. 
*/ - "allowPasswordAuth": true, - /* + "allowPasswordAuth": true, + /* * Initial admin user. */ - "adminEmail": "", - "adminPassword": "", - /* + "adminEmail": "", + "adminPassword": "", + /* * Client with all admin permissions. */ - "adminClientId": "", - "adminClientSecret": "", - /* + "adminClientId": "", + "adminClientSecret": "", + /* * Settings for Google auth (keep empty to disable). */ - "googleClient": "1006817248705-t3lb3ge808m9am4t7upqth79hulk456l.apps.googleusercontent.com", - "googleSecret": "QsEi-fHqkGw2_PjJmtNHf2wg", - /* + "googleClient": "1006817248705-t3lb3ge808m9am4t7upqth79hulk456l.apps.googleusercontent.com", + "googleSecret": "QsEi-fHqkGw2_PjJmtNHf2wg", + /* * Settings for Github auth (keep empty to disable). */ - "githubClient": "211ea00e726baf754c78", - "githubSecret": "d0a0d0fe2c26469ae20987ac265b3a339fd73132", - /* + "githubClient": "211ea00e726baf754c78", + "githubSecret": "d0a0d0fe2c26469ae20987ac265b3a339fd73132", + /* * Settings for Microsoft auth (keep empty to disable). */ - "microsoftClient": "b55da740-6648-4502-8746-b9003f29d5f1", - "microsoftSecret": "idWbANxNYEF4cB368WXJhjN", - /* + "microsoftClient": "b55da740-6648-4502-8746-b9003f29d5f1", + "microsoftSecret": "idWbANxNYEF4cB368WXJhjN", + /* * Settings for your custom oidc server. */ - "oidcName": "OIDC", - "oidcAuthority": "", - "oidcClient": "", - "oidcSecret": "", - /* + "oidcName": "OIDC", + "oidcAuthority": "", + "oidcClient": "", + "oidcSecret": "", + /* * Lock new users automatically, the administrator must unlock them. */ - "lockAutomatically": false, - /* + "lockAutomatically": false, + /* * The url to you privacy statements, if you host squidex by yourself. 
*/ - "privacyUrl": "https://squidex.io/privacy" - }, + "privacyUrl": "https://squidex.io/privacy" + }, "news": { /* diff --git a/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs b/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs index dd009aa55..1cbcb18c8 100644 --- a/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs +++ b/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs @@ -8,9 +8,8 @@ using System; using System.Collections.Generic; using System.Threading.Tasks; -using Squidex.Domain.Apps.Core; using Squidex.Domain.Apps.Core.Contents; -using Squidex.Domain.Apps.Core.Schemas; +using Squidex.Infrastructure; using Squidex.Infrastructure.Assets; using Xunit; @@ -18,10 +17,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { public class TextIndexerGrainTests : IDisposable { - private readonly Schema schema = - new Schema("test") - .AddString(1, "test", Partitioning.Invariant) - .AddString(2, "localized", Partitioning.Language); private readonly Guid schemaId = Guid.NewGuid(); private readonly List ids1 = new List { Guid.NewGuid() }; private readonly List ids2 = new List { Guid.NewGuid() }; @@ -33,10 +28,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { context = new SearchContext { - AppVersion = 1, - Schema = schema, - SchemaVersion = 1, - AppLanguages = new List { "de", "en" } + Languages = new HashSet { "de", "en" } }; sut = new TextIndexerGrain(assetStore); @@ -60,13 +52,11 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { await other.ActivateAsync(schemaId); - var helloIds = await other.SearchAsync("Hello", context); + var foundHello = await other.SearchAsync("Hello", context); + var foundWorld = await other.SearchAsync("World", context); - Assert.Equal(ids1, helloIds); - - var worldIds = await other.SearchAsync("World", context); - - Assert.Equal(ids2, worldIds); + Assert.Equal(ids1, foundHello); + Assert.Equal(ids2, 
foundWorld); } finally { @@ -79,13 +69,23 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { await AddInvariantContent(); - var helloIds = await sut.SearchAsync("Hello", context); + var foundHello = await sut.SearchAsync("Hello", context); + var foundWorld = await sut.SearchAsync("World", context); - Assert.Equal(ids1, helloIds); + Assert.Equal(ids1, foundHello); + Assert.Equal(ids2, foundWorld); + } - var worldIds = await sut.SearchAsync("World", context); + [Fact] + public async Task Should_index_invariant_content_and_retrieve_with_fuzzy() + { + await AddInvariantContent(); - Assert.Equal(ids2, worldIds); + var foundHello = await sut.SearchAsync("helo~", context); + var foundWorld = await sut.SearchAsync("wold~", context); + + Assert.Equal(ids1, foundHello); + Assert.Equal(ids2, foundWorld); } [Fact] @@ -98,10 +98,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text var helloIds = await sut.SearchAsync("Hello", context); - Assert.Empty(helloIds); - var worldIds = await sut.SearchAsync("World", context); + Assert.Empty(helloIds); Assert.Equal(ids2, worldIds); } @@ -117,7 +116,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text Assert.Equal(ids1, german1); Assert.Equal(ids1, german2); - Assert.Equal(ids2, germanStopwordsIds); var english1 = await sut.SearchAsync("City", context); @@ -127,10 +125,17 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text Assert.Equal(ids2, english1); Assert.Equal(ids2, english2); - Assert.Equal(ids1, englishStopwordsIds); } + [Fact] + public async Task Should_throw_exception_for_invalid_query() + { + await AddInvariantContent(); + + await Assert.ThrowsAsync(() => sut.SearchAsync("~hello", context)); + } + private async Task AddLocalizedContent() { var germanData = diff --git a/tests/Squidex.Domain.Users.Tests/AssetUserPictureStoreTests.cs b/tests/Squidex.Domain.Users.Tests/AssetUserPictureStoreTests.cs index 69193a0c2..939b1b6fe 100644 --- a/tests/Squidex.Domain.Users.Tests/AssetUserPictureStoreTests.cs +++ 
b/tests/Squidex.Domain.Users.Tests/AssetUserPictureStoreTests.cs @@ -35,7 +35,7 @@ namespace Squidex.Domain.Users await sut.UploadAsync(userId, stream); - A.CallTo(() => assetStore.UploadAsync(userId, 0, "picture", stream, CancellationToken.None)).MustHaveHappened(); + A.CallTo(() => assetStore.UploadAsync(userId, 0, "picture", stream, false, CancellationToken.None)).MustHaveHappened(); } [Fact]