From b6a0029fd9fb383581bd1ec0f8bd524f27961988 Mon Sep 17 00:00:00 2001 From: Sebastian Stehle Date: Sun, 14 Apr 2019 10:55:21 +0200 Subject: [PATCH] Performance improvements. --- .../Contents/Text/IndexState.cs | 104 ++++++++++++++++-- .../Contents/Text/TextIndexContent.cs | 45 ++++---- .../Contents/Text/TextIndexerGrain.cs | 65 ++++++----- .../Contents/Text/TextIndexerGrainTests.cs | 10 +- 4 files changed, 158 insertions(+), 66 deletions(-) diff --git a/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs b/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs index 7be407a84..029f601fc 100644 --- a/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs +++ b/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs @@ -5,20 +5,34 @@ // All rights reserved. Licensed under the MIT license. // ========================================================================== +using System; +using System.Collections.Generic; +using System.Linq; using Lucene.Net.Documents; using Lucene.Net.Index; +using Lucene.Net.Search; using Lucene.Net.Util; namespace Squidex.Domain.Apps.Entities.Contents.Text { internal sealed class IndexState { + private const int NotFound = -1; private const string MetaFor = "_fd"; + private readonly IndexSearcher indexSearcher; private readonly IndexWriter indexWriter; private readonly BinaryDocValues binaryValues; + private readonly Dictionary<(Guid, byte), BytesRef> changes = new Dictionary<(Guid, byte), BytesRef>(); + private bool isClosed; - public IndexState(IndexReader indexReader, IndexWriter indexWriter) + public int Changes { + get { return changes.Count; } + } + + public IndexState(IndexWriter indexWriter, IndexReader indexReader = null, IndexSearcher indexSearcher = null) + { + this.indexSearcher = indexSearcher; this.indexWriter = indexWriter; if (indexReader != null) @@ -27,27 +41,52 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text } } - public void Index(Document document, byte forDraft, byte forPublished) + public void Index(Guid id, byte draft, Document document, byte forDraft, byte forPublished) { + var value = GetValue(forDraft, forPublished); + document.RemoveField(MetaFor); - document.AddBinaryDocValuesField(MetaFor, GetValue(forDraft, forPublished)); + document.AddBinaryDocValuesField(MetaFor, value); + + changes[(id, draft)] = value; } - public void Index(Term term, byte forDraft, byte forPublished) + public void Index(Guid id, byte draft, Term term, byte forDraft, byte forPublished) { - indexWriter.UpdateBinaryDocValue(term, MetaFor, GetValue(forDraft, forPublished)); + var value = GetValue(forDraft, forPublished); + + indexWriter.UpdateBinaryDocValue(term, MetaFor, value); + + changes[(id, draft)] = value; } - public bool TryGet(int docId, out byte forDraft, out byte forPublished) + public bool HasBeenAdded(Guid id, byte draft, Term term, out int docId) { - var forValue = new BytesRef(); + docId = 0; + + if (changes.ContainsKey((id, draft))) + { + return true; + } + if (indexSearcher != null && !isClosed) + { + var docs = indexSearcher.Search(new TermQuery(term), 1); + + docId = docs?.ScoreDocs.FirstOrDefault()?.Doc ?? NotFound; + + return docId > NotFound; + } + + return false; + } + + public bool TryGet(Guid id, byte draft, int docId, out byte forDraft, out byte forPublished) + { forDraft = 0; forPublished = 0; - binaryValues?.Get(docId, forValue); - - if (forValue.Bytes.Length == 2) + if (changes.TryGetValue((id, draft), out var forValue)) { forDraft = forValue.Bytes[0]; forPublished = forValue.Bytes[1]; @@ -55,6 +94,46 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text return true; } + if (!isClosed && docId != NotFound) + { + forValue = new BytesRef(); + + binaryValues?.Get(docId, forValue); + + if (forValue.Bytes.Length == 2) + { + forDraft = forValue.Bytes[0]; + forPublished = forValue.Bytes[1]; + + changes[(id, draft)] = forValue; + + return true; + } + } + + return false; + } + + public bool TryGet(int docId, out byte forDraft, out byte forPublished) + { + forDraft = 0; + forPublished = 0; + + if (!isClosed && docId != NotFound) + { + var forValue = new BytesRef(); + + binaryValues?.Get(docId, forValue); + + if (forValue.Bytes.Length == 2) + { + forDraft = forValue.Bytes[0]; + forPublished = forValue.Bytes[1]; + + return true; + } + } + return false; } @@ -62,5 +141,10 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { return new BytesRef(new[] { forDraft, forPublished }); } + + public void CloseReader() + { + isClosed = true; + } } } diff --git a/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexContent.cs b/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexContent.cs index 010498515..08e49388b 100644 --- a/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexContent.cs +++ b/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexContent.cs @@ -23,14 +23,12 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private const string MetaId = "_id"; private const string MetaKey = "_key"; private readonly IndexWriter indexWriter; - private readonly IndexSearcher indexSearcher; private readonly IndexState indexState; private readonly Guid id; - public TextIndexContent(IndexWriter indexWriter, IndexSearcher indexSearcher, IndexState indexState, Guid id) + public TextIndexContent(IndexWriter indexWriter, IndexState indexState, Guid id) { this.indexWriter = indexWriter; - this.indexSearcher = indexSearcher; this.indexState = indexState; this.id = id; @@ -78,21 +76,20 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text Upsert(converted, 1, 1, 0); - var docId = GetPublishedDocument(); + var isPublishDocumentAdded = IsAdded(0, out var docId); + var isPublishForPublished = IsForPublished(0, docId); - var isPublished = IsForPublished(docId); - - if (!onlyDraft && docId > 0 && isPublished) + if (!onlyDraft && isPublishDocumentAdded && isPublishForPublished) { Upsert(converted, 0, 0, 1); } - else if (!onlyDraft || docId == 0) + else if (!onlyDraft || !isPublishDocumentAdded) { Upsert(converted, 0, 0, 0); } else { - Update(0, 0, isPublished ? (byte)1 : (byte)0); + UpdateFor(0, 0, isPublishForPublished ? (byte)1 : (byte)0); } } @@ -100,13 +97,13 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { if (fromDraft) { - Update(1, 1, 0); - Update(0, 0, 1); + UpdateFor(1, 1, 0); + UpdateFor(0, 0, 1); } else { - Update(1, 0, 0); - Update(0, 1, 1); + UpdateFor(1, 0, 0); + UpdateFor(0, 1, 1); } } @@ -149,18 +146,11 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text return document; } - private void Update(byte draft, byte forDraft, byte forPublished) + private void UpdateFor(byte draft, byte forDraft, byte forPublished) { var term = new Term(MetaKey, BuildKey(draft)); - indexState.Index(term, forDraft, forPublished); - } - - private int GetPublishedDocument() - { - var docs = indexSearcher?.Search(new TermQuery(new Term(MetaKey, BuildKey(0))), 1); - - return docs?.ScoreDocs.FirstOrDefault()?.Doc ?? 0; + indexState.Index(id, draft, term, forDraft, forPublished); } private void Upsert(Document document, byte draft, byte forDraft, byte forPublished) @@ -176,7 +166,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text document.AddStringField(MetaId, contentId, Field.Store.YES); document.AddStringField(MetaKey, contentKey, Field.Store.YES); - indexState.Index(document, forDraft, forPublished); + indexState.Index(id, draft, document, forDraft, forPublished); indexWriter.UpdateDocument(new Term(MetaKey, contentKey), document); } @@ -204,9 +194,14 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text } } - private bool IsForPublished(int docId) + private bool IsAdded(byte draft, out int docId) + { + return indexState.HasBeenAdded(id, draft, new Term(MetaKey, BuildKey(draft)), out docId); + } + + private bool IsForPublished(byte draft, int docId) { - return indexState.TryGet(docId, out _, out var p) && p == 1; + return indexState.TryGet(id, draft, docId, out _, out var p) && p == 1; } private string BuildKey(byte draft) diff --git a/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs b/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs index b2398267b..f11cc98f4 100644 --- a/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs +++ b/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs @@ -27,7 +27,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { private const LuceneVersion Version = LuceneVersion.LUCENE_48; private const int MaxResults = 2000; - private const int MaxUpdates = 100; + private const int MaxUpdates = 400; private static readonly TimeSpan CommitDelay = TimeSpan.FromSeconds(10); private static readonly Analyzer Analyzer = new MultiLanguageAnalyzer(Version); private static readonly string[] Invariant = { InvariantPartitioning.Instance.Master.Key }; @@ -41,7 +41,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private IndexState indexState; private QueryParser queryParser; private HashSet currentLanguages; - private long updates; public TextIndexerGrain(IAssetStore assetStore) { @@ -74,13 +73,13 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text } else { - indexState = new IndexState(indexReader, indexWriter); + indexState = new IndexState(indexWriter); } } public Task DeleteAsync(Guid id) { - var content = new TextIndexContent(indexWriter, indexSearcher, indexState, id); + var content = new TextIndexContent(indexWriter, indexState, id); content.Delete(); @@ -89,7 +88,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text public Task IndexAsync(Guid id, J data, bool onlyDraft) { - var content = new TextIndexContent(indexWriter, indexSearcher, indexState, id); + var content = new TextIndexContent(indexWriter, indexState, id); content.Index(data.Value.Data, onlyDraft); @@ -98,7 +97,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text public Task CopyAsync(Guid id, bool fromDraft) { - var content = new TextIndexContent(indexWriter, indexSearcher, indexState, id); + var content = new TextIndexContent(indexWriter, indexState, id); content.Copy(fromDraft); @@ -107,21 +106,31 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text public Task> SearchAsync(string queryText, SearchContext context) { - var result = new HashSet(); + var result = new List(); if (!string.IsNullOrWhiteSpace(queryText)) { var query = BuildQuery(queryText, context); + if (indexReader == null && indexWriter.NumDocs > 0) + { + OpenReader(); + } + if (indexReader != null) { + var found = new HashSet(); + var hits = indexSearcher.Search(query, MaxResults).ScoreDocs; foreach (var hit in hits) { if (TextIndexContent.TryGetId(hit.Doc, context.Scope, indexReader, indexState, out var id)) { - result.Add(id); + if (found.Add(id)) + { + result.Add(id); + } } } } @@ -153,17 +162,15 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private async Task TryFlushAsync() { - updates++; + timer?.Dispose(); - if (updates >= MaxUpdates) + if (indexState.Changes >= MaxUpdates) { await FlushAsync(); } else { - OpenReader(); - - timer?.Dispose(); + CleanReader(); try { @@ -178,12 +185,12 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text public async Task FlushAsync() { - if (updates > 0 && indexWriter != null) + if (indexState.Changes > 0 && indexWriter != null) { indexWriter.Commit(); indexWriter.Flush(true, true); - OpenReader(); + CleanReader(); var commit = snapshotter.Snapshot(); try @@ -194,22 +201,15 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { snapshotter.Release(commit); } - - updates = 0; } - - timer?.Dispose(); } public async Task DeactivateAsync(bool deleteFolder = false) { await FlushAsync(); - indexWriter?.Dispose(); - indexWriter = null; - - indexReader?.Dispose(); - indexReader = null; + CleanWriter(); + CleanReader(); if (deleteFolder && directory.Exists) { @@ -219,10 +219,23 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private void OpenReader() { - indexReader?.Dispose(); indexReader = indexWriter.GetReader(true); indexSearcher = new IndexSearcher(indexReader); - indexState = new IndexState(indexReader, indexWriter); + indexState = new IndexState(indexWriter, indexReader, indexSearcher); + } + + private void CleanReader() + { + indexReader?.Dispose(); + indexReader = null; + indexSearcher = null; + indexState?.CloseReader(); + } + + private void CleanWriter() + { + indexWriter?.Dispose(); + indexWriter = null; } } } diff --git a/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs b/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs index 33596f736..1970a0663 100644 --- a/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs +++ b/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs @@ -51,7 +51,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { await AddInvariantContent("Hello", "World", false); - await sut.DeactivateAsync(); + await sut.DeactivateAsync(true); var other = new TextIndexerGrain(assetStore); try @@ -137,13 +137,13 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text await TestSearchAsync(null, "Hallo", Scope.Draft); await TestSearchAsync(null, "Hallo", Scope.Published); - await AddInvariantContent("Hallo", "Welt", true); + await AddInvariantContent("Guten Morgen", "Welt", true); await TestSearchAsync(null, "Hello", Scope.Draft); await TestSearchAsync(ids1, "Hello", Scope.Published); - await TestSearchAsync(ids1, "Hallo", Scope.Draft); - await TestSearchAsync(null, "Hallo", Scope.Published); + await TestSearchAsync(ids1, "Guten Morgen", Scope.Draft); + await TestSearchAsync(null, "Guten Morgen", Scope.Published); } [Fact] @@ -154,7 +154,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text await TestSearchAsync(ids1, "Hello", Scope.Draft); await TestSearchAsync(null, "Hello", Scope.Published); - await CopyAsync(false); + await CopyAsync(true); await TestSearchAsync(ids1, "Hello", Scope.Draft); await TestSearchAsync(ids1, "Hello", Scope.Published);