From feb1b3083995d86c399c7d5420a41c46f6d70913 Mon Sep 17 00:00:00 2001
From: Sebastian
Date: Thu, 5 Dec 2019 10:10:40 +0100
Subject: [PATCH] More optimizations.

---
Review notes (commentary between the three-dash line and the diffstat is not
part of the commit):
* Line structure and indentation were restored from a whitespace-collapsed
  copy; context whitespace is inferred from the hunk line counts.
* GetBinaryValue: skip negative doc ids and segments without binary doc values
  for the field, both of which the replaced IndexState code guarded against.
* DocValuesTests: read the value at BytesRef.Offset and assert its length.
* Hunk sizes and the diffstat were recalculated; the blob ids on the "index"
  lines still name the original revision.

 .../Contents/Text/Extensions.cs               | 53 +++++++++++++++
 .../Contents/Text/IndexState.cs               | 65 ++++++++-----------
 .../Contents/Text/DocValuesTests.cs           | 57 ++++++++++++++++
 .../Contents/Text/TextIndexerBenchmark.cs     |  2 +-
 4 files changed, 138 insertions(+), 39 deletions(-)
 create mode 100644 backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/DocValuesTests.cs

diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs
index 1f93d890d..e458c09a0 100644
--- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs
+++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs
@@ -9,6 +9,7 @@ using System;
 using System.Collections.Generic;
 using System.Text;
 using Lucene.Net.Documents;
+using Lucene.Net.Index;
 using Lucene.Net.Util;
 using Squidex.Domain.Apps.Core.Contents;
 using Squidex.Infrastructure;
@@ -98,5 +99,57 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
                 }
             }
         }
+
+        /// <summary>
+        /// Reads the binary doc value of a field for a document id that is relative to the whole reader.
+        /// </summary>
+        /// <param name="reader">The reader to resolve the value from; may be null.</param>
+        /// <param name="field">The name of the binary doc values field.</param>
+        /// <param name="docId">The document id; negative ids are treated as "not found".</param>
+        /// <param name="result">An optional buffer to reuse; its bytes are zeroed before the lookup.</param>
+        /// <returns>
+        /// The buffer that was passed in, or a new one if none was given. It is returned without a lookup
+        /// when the reader is null, the id is negative or the segment has no binary doc values for the field.
+        /// </returns>
+        public static BytesRef GetBinaryValue(this IndexReader? reader, string field, int docId, BytesRef? result = null)
+        {
+            if (result != null)
+            {
+                // NOTE(review): Get() may point result.Bytes at a buffer handed out by the codec - confirm it is safe to clear.
+                Array.Clear(result.Bytes, 0, result.Bytes.Length);
+            }
+            else
+            {
+                result = new BytesRef();
+            }
+
+            // IndexState.HasBeenAdded reports -1 for unresolved ids; such an id must never reach the doc values.
+            if (reader == null || docId < 0)
+            {
+                return result;
+            }
+
+            var leaves = reader.Leaves;
+
+            if (leaves.Count == 1)
+            {
+                // GetBinaryDocValues returns null if the field has no binary doc values in this segment.
+                var docValues = leaves[0].AtomicReader.GetBinaryDocValues(field);
+
+                docValues?.Get(docId, result);
+            }
+            else if (leaves.Count > 1)
+            {
+                var subIndex = ReaderUtil.SubIndex(docId, leaves);
+
+                var subLeave = leaves[subIndex];
+                var subValues = subLeave.AtomicReader.GetBinaryDocValues(field);
+
+                // Doc values are read per segment, so translate the id into the segment's own range.
+                subValues?.Get(docId - subLeave.DocBase, result);
+            }
+
+            return result;
+        }
     }
 }
diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs
index e5004c4d2..95c2e9818 100644
--- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs
+++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs
@@ -17,12 +17,10 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
 {
     internal sealed class IndexState
     {
-        private const int NotFound = -1;
         private const string MetaFor = "_fd";
-        private readonly Dictionary<(Guid, Scope), BytesRef> lastChanges = new Dictionary<(Guid, Scope), BytesRef>();
+        private readonly Dictionary<(Guid, Scope), (bool, bool)> lastChanges = new Dictionary<(Guid, Scope), (bool, bool)>();
+        private readonly BytesRef bytesRef = new BytesRef(2);
         private readonly IIndex index;
-        private IndexReader? lastReader;
-        private BinaryDocValues binaryValues;
 
         public IndexState(IIndex index)
         {
@@ -35,7 +33,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
 
             document.SetBinaryDocValue(MetaFor, value);
 
-            lastChanges[(id, scope)] = value;
+            lastChanges[(id, scope)] = (forDraft, forPublished);
         }
 
         public void Index(Guid id, Scope scope, Term term, bool forDraft, bool forPublished)
@@ -44,30 +42,42 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
 
             index.Writer.UpdateBinaryDocValue(term, MetaFor, value);
 
-            lastChanges[(id, scope)] = value;
+            lastChanges[(id, scope)] = (forDraft, forPublished);
         }
 
         public bool HasBeenAdded(Guid id, Scope scope, Term term, out int docId)
         {
-            docId = 0;
+            docId = -1;
 
             if (lastChanges.ContainsKey((id, scope)))
             {
                 return true;
             }
 
-            var docs = index.Searcher?.Search(new TermQuery(term), 1);
+            if (index.Searcher == null)
+            {
+                return false;
+            }
+
+            var docs = index.Searcher.Search(new TermQuery(term), 1);
 
-            docId = docs?.ScoreDocs.FirstOrDefault()?.Doc ?? NotFound;
+            var found = docs.ScoreDocs.FirstOrDefault();
 
-            return docId > NotFound;
+            if (found != null)
+            {
+                docId = found.Doc;
+
+                return true;
+            }
+
+            return false;
         }
 
         public void Get(Guid id, Scope scope, int docId, out bool forDraft, out bool forPublished)
         {
             if (lastChanges.TryGetValue((id, scope), out var forValue))
             {
-                (forDraft, forPublished) = ToFlags(forValue);
+                (forDraft, forPublished) = forValue;
             }
             else
             {
@@ -84,37 +94,16 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
 
         private BytesRef GetForValues(int docId)
         {
-            var reader = index.Reader;
-
-            if (lastReader != reader)
-            {
-                lastChanges.Clear();
-                lastReader = reader;
-
-                if (reader != null)
-                {
-                    binaryValues = MultiDocValues.GetBinaryValues(reader, MetaFor);
-                }
-            }
-
-            var result = new BytesRef(2);
-
-            if (docId != NotFound)
-            {
-                binaryValues?.Get(docId, result);
-            }
-
-            return result;
+            return index.Reader.GetBinaryValue(MetaFor, docId, bytesRef);
         }
 
         private static BytesRef GetValue(bool forDraft, bool forPublished)
         {
-            return GetValue((byte)(forDraft ? 1 : 0), (byte)(forPublished ? 1 : 0));
-        }
-
-        private static BytesRef GetValue(byte forDraft, byte forPublished)
-        {
-            return new BytesRef(new[] { forDraft, forPublished });
+            return new BytesRef(new[]
+            {
+                (byte)(forDraft ? 1 : 0),
+                (byte)(forPublished ? 1 : 0)
+            });
         }
 
         private static (bool, bool) ToFlags(BytesRef bytes)
diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/DocValuesTests.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/DocValuesTests.cs
new file mode 100644
index 000000000..2c24bd107
--- /dev/null
+++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/DocValuesTests.cs
@@ -0,0 +1,57 @@
+// ==========================================================================
+//  Squidex Headless CMS
+// ==========================================================================
+//  Copyright (c) Squidex UG (haftungsbeschraenkt)
+//  All rights reserved. Licensed under the MIT license.
+// ==========================================================================
+
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Xunit;
+
+namespace Squidex.Domain.Apps.Entities.Contents.Text
+{
+    public class DocValuesTests
+    {
+        [Fact]
+        public void Should_read_and_write_doc_values()
+        {
+            var version = LuceneVersion.LUCENE_48;
+
+            var indexWriter =
+                new IndexWriter(new RAMDirectory(),
+                    new IndexWriterConfig(version, new StandardAnalyzer(version)));
+
+            using (indexWriter)
+            {
+                for (byte i = 0; i < 255; i++)
+                {
+                    var document = new Document();
+
+                    document.AddBinaryDocValuesField("field", new BytesRef(new byte[] { i }));
+
+                    indexWriter.AddDocument(document);
+                }
+
+                indexWriter.Commit();
+
+                using (var reader = indexWriter.GetReader(true))
+                {
+                    var bytesRef = new BytesRef(1);
+
+                    for (byte i = 0; i < 255; i++)
+                    {
+                        reader.GetBinaryValue("field", i, bytesRef);
+
+                        // The valid bytes of a BytesRef start at Offset, which is not guaranteed to be zero.
+                        Assert.Equal(1, bytesRef.Length);
+                        Assert.Equal(i, bytesRef.Bytes[bytesRef.Offset]);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerBenchmark.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerBenchmark.cs
index 4e7b997e8..5dc9f60d0 100644
--- a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerBenchmark.cs
+++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerBenchmark.cs
@@ -28,7 +28,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
             sut.ActivateAsync(schemaId).Wait();
         }
 
-        [Fact]// (Skip = "Only used for benchmarks")]
+        [Fact(Skip = "Only used for benchmarks")]
         public async Task Should_index_many_documents()
         {
             var text = new Dictionary