From 16de2769659194664cca9185ebe4182638212e4f Mon Sep 17 00:00:00 2001 From: Sebastian Stehle Date: Mon, 2 Dec 2019 18:01:37 +0100 Subject: [PATCH] Text indexer (#454) * Text indexer improved. --- .../Contents/Visitors/Adapt.cs | 4 +- .../FullText/MongoDirectory.cs | 150 +++++++++++++++ .../FullText/MongoDirectoryFactory.cs | 35 ++++ .../FullText/MongoIndexInput.cs | 105 +++++++++++ .../FullText/MongoIndexOutput.cs | 114 ++++++++++++ .../Contents/Text/Extensions.cs | 8 + .../Contents/Text/FSDirectoryFactory.cs | 26 +++ .../Contents/Text/GrainTextIndexer.cs | 11 +- .../Contents/Text/IDirectoryFactory.cs | 17 ++ .../Contents/Text/ITextIndexerGrain.cs | 3 +- .../Contents/Text/IndexHolder.cs | 157 ++++++++++++++++ .../Contents/Text/IndexHolderFactory.cs | 87 +++++++++ .../Contents/Text/IndexState.cs | 117 +++++------- .../Contents/Text/PersistenceHelper.cs | 94 ---------- .../Contents/Text/TextContent.cs | 91 +++++++++ .../Contents/Text/TextIndexContent.cs | 176 +++++++----------- .../Contents/Text/TextIndexerGrain.cs | 172 +++++------------ .../Contents/Text/Update.cs | 5 +- .../Squidex/Config/Domain/ContentsServices.cs | 3 + .../Squidex/Config/Domain/StoreServices.cs | 14 ++ .../Contents/Text/GrainTextIndexerTests.cs | 14 +- .../Contents/Text/TextIndexerBenchmark.cs | 59 ++++++ ...nTests.cs => TextIndexerGrainTestsBase.cs} | 73 ++++---- .../Contents/Text/TextIndexerGrainTests_FS.cs | 21 +++ .../Text/TextIndexerGrainTests_Mongo.cs | 33 ++++ 25 files changed, 1135 insertions(+), 454 deletions(-) create mode 100644 backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoDirectory.cs create mode 100644 backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoDirectoryFactory.cs create mode 100644 backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoIndexInput.cs create mode 100644 backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoIndexOutput.cs create mode 100644 
backend/src/Squidex.Domain.Apps.Entities/Contents/Text/FSDirectoryFactory.cs create mode 100644 backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IDirectoryFactory.cs create mode 100644 backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexHolder.cs create mode 100644 backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexHolderFactory.cs delete mode 100644 backend/src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs create mode 100644 backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextContent.cs create mode 100644 backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerBenchmark.cs rename backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/{TextIndexerGrainTests.cs => TextIndexerGrainTestsBase.cs} (80%) create mode 100644 backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests_FS.cs create mode 100644 backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests_Mongo.cs diff --git a/backend/src/Squidex.Domain.Apps.Entities.MongoDb/Contents/Visitors/Adapt.cs b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/Contents/Visitors/Adapt.cs index 677cc20ba..3d3f706b4 100644 --- a/backend/src/Squidex.Domain.Apps.Entities.MongoDb/Contents/Visitors/Adapt.cs +++ b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/Contents/Visitors/Adapt.cs @@ -19,8 +19,8 @@ namespace Squidex.Domain.Apps.Entities.MongoDb.Contents.Visitors public static class Adapt { private static readonly Dictionary PropertyMap = - typeof(MongoContentEntity).GetProperties() - .ToDictionary(x => x.Name, x => x.GetCustomAttribute()?.ElementName ?? x.Name, StringComparer.OrdinalIgnoreCase); + typeof(MongoContentEntity).GetProperties() + .ToDictionary(x => x.Name, x => x.GetCustomAttribute()?.ElementName ?? 
x.Name, StringComparer.OrdinalIgnoreCase); public static Func Path(Schema schema, bool inDraft) { diff --git a/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoDirectory.cs b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoDirectory.cs new file mode 100644 index 000000000..d861fd8c3 --- /dev/null +++ b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoDirectory.cs @@ -0,0 +1,150 @@ +// ========================================================================== +// Squidex Headless CMS +// ========================================================================== +// Copyright (c) Squidex UG (haftungsbeschraenkt) +// All rights reserved. Licensed under the MIT license. +// ========================================================================== + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using Lucene.Net.Store; +using MongoDB.Bson; +using MongoDB.Driver; +using MongoDB.Driver.GridFS; +using LuceneDirectory = Lucene.Net.Store.Directory; + +namespace Squidex.Domain.Apps.Entities.MongoDb.FullText +{ + public sealed class MongoDirectory : BaseDirectory + { + private readonly IGridFSBucket bucket; + private readonly string directory; + private readonly DirectoryInfo cacheDirectoryInfo; + private readonly LuceneDirectory cacheDirectory; + private bool isDisposed; + + public LuceneDirectory CacheDirectory + { + get { return cacheDirectory; } + } + + public DirectoryInfo CacheDirectoryInfo + { + get { return cacheDirectoryInfo; } + } + + public IGridFSBucket Bucket + { + get { return bucket; } + } + + public MongoDirectory(IGridFSBucket bucket, string directory, DirectoryInfo cacheDirectoryInfo) + { + this.bucket = bucket; + + this.directory = directory; + + this.cacheDirectoryInfo = cacheDirectoryInfo; + + cacheDirectoryInfo.Create(); + cacheDirectory = FSDirectory.Open(cacheDirectoryInfo); + + SetLockFactory(new NativeFSLockFactory(cacheDirectoryInfo)); + } + + protected override 
void Dispose(bool disposing) + { + if (disposing) + { + isDisposed = true; + + cacheDirectory.Dispose(); + } + } + + public override string GetLockID() + { + return cacheDirectory.GetLockID(); + } + + public override IndexOutput CreateOutput(string name, IOContext context) + { + return new MongoIndexOutput(this, context, name); + } + + public override IndexInput OpenInput(string name, IOContext context) + { + return new MongoIndexInput(this, context, name); + } + + public override void DeleteFile(string name) + { + EnsureNotDisposed(); + + var fullName = GetFullName(name); + + try + { + Bucket.Delete(fullName); + } + catch (GridFSFileNotFoundException) + { + } + } + + public override long FileLength(string name) + { + EnsureNotDisposed(); + + var file = FindFile(name) ?? throw new FileNotFoundException(); + + return file.Length; + } + + public override string[] ListAll() + { + EnsureNotDisposed(); + + var files = Bucket.Find(Builders>.Filter.Regex(x => x.Id, new BsonRegularExpression($"^{directory}/"))).ToList(); + + return files.Select(x => x.Filename).ToArray(); + } + + public GridFSFileInfo? 
FindFile(string name) + { + var fullName = GetFullName(name); + + return Bucket.Find(Builders>.Filter.Eq(x => x.Id, fullName)).FirstOrDefault(); + } + + public override void Sync(ICollection names) + { + } + + [Obsolete] + public override bool FileExists(string name) + { + throw new NotSupportedException(); + } + + public string GetFullName(string name) + { + return $"{directory}/{name}"; + } + + public string GetFullPath(string name) + { + return Path.Combine(cacheDirectoryInfo.FullName, name); + } + + private void EnsureNotDisposed() + { + if (isDisposed) + { + throw new ObjectDisposedException(GetType().FullName); + } + } + } +} diff --git a/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoDirectoryFactory.cs b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoDirectoryFactory.cs new file mode 100644 index 000000000..0cd1725bb --- /dev/null +++ b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoDirectoryFactory.cs @@ -0,0 +1,35 @@ +// ========================================================================== +// Squidex Headless CMS +// ========================================================================== +// Copyright (c) Squidex UG (haftungsbeschraenkt) +// All rights reserved. Licensed under the MIT license. 
+// ==========================================================================
+
+using System;
+using System.IO;
+using MongoDB.Driver.GridFS;
+using Squidex.Domain.Apps.Entities.Contents.Text;
+using LuceneDirectory = Lucene.Net.Store.Directory;
+
+namespace Squidex.Domain.Apps.Entities.MongoDb.FullText
+{
+    public sealed class MongoDirectoryFactory : IDirectoryFactory
+    {
+        private readonly IGridFSBucket<string> bucket;
+
+        public MongoDirectoryFactory(IGridFSBucket<string> bucket)
+        {
+            this.bucket = bucket;
+        }
+
+        public LuceneDirectory Create(Guid schemaId)
+        {
+            var folderName = schemaId.ToString();
+
+            var tempFolder = Path.Combine(Path.GetTempPath(), folderName);
+            var tempDirectory = new DirectoryInfo(tempFolder);
+
+            return new MongoDirectory(bucket, folderName, tempDirectory);
+        }
+    }
+}
diff --git a/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoIndexInput.cs b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoIndexInput.cs
new file mode 100644
index 000000000..18710ce67
--- /dev/null
+++ b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoIndexInput.cs
@@ -0,0 +1,122 @@
+// ==========================================================================
+// Squidex Headless CMS
+// ==========================================================================
+// Copyright (c) Squidex UG (haftungsbeschraenkt)
+// All rights reserved. Licensed under the MIT license.
+// ==========================================================================
+
+using System.IO;
+using Lucene.Net.Store;
+using MongoDB.Driver.GridFS;
+
+namespace Squidex.Domain.Apps.Entities.MongoDb.FullText
+{
+    /// <summary>
+    /// Lucene input that reads an index file through the local cache directory of a
+    /// <see cref="MongoDirectory"/>, downloading it from GridFS first when the cached
+    /// copy is missing or older than the stored one.
+    /// </summary>
+    public sealed class MongoIndexInput : IndexInput
+    {
+        private readonly IndexInput cacheInput;
+        private readonly MongoDirectory indexDirectory;
+        private readonly IOContext context;
+        private readonly string indexFileName;
+
+        public override long Length
+        {
+            get { return cacheInput.Length; }
+        }
+
+        /// <summary>
+        /// Opens the given index file, refreshing the cached copy from GridFS if needed.
+        /// </summary>
+        /// <exception cref="FileNotFoundException">GridFS reported the file as missing during the download.</exception>
+        public MongoIndexInput(MongoDirectory indexDirectory, IOContext context, string indexFileName)
+            : base(indexDirectory.GetFullName(indexFileName))
+        {
+            this.indexDirectory = indexDirectory;
+            this.indexFileName = indexFileName;
+
+            this.context = context;
+
+            try
+            {
+                var file = indexDirectory.FindFile(indexFileName);
+
+                if (file != null)
+                {
+                    var fileInfo = new FileInfo(indexDirectory.GetFullPath(indexFileName));
+
+                    var writtenTime = file.Metadata["WrittenTime"].ToUniversalTime();
+
+                    // Only download when the local copy is missing or older than the uploaded one.
+                    if (!fileInfo.Exists || fileInfo.LastWriteTimeUtc < writtenTime)
+                    {
+                        using (var fs = new FileStream(fileInfo.FullName, FileMode.Create, FileAccess.Write))
+                        {
+                            var fullName = indexDirectory.GetFullName(indexFileName);
+
+                            indexDirectory.Bucket.DownloadToStream(fullName, fs);
+                        }
+                    }
+                }
+            }
+            catch (GridFSFileNotFoundException)
+            {
+                throw new FileNotFoundException();
+            }
+
+            cacheInput = indexDirectory.CacheDirectory.OpenInput(indexFileName, context);
+        }
+
+        /// <summary>
+        /// Creates a clone that wraps a clone of the cached input of <paramref name="source"/>.
+        /// </summary>
+        public MongoIndexInput(MongoIndexInput source)
+            : base("clone")
+        {
+            cacheInput = (IndexInput)source.cacheInput.Clone();
+            context = source.context;
+            indexDirectory = source.indexDirectory;
+            indexFileName = source.indexFileName;
+        }
+
+        protected override void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                cacheInput.Dispose();
+            }
+        }
+
+        public override long GetFilePointer()
+        {
+            return cacheInput.GetFilePointer();
+        }
+
+        public override byte ReadByte()
+        {
+            return cacheInput.ReadByte();
+        }
+
+        public override void ReadBytes(byte[] b, int offset, int len)
+        {
+            cacheInput.ReadBytes(b, offset, len);
+        }
+
+        public override void Seek(long pos)
+        {
+            cacheInput.Seek(pos);
+        }
+
+        /// <summary>
+        /// Clones the input through the copy constructor, so the clone is backed by a clone of
+        /// the cached input and does not query or download from GridFS again.
+        /// </summary>
+        public override object Clone()
+        {
+            return new MongoIndexInput(this);
+        }
+    }
+}
diff --git a/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoIndexOutput.cs b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoIndexOutput.cs
new file mode 100644
index 000000000..51c3fc88d
--- /dev/null
+++ b/backend/src/Squidex.Domain.Apps.Entities.MongoDb/FullText/MongoIndexOutput.cs
@@ -0,0 +1,128 @@
+// ==========================================================================
+// Squidex Headless CMS
+// ==========================================================================
+// Copyright (c) Squidex UG (haftungsbeschraenkt)
+// All rights reserved. Licensed under the MIT license.
+// ==========================================================================
+
+using System;
+using System.IO;
+using System.Linq;
+using Lucene.Net.Store;
+using MongoDB.Bson;
+using MongoDB.Driver;
+using MongoDB.Driver.GridFS;
+
+namespace Squidex.Domain.Apps.Entities.MongoDb.FullText
+{
+    /// <summary>
+    /// Lucene output that writes to the local cache directory of a <see cref="MongoDirectory"/>
+    /// and uploads the finished file to GridFS when it is disposed.
+    /// </summary>
+    public sealed class MongoIndexOutput : IndexOutput
+    {
+        private readonly IndexOutput cacheOutput;
+        private readonly MongoDirectory indexDirectory;
+        private readonly string indexFileName;
+        private bool isFlushed;
+        private bool isWritten;
+
+        public override long Length
+        {
+            get { return cacheOutput.Length; }
+        }
+
+        public override long Checksum
+        {
+            get { return cacheOutput.Checksum; }
+        }
+
+        public MongoIndexOutput(MongoDirectory indexDirectory, IOContext context, string indexFileName)
+        {
+            this.indexDirectory = indexDirectory;
+            this.indexFileName = indexFileName;
+
+            cacheOutput = indexDirectory.CacheDirectory.CreateOutput(indexFileName, context);
+        }
+
+        /// <summary>
+        /// Flushes and closes the cached file and, if anything was written, uploads it to GridFS
+        /// together with its last write time.
+        /// </summary>
+        protected override void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                Flush();
+
+                cacheOutput.Dispose();
+
+                if (isWritten && isFlushed)
+                {
+                    var fileInfo = new FileInfo(indexDirectory.GetFullPath(indexFileName));
+
+                    using (var fs = new FileStream(indexDirectory.GetFullPath(indexFileName), FileMode.Open, FileAccess.Read))
+                    {
+                        var fullName = indexDirectory.GetFullName(indexFileName);
+
+                        var options = new GridFSUploadOptions
+                        {
+                            Metadata = new BsonDocument
+                            {
+                                ["WrittenTime"] = fileInfo.LastWriteTimeUtc
+                            }
+                        };
+
+                        try
+                        {
+                            indexDirectory.Bucket.UploadFromStream(fullName, indexFileName, fs, options);
+                        }
+                        catch (MongoBulkWriteException ex) when (ex.WriteErrors.Any(x => x.Code == 11000))
+                        {
+                            // Duplicate key: a file with this id already exists, so replace it.
+                            indexDirectory.Bucket.Delete(fullName);
+
+                            // The failed attempt may already have read from the stream; rewind it
+                            // so that the retry uploads the whole file.
+                            fs.Position = 0;
+
+                            indexDirectory.Bucket.UploadFromStream(fullName, indexFileName, fs, options);
+                        }
+                    }
+                }
+            }
+        }
+
+        public override long GetFilePointer()
+        {
+            return cacheOutput.GetFilePointer();
+        }
+
+        public override void Flush()
+        {
+            cacheOutput.Flush();
+
+            isFlushed = true;
+        }
+
+        public override void WriteByte(byte b)
+        {
+            cacheOutput.WriteByte(b);
+
+            isWritten = true;
+        }
+
+        public override void WriteBytes(byte[] b, int offset, int length)
+        {
+            cacheOutput.WriteBytes(b, offset, length);
+
+            isWritten = true;
+        }
+
+        [Obsolete]
+        public override void Seek(long pos)
+        {
+            cacheOutput.Seek(pos);
+        }
+    }
+}
diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs
index 3869cc01a..b94322dde 100644
--- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs
+++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs
@@ -15,7 +15,15 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
         public static void SetBinaryDocValue(this Document document, string name, BytesRef value)
         {
             document.RemoveField(name);
+
             document.AddBinaryDocValuesField(name, value);
         }
+
+        public static void SetField(this Document document, string name, string value)
+        {
+            document.RemoveField(name);
+
+            document.AddStringField(name, value, Field.Store.YES);
+        }
     }
 }
diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/FSDirectoryFactory.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/FSDirectoryFactory.cs
new file mode 100644
index 000000000..63bc8a4e6
--- /dev/null
+++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/FSDirectoryFactory.cs
@@ -0,0 +1,26 @@
+// ==========================================================================
+// Squidex Headless CMS
+// ==========================================================================
+// Copyright (c) Squidex UG (haftungsbeschraenkt)
+// All rights reserved. Licensed under the MIT license.
+// ==========================================================================
+
+using System;
+using System.IO;
+using Lucene.Net.Store;
+using LuceneDirectory = Lucene.Net.Store.Directory;
+
+namespace Squidex.Domain.Apps.Entities.Contents.Text
+{
+    public sealed class FSDirectoryFactory : IDirectoryFactory
+    {
+        public LuceneDirectory Create(Guid schemaId)
+        {
+            var folderName = $"Indexes/{schemaId}";
+
+            var tempFolder = Path.Combine(Path.GetTempPath(), folderName);
+
+            return FSDirectory.Open(tempFolder);
+        }
+    }
+}
diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs
index dc2263426..6a17a63da 100644
--- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs
+++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs
@@ -16,7 +16,6 @@ using Squidex.Domain.Apps.Events.Contents;
 using Squidex.Infrastructure;
 using Squidex.Infrastructure.EventSourcing;
 using Squidex.Infrastructure.Log;
-using Squidex.Infrastructure.Orleans;
 using Squidex.Infrastructure.Tasks;
 
 namespace Squidex.Domain.Apps.Entities.Contents.Text
@@ -27,7 +26,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
 
         public string Name
         {
-            get { return "TextIndexer"; }
+            get { return "TextIndexer2"; }
         }
 
         public string EventsFilter
@@ -78,6 +77,8 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
                         await index.CopyAsync(id, false);
                         break;
                     case ContentChangesPublished _:
+                        await index.CopyAsync(id, true);
+                        break;
                     case ContentStatusChanged contentStatusChanged when contentStatusChanged.Status == Status.Published:
                         await index.CopyAsync(id, true);
                         break;
@@ -85,9 +86,11 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
             }
         }
 
-        private static J<Update> Data(Guid contentId, NamedContentData data, bool onlySelf)
+        private static Update Data(Guid contentId, NamedContentData data, bool onlyDraft)
         {
-            return new Update { Id = contentId, Data = data, OnlyDraft = onlySelf };
+            var text = new TextContent(data);
+
+            return new Update { Id = contentId, Text = text, OnlyDraft = onlyDraft };
         }
 
         public async Task<List<Guid>?> SearchAsync(string? queryText, IAppEntity app, Guid schemaId, Scope scope = Scope.Published)
diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IDirectoryFactory.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IDirectoryFactory.cs
new file mode 100644
index 000000000..31fc6b213
--- /dev/null
+++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IDirectoryFactory.cs
@@ -0,0 +1,17 @@
+// ==========================================================================
+// Squidex Headless CMS
+// ==========================================================================
+// Copyright (c) Squidex UG (haftungsbeschraenkt)
+// All rights reserved. Licensed under the MIT license.
+// ==========================================================================
+
+using System;
+using Lucene.Net.Store;
+
+namespace Squidex.Domain.Apps.Entities.Contents.Text
+{
+    public interface IDirectoryFactory
+    {
+        Directory Create(Guid schemaId);
+    }
+}
diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs
index 486591d6d..ba5d8db1f 100644
--- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs
+++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs
@@ -9,7 +9,6 @@ using System;
 using System.Collections.Generic;
 using System.Threading.Tasks;
 using Orleans;
-using Squidex.Infrastructure.Orleans;
 
 namespace Squidex.Domain.Apps.Entities.Contents.Text
 {
@@ -19,7 +18,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
 
         Task CopyAsync(Guid id, bool fromDraft);
 
-        Task IndexAsync(J<Update> update);
+        Task IndexAsync(Update update);
 
         Task<List<Guid>> SearchAsync(string queryText, SearchContext context);
     }
diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexHolder.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexHolder.cs
new file mode 100644
index 000000000..547dded4b
--- /dev/null
+++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexHolder.cs
@@ -0,0 +1,189 @@
+// ==========================================================================
+// Squidex Headless CMS
+// ==========================================================================
+// Copyright (c) Squidex UG (haftungsbeschraenkt)
+// All rights reserved. Licensed under the MIT license.
+// ==========================================================================
+
+using System;
+using Lucene.Net.Analysis;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Squidex.Infrastructure;
+
+namespace Squidex.Domain.Apps.Entities.Contents.Text
+{
+    /// <summary>
+    /// Owns the Lucene directory, writer and (lazily opened) reader/searcher of one schema index.
+    /// </summary>
+    public sealed class IndexHolder : DisposableObjectBase
+    {
+        private const LuceneVersion Version = LuceneVersion.LUCENE_48;
+        private static readonly MergeScheduler MergeScheduler = new ConcurrentMergeScheduler();
+        private static readonly Analyzer SharedAnalyzer = new MultiLanguageAnalyzer(Version);
+        private readonly SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
+        private readonly Directory directory;
+        private IndexWriter indexWriter;
+        private IndexSearcher? indexSearcher;
+        private DirectoryReader? indexReader;
+
+        public Analyzer Analyzer
+        {
+            get
+            {
+                ThrowIfDisposed();
+
+                return SharedAnalyzer;
+            }
+        }
+
+        public SnapshotDeletionPolicy Snapshotter
+        {
+            get
+            {
+                ThrowIfDisposed();
+
+                return snapshotter;
+            }
+        }
+
+        public IndexWriter Writer
+        {
+            get
+            {
+                ThrowIfDisposed();
+
+                return indexWriter;
+            }
+        }
+
+        public IndexReader? Reader
+        {
+            get
+            {
+                ThrowIfDisposed();
+
+                return indexReader;
+            }
+        }
+
+        public IndexSearcher? Searcher
+        {
+            get
+            {
+                ThrowIfDisposed();
+
+                return indexSearcher;
+            }
+        }
+
+        public IndexHolder(IDirectoryFactory directoryFactory, Guid schemaId)
+        {
+            directory = directoryFactory.Create(schemaId);
+        }
+
+        /// <summary>
+        /// Creates the index writer and opens a reader when the index already contains documents.
+        /// </summary>
+        public void Open()
+        {
+            RecreateIndexWriter();
+
+            if (indexWriter.NumDocs > 0)
+            {
+                EnsureReader();
+            }
+        }
+
+        /// <summary>
+        /// Releases the open reader, the writer and the directory created by this holder.
+        /// </summary>
+        protected override void DisposeObject(bool disposing)
+        {
+            if (disposing)
+            {
+                // Release the reader as well, it is not closed by disposing the writer.
+                indexReader?.Dispose();
+                indexReader = null;
+                indexSearcher = null;
+
+                try
+                {
+                    // The writer is only created in Open() and is still null if that never ran or failed.
+                    indexWriter?.Dispose();
+                }
+                finally
+                {
+                    directory.Dispose();
+                }
+            }
+        }
+
+        private void RecreateIndexWriter()
+        {
+            var config = new IndexWriterConfig(Version, Analyzer)
+            {
+                IndexDeletionPolicy = snapshotter,
+                MergePolicy = new TieredMergePolicy(),
+                MergeScheduler = MergeScheduler
+            };
+
+            indexWriter = new IndexWriter(directory, config);
+
+            MarkStale();
+        }
+
+        /// <summary>
+        /// Opens a reader and searcher from the writer unless a reader is already open.
+        /// </summary>
+        public void EnsureReader()
+        {
+            ThrowIfDisposed();
+
+            if (indexReader == null)
+            {
+                indexReader = indexWriter.GetReader(true);
+                indexSearcher = new IndexSearcher(indexReader);
+            }
+        }
+
+        /// <summary>
+        /// Disposes the current reader, if any, so that the next EnsureReader call opens a new one.
+        /// </summary>
+        public void MarkStale()
+        {
+            ThrowIfDisposed();
+
+            if (indexReader != null)
+            {
+                indexReader.Dispose();
+                indexReader = null;
+                indexSearcher = null;
+            }
+        }
+
+        /// <summary>
+        /// Drops the current reader and commits pending changes; on an out of memory error the
+        /// writer is recreated before the exception is rethrown.
+        /// </summary>
+        public void Commit()
+        {
+            ThrowIfDisposed();
+
+            try
+            {
+                MarkStale();
+
+                indexWriter.Commit();
+            }
+            catch (OutOfMemoryException)
+            {
+                RecreateIndexWriter();
+
+                throw;
+            }
+        }
+    }
+}
diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexHolderFactory.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexHolderFactory.cs
new file mode 100644
index 000000000..2ce30ffa3
--- /dev/null
+++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexHolderFactory.cs
@@ -0,0 +1,87 @@
+// ==========================================================================
+// Squidex Headless CMS
+// ==========================================================================
+// Copyright (c) Squidex UG (haftungsbeschraenkt)
+// All rights reserved. Licensed under the MIT license.
+// ========================================================================== + +using System; +using System.Collections.Generic; +using Squidex.Infrastructure; +using Squidex.Infrastructure.Log; + +namespace Squidex.Domain.Apps.Entities.Contents.Text +{ + public sealed class IndexHolderFactory : DisposableObjectBase + { + private readonly Dictionary indices = new Dictionary(); + private readonly IDirectoryFactory directoryFactory; + private readonly ISemanticLog log; + + public IndexHolderFactory(IDirectoryFactory directoryFactory, ISemanticLog log) + { + Guard.NotNull(directoryFactory); + Guard.NotNull(log); + + this.directoryFactory = directoryFactory; + + this.log = log; + } + + protected override void DisposeObject(bool disposing) + { + if (disposing) + { + lock (indices) + { + if (indices.Count > 0) + { + log.LogWarning(w => w + .WriteProperty("message", "Unreleased indices found.") + .WriteProperty("count", indices.Count)); + + foreach (var index in indices) + { + index.Value.Dispose(); + } + + indices.Clear(); + } + } + } + } + + public IndexHolder Acquire(Guid schemaId) + { + IndexHolder? 
index; + + lock (indices) + { + if (indices.TryGetValue(schemaId, out index)) + { + log.LogWarning(w => w + .WriteProperty("message", "Unreleased index found.") + .WriteProperty("schemaId", schemaId.ToString())); + + index.Dispose(); + } + + index = new IndexHolder(directoryFactory, schemaId); + + indices[schemaId] = index; + } + + index.Open(); + + return index; + } + + public void Release(Guid id) + { + lock (indices) + { + indices.Remove(id); + } + } + } +} diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs index 18d3956d4..a2416ce12 100644 --- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs +++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/IndexState.cs @@ -19,116 +19,97 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { private const int NotFound = -1; private const string MetaFor = "_fd"; - private readonly IndexSearcher? indexSearcher; - private readonly IndexWriter indexWriter; - private readonly BinaryDocValues binaryValues; - private readonly Dictionary<(Guid, byte), BytesRef> changes = new Dictionary<(Guid, byte), BytesRef>(); - private bool isClosed; + private readonly Dictionary<(Guid, Scope), BytesRef> lastChanges = new Dictionary<(Guid, Scope), BytesRef>(); + private readonly IndexHolder index; + private IndexReader? lastReader; + private BinaryDocValues binaryValues; - public IndexState(IndexWriter indexWriter, IndexReader? indexReader = null, IndexSearcher? 
indexSearcher = null) + public IndexState(IndexHolder index) { - this.indexSearcher = indexSearcher; - this.indexWriter = indexWriter; - - if (indexReader != null) - { - binaryValues = MultiDocValues.GetBinaryValues(indexReader, MetaFor); - } + this.index = index; } - public void Index(Guid id, byte draft, Document document, byte forDraft, byte forPublished) + public void Index(Guid id, Scope scope, Document document, bool forDraft, bool forPublished) { var value = GetValue(forDraft, forPublished); document.SetBinaryDocValue(MetaFor, value); - changes[(id, draft)] = value; + lastChanges[(id, scope)] = value; } - public void Index(Guid id, byte draft, Term term, byte forDraft, byte forPublished) + public void Index(Guid id, Scope scope, Term term, bool forDraft, bool forPublished) { var value = GetValue(forDraft, forPublished); - indexWriter.UpdateBinaryDocValue(term, MetaFor, value); + index.Writer.UpdateBinaryDocValue(term, MetaFor, value); - changes[(id, draft)] = value; + lastChanges[(id, scope)] = value; } - public bool HasBeenAdded(Guid id, byte draft, Term term, out int docId) + public bool HasBeenAdded(Guid id, Scope scope, Term term, out int docId) { docId = 0; - if (changes.ContainsKey((id, draft))) + if (lastChanges.ContainsKey((id, scope))) { return true; } - if (indexSearcher != null && !isClosed) - { - var docs = indexSearcher.Search(new TermQuery(term), 1); + var docs = index.Searcher?.Search(new TermQuery(term), 1); - docId = docs?.ScoreDocs.FirstOrDefault()?.Doc ?? NotFound; + docId = docs?.ScoreDocs.FirstOrDefault()?.Doc ?? 
NotFound; - return docId > NotFound; - } - - return false; + return docId > NotFound; } - public bool TryGet(Guid id, byte draft, int docId, out byte forDraft, out byte forPublished) + public void Get(Guid id, Scope scope, int docId, out bool forDraft, out bool forPublished) { - forDraft = 0; - forPublished = 0; - - if (changes.TryGetValue((id, draft), out var forValue)) + if (lastChanges.TryGetValue((id, scope), out var forValue)) { - forDraft = forValue.Bytes[0]; - forPublished = forValue.Bytes[1]; - - return true; + (forDraft, forPublished) = ToFlags(forValue); } - - if (!isClosed && docId != NotFound) + else { - forValue = new BytesRef(); - - binaryValues?.Get(docId, forValue); - - if (forValue.Bytes.Length == 2) - { - forDraft = forValue.Bytes[0]; - forPublished = forValue.Bytes[1]; - - changes[(id, draft)] = forValue; - - return true; - } + Get(docId, out forDraft, out forPublished); } + } - return false; + public void Get(int docId, out bool forDraft, out bool forPublished) + { + var forValue = GetForValues(docId); + + (forDraft, forPublished) = ToFlags(forValue); } - public bool TryGet(int docId, out byte forDraft, out byte forPublished) + private BytesRef GetForValues(int docId) { - forDraft = 0; - forPublished = 0; + var reader = index.Reader; - if (!isClosed && docId != NotFound) + if (lastReader != reader) { - var forValue = new BytesRef(); - - binaryValues?.Get(docId, forValue); + lastChanges.Clear(); + lastReader = reader; - if (forValue.Bytes.Length == 2) + if (reader != null) { - forDraft = forValue.Bytes[0]; - forPublished = forValue.Bytes[1]; - - return true; + binaryValues = MultiDocValues.GetBinaryValues(reader, MetaFor); } } - return false; + var result = new BytesRef(2); + + if (docId != NotFound) + { + binaryValues?.Get(docId, result); + } + + return result; + } + + private static BytesRef GetValue(bool forDraft, bool forPublished) + { + return GetValue((byte)(forDraft ? 1 : 0), (byte)(forPublished ? 
1 : 0)); } private static BytesRef GetValue(byte forDraft, byte forPublished) @@ -136,9 +117,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text return new BytesRef(new[] { forDraft, forPublished }); } - public void CloseReader() + private static (bool, bool) ToFlags(BytesRef bytes) { - isClosed = true; + return (bytes.Bytes[0] == 1, bytes.Bytes[1] == 1); } } } diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs deleted file mode 100644 index d847ccf13..000000000 --- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs +++ /dev/null @@ -1,94 +0,0 @@ -// ========================================================================== -// Squidex Headless CMS -// ========================================================================== -// Copyright (c) Squidex UG (haftungsbeschraenkt) -// All rights reserved. Licensed under the MIT license. -// ========================================================================== - -using System; -using System.IO; -using System.IO.Compression; -using System.Threading.Tasks; -using Lucene.Net.Index; -using Squidex.Infrastructure.Assets; - -namespace Squidex.Domain.Apps.Entities.Contents.Text -{ - public static class PersistenceHelper - { - private const string ArchiveFile = "Archive.zip"; - private const string LockFile = "write.lock"; - - public static async Task UploadDirectoryAsync(this IAssetStore assetStore, DirectoryInfo directory, IndexCommit commit) - { - using (var fileStream = new FileStream( - Path.Combine(directory.FullName, ArchiveFile), - FileMode.Create, - FileAccess.ReadWrite, - FileShare.None, - 4096, - FileOptions.DeleteOnClose)) - { - using (var zipArchive = new ZipArchive(fileStream, ZipArchiveMode.Create, true)) - { - foreach (var fileName in commit.FileNames) - { - var file = new FileInfo(Path.Combine(directory.FullName, fileName)); - - try - { - if 
(!file.Name.Equals(ArchiveFile, StringComparison.OrdinalIgnoreCase) && - !file.Name.Equals(LockFile, StringComparison.OrdinalIgnoreCase)) - { - zipArchive.CreateEntryFromFile(file.FullName, file.Name); - } - } - catch (IOException) - { - continue; - } - } - } - - fileStream.Position = 0; - - await assetStore.UploadAsync(directory.Name, 0, string.Empty, fileStream, true); - } - } - - public static async Task DownloadAsync(this IAssetStore assetStore, DirectoryInfo directory) - { - if (directory.Exists) - { - directory.Delete(true); - } - - directory.Create(); - - using (var fileStream = new FileStream( - Path.Combine(directory.FullName, ArchiveFile), - FileMode.Create, - FileAccess.ReadWrite, - FileShare.None, - 4096, - FileOptions.DeleteOnClose)) - { - try - { - await assetStore.DownloadAsync(directory.Name, 0, string.Empty, fileStream); - - fileStream.Position = 0; - - using (var zipArchive = new ZipArchive(fileStream, ZipArchiveMode.Read, true)) - { - zipArchive.ExtractToDirectory(directory.FullName); - } - } - catch (AssetNotFoundException) - { - return; - } - } - } - } -} diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextContent.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextContent.cs new file mode 100644 index 000000000..c038d99e8 --- /dev/null +++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextContent.cs @@ -0,0 +1,91 @@ +// ========================================================================== +// Squidex Headless CMS +// ========================================================================== +// Copyright (c) Squidex UG (haftungsbeschraenkt) +// All rights reserved. Licensed under the MIT license. 
+// ========================================================================== + +using System; +using System.Collections.Generic; +using System.Text; +using Squidex.Domain.Apps.Core.Contents; +using Squidex.Infrastructure; +using Squidex.Infrastructure.Json.Objects; + +#pragma warning disable ORL1001 + +namespace Squidex.Domain.Apps.Entities.Contents.Text +{ + [Serializable] + public sealed class TextContent : Dictionary + { + public TextContent() + { + } + + public TextContent(NamedContentData data) + { + if (data == null) + { + return; + } + + var languages = new Dictionary(); + + void AppendText(string language, string text) + { + if (!string.IsNullOrWhiteSpace(text)) + { + var sb = languages.GetOrAddNew(language); + + if (sb.Length > 0) + { + sb.Append(" "); + } + + sb.Append(text); + } + } + + foreach (var field in data) + { + if (field.Value != null) + { + foreach (var fieldValue in field.Value) + { + var appendText = new Action(text => AppendText(fieldValue.Key, text)); + + AppendJsonText(fieldValue.Value, appendText); + } + } + } + + foreach (var kvp in languages) + { + this[kvp.Key] = kvp.Value.ToString(); + } + } + + private static void AppendJsonText(IJsonValue value, Action appendText) + { + if (value.Type == JsonValueType.String) + { + appendText(value.ToString()); + } + else if (value is JsonArray array) + { + foreach (var item in array) + { + AppendJsonText(item, appendText); + } + } + else if (value is JsonObject obj) + { + foreach (var item in obj.Values) + { + AppendJsonText(item, appendText); + } + } + } + } +} diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexContent.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexContent.cs index 83f701b7c..a10603e3c 100644 --- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexContent.cs +++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexContent.cs @@ -6,13 +6,8 @@ // 
========================================================================== using System; -using System.Collections.Generic; -using System.Text; using Lucene.Net.Documents; using Lucene.Net.Index; -using Squidex.Domain.Apps.Core.Contents; -using Squidex.Infrastructure; -using Squidex.Infrastructure.Json.Objects; namespace Squidex.Domain.Apps.Entities.Contents.Text { @@ -20,13 +15,13 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { private const string MetaId = "_id"; private const string MetaKey = "_key"; - private readonly IndexWriter indexWriter; + private readonly IndexHolder index; private readonly IndexState indexState; private readonly Guid id; - public TextIndexContent(IndexWriter indexWriter, IndexState indexState, Guid id) + public TextIndexContent(IndexHolder index, IndexState indexState, Guid id) { - this.indexWriter = indexWriter; + this.index = index; this.indexState = indexState; this.id = id; @@ -34,60 +29,68 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text public void Delete() { - indexWriter.DeleteDocuments(new Term(MetaId, id.ToString())); + index.Writer.DeleteDocuments(new Term(MetaId, id.ToString())); } - public static bool TryGetId(int docId, Scope scope, IndexReader reader, IndexState indexState, out Guid result) + public static bool TryGetId(int docId, Scope scope, IndexHolder index, IndexState indexState, out Guid result) { result = Guid.Empty; - if (!indexState.TryGet(docId, out var draft, out var published)) - { - return false; - } + indexState.Get(docId, out var draft, out var published); - if (scope == Scope.Draft && draft != 1) + if (scope == Scope.Draft && !draft) { return false; } - if (scope == Scope.Published && published != 1) + if (scope == Scope.Published && !published) { return false; } - var document = reader.Document(docId); + var document = index.Searcher?.Doc(docId); - var idString = document.Get(MetaId); - - if (!Guid.TryParse(idString, out result)) + if (document != null) { - return false; + var idString = 
document.Get(MetaId); + + if (!Guid.TryParse(idString, out result)) + { + return false; + } } return true; } - public void Index(NamedContentData data, bool onlyDraft) + public void Index(TextContent text, bool onlyDraft) { - var converted = CreateDocument(data); + var converted = CreateDocument(text); - Upsert(converted, 1, 1, 0); + Upsert(converted, Scope.Draft, + forDraft: true, + forPublished: false); - var isPublishDocumentAdded = IsAdded(0, out var docId); - var isPublishForPublished = IsForPublished(0, docId); + var isPublishDocumentAdded = IsAdded(Scope.Published, out var docId); + var isPublishForPublished = IsForPublished(Scope.Published, docId); if (!onlyDraft && isPublishDocumentAdded && isPublishForPublished) { - Upsert(converted, 0, 0, 1); + Upsert(converted, Scope.Published, + forDraft: false, + forPublished: true); } else if (!onlyDraft || !isPublishDocumentAdded) { - Upsert(converted, 0, 0, 0); + Upsert(converted, Scope.Published, + forDraft: false, + forPublished: false); } else { - UpdateFor(0, 0, isPublishForPublished ? 
(byte)1 : (byte)0); + UpdateFor(Scope.Published, + forDraft: false, + forPublished: isPublishForPublished); } } @@ -95,119 +98,74 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { if (fromDraft) { - UpdateFor(1, 1, 0); - UpdateFor(0, 0, 1); + UpdateFor(Scope.Draft, + forDraft: true, + forPublished: false); + + UpdateFor(Scope.Published, + forDraft: false, + forPublished: true); } else { - UpdateFor(1, 0, 0); - UpdateFor(0, 1, 1); + UpdateFor(Scope.Draft, + forDraft: false, + forPublished: false); + + UpdateFor(Scope.Published, + forDraft: true, + forPublished: true); } } - private static Document CreateDocument(NamedContentData data) + private static Document CreateDocument(TextContent text) { - var languages = new Dictionary(); - - void AppendText(string language, string text) - { - if (!string.IsNullOrWhiteSpace(text)) - { - var sb = languages.GetOrAddNew(language); - - if (sb.Length > 0) - { - sb.Append(" "); - } - - sb.Append(text); - } - } - - foreach (var field in data) - { - if (field.Value != null) - { - foreach (var fieldValue in field.Value) - { - var appendText = new Action(text => AppendText(fieldValue.Key, text)); - - AppendJsonText(fieldValue.Value, appendText); - } - } - } - var document = new Document(); - foreach (var field in languages) + foreach (var field in text) { - document.AddTextField(field.Key, field.Value.ToString(), Field.Store.NO); + document.AddTextField(field.Key, field.Value, Field.Store.NO); } return document; } - private void UpdateFor(byte draft, byte forDraft, byte forPublished) + private void UpdateFor(Scope scope, bool forDraft, bool forPublished) { - var term = new Term(MetaKey, BuildKey(draft)); + var term = new Term(MetaKey, BuildKey(scope)); - indexState.Index(id, draft, term, forDraft, forPublished); + indexState.Index(id, scope, term, forDraft, forPublished); } - private void Upsert(Document document, byte draft, byte forDraft, byte forPublished) + private void Upsert(Document document, Scope draft, bool forDraft, 
bool forPublished) { - if (document != null) - { - document.RemoveField(MetaId); - document.RemoveField(MetaKey); + var contentKey = BuildKey(draft); - var contentId = id.ToString(); - var contentKey = BuildKey(draft); + document.SetField(MetaId, id.ToString()); + document.SetField(MetaKey, contentKey); - document.AddStringField(MetaId, contentId, Field.Store.YES); - document.AddStringField(MetaKey, contentKey, Field.Store.YES); + indexState.Index(id, draft, document, forDraft, forPublished); - indexState.Index(id, draft, document, forDraft, forPublished); - - indexWriter.UpdateDocument(new Term(MetaKey, contentKey), document); - } + index.Writer.UpdateDocument(new Term(MetaKey, contentKey), document); } - private static void AppendJsonText(IJsonValue value, Action appendText) + private bool IsAdded(Scope scope, out int docId) { - if (value.Type == JsonValueType.String) - { - appendText(value.ToString()); - } - else if (value is JsonArray array) - { - foreach (var item in array) - { - AppendJsonText(item, appendText); - } - } - else if (value is JsonObject obj) - { - foreach (var item in obj.Values) - { - AppendJsonText(item, appendText); - } - } - } + var term = new Term(MetaKey, BuildKey(scope)); - private bool IsAdded(byte draft, out int docId) - { - return indexState.HasBeenAdded(id, draft, new Term(MetaKey, BuildKey(draft)), out docId); + return indexState.HasBeenAdded(id, scope, term, out docId); } - private bool IsForPublished(byte draft, int docId) + private bool IsForPublished(Scope scope, int docId) { - return indexState.TryGet(id, draft, docId, out _, out var p) && p == 1; + indexState.Get(id, scope, docId, out _, out var forPublished); + + return forPublished; } - private string BuildKey(byte draft) + private string BuildKey(Scope scope) { - return $"{id}_{draft}"; + return $"{id}_{(scope == Scope.Draft ? 
1 : 0)}"; } } } diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs index 6730a6203..6ca66341e 100644 --- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs +++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs @@ -7,19 +7,15 @@ using System; using System.Collections.Generic; -using System.IO; using System.Linq; using System.Threading.Tasks; -using Lucene.Net.Analysis; -using Lucene.Net.Index; using Lucene.Net.QueryParsers.Classic; using Lucene.Net.Search; -using Lucene.Net.Store; using Lucene.Net.Util; using Squidex.Domain.Apps.Core; using Squidex.Infrastructure; -using Squidex.Infrastructure.Assets; using Squidex.Infrastructure.Orleans; +using Squidex.Infrastructure.Tasks; using Squidex.Infrastructure.Validation; namespace Squidex.Domain.Apps.Entities.Contents.Text @@ -30,95 +26,61 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private const int MaxResults = 2000; private const int MaxUpdates = 400; private static readonly TimeSpan CommitDelay = TimeSpan.FromSeconds(10); - private static readonly MergeScheduler MergeScheduler = new ConcurrentMergeScheduler(); - private static readonly Analyzer Analyzer = new MultiLanguageAnalyzer(Version); private static readonly string[] Invariant = { InvariantPartitioning.Key }; - private readonly SnapshotDeletionPolicy snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); - private readonly IAssetStore assetStore; + private readonly IndexHolderFactory indexHolderFactory; private IDisposable? timer; - private DirectoryInfo directory; - private IndexWriter? indexWriter; - private IndexReader? indexReader; - private IndexSearcher? indexSearcher; - private IndexState? indexState; + private IndexHolder index; + private IndexState indexState; private QueryParser? queryParser; private HashSet? 
currentLanguages; private int updates; - public TextIndexerGrain(IAssetStore assetStore) + public TextIndexerGrain(IndexHolderFactory indexHolderFactory) { - Guard.NotNull(assetStore); + Guard.NotNull(indexHolderFactory); - this.assetStore = assetStore; + this.indexHolderFactory = indexHolderFactory; } - public override async Task OnDeactivateAsync() + public override Task OnDeactivateAsync() { - await DeactivateAsync(true); - } - - protected override async Task OnActivateAsync(Guid key) - { - directory = new DirectoryInfo(Path.Combine(Path.GetTempPath(), $"Index_{key}")); - - await assetStore.DownloadAsync(directory); - - var config = new IndexWriterConfig(Version, Analyzer) - { - IndexDeletionPolicy = snapshotter, - MergePolicy = new TieredMergePolicy(), - MergeScheduler = MergeScheduler - }; + index?.Dispose(); + indexHolderFactory.Release(Key); - indexWriter = new IndexWriter(FSDirectory.Open(directory), config); - - if (indexWriter.NumDocs > 0) - { - OpenReader(); - } - else - { - indexState = new IndexState(indexWriter); - } + return Task.CompletedTask; } - public Task IndexAsync(J update) + protected override Task OnActivateAsync(Guid key) { - return IndexInternalAsync(update); + index = indexHolderFactory.Acquire(key); + indexState = new IndexState(index); + + return TaskHelper.Done; } - private Task IndexInternalAsync(Update update) + public Task IndexAsync(Update update) { - if (indexWriter != null && indexState != null) - { - var content = new TextIndexContent(indexWriter, indexState, update.Id); + var content = new TextIndexContent(index, indexState, update.Id); - content.Index(update.Data, update.OnlyDraft); - } + content.Index(update.Text, update.OnlyDraft); return TryFlushAsync(); } public Task CopyAsync(Guid id, bool fromDraft) { - if (indexWriter != null && indexState != null) - { - var content = new TextIndexContent(indexWriter, indexState, id); + var content = new TextIndexContent(index, indexState, id); - content.Copy(fromDraft); - } + 
content.Copy(fromDraft); return TryFlushAsync(); } public Task DeleteAsync(Guid id) { - if (indexWriter != null && indexState != null) - { - var content = new TextIndexContent(indexWriter, indexState, id); + var content = new TextIndexContent(index, indexState, id); - content.Delete(); - } + content.Delete(); return TryFlushAsync(); } @@ -129,33 +91,33 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text if (!string.IsNullOrWhiteSpace(queryText)) { - var query = BuildQuery(queryText, context); - - if (indexReader == null && indexWriter?.NumDocs > 0) - { - OpenReader(); - } + index.EnsureReader(); - if (indexReader != null && indexSearcher != null && indexState != null) + if (index.Searcher != null) { - var found = new HashSet(); + var query = BuildQuery(queryText, context); - var hits = indexSearcher.Search(query, MaxResults).ScoreDocs; + var hits = index.Searcher.Search(query, MaxResults).ScoreDocs; - foreach (var hit in hits) + if (hits.Length > 0) { - if (TextIndexContent.TryGetId(hit.Doc, context.Scope, indexReader, indexState, out var id)) + var found = new HashSet(); + + foreach (var hit in hits) { - if (found.Add(id)) + if (TextIndexContent.TryGetId(hit.Doc, context.Scope, index, indexState, out var id)) { - result.Add(id); + if (found.Add(id)) + { + result.Add(id); + } } } } } } - return Task.FromResult(result.ToList()); + return Task.FromResult(result); } private Query BuildQuery(string query, SearchContext context) @@ -164,7 +126,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text { var fields = context.Languages.Union(Invariant).ToArray(); - queryParser = new MultiFieldQueryParser(Version, fields, Analyzer); + queryParser = new MultiFieldQueryParser(Version, fields, index.Analyzer); currentLanguages = context.Languages; } @@ -193,7 +155,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text } else { - CleanReader(); + index.MarkStale(); try { @@ -208,64 +170,16 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text return false; } - public 
async Task FlushAsync() + public Task FlushAsync() { - if (updates > 0 && indexWriter != null) + if (updates > 0) { - indexWriter.Commit(); - indexWriter.Flush(true, true); - - CleanReader(); - - var commit = snapshotter.Snapshot(); - try - { - await assetStore.UploadDirectoryAsync(directory, commit); - } - finally - { - snapshotter.Release(commit); - } + index.Commit(); updates = 0; } - } - - public async Task DeactivateAsync(bool deleteFolder = false) - { - await FlushAsync(); - CleanWriter(); - CleanReader(); - - if (deleteFolder && directory.Exists) - { - directory.Delete(true); - } - } - - private void OpenReader() - { - if (indexWriter != null) - { - indexReader = indexWriter!.GetReader(true); - indexSearcher = new IndexSearcher(indexReader); - indexState = new IndexState(indexWriter, indexReader, indexSearcher); - } - } - - private void CleanReader() - { - indexReader?.Dispose(); - indexReader = null; - indexSearcher = null; - indexState?.CloseReader(); - } - - private void CleanWriter() - { - indexWriter?.Dispose(); - indexWriter = null; + return TaskHelper.Done; } } } diff --git a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Update.cs b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Update.cs index 263ff50ad..e9cb850aa 100644 --- a/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Update.cs +++ b/backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Update.cs @@ -6,15 +6,16 @@ // ========================================================================== using System; -using Squidex.Domain.Apps.Core.Contents; +using Orleans.Concurrency; namespace Squidex.Domain.Apps.Entities.Contents.Text { + [Immutable] public sealed class Update { public Guid Id { get; set; } - public NamedContentData Data { get; set; } + public TextContent Text { get; set; } public bool OnlyDraft { get; set; } } diff --git a/backend/src/Squidex/Config/Domain/ContentsServices.cs b/backend/src/Squidex/Config/Domain/ContentsServices.cs index 381f9e496..a0f3beb64 
100644 --- a/backend/src/Squidex/Config/Domain/ContentsServices.cs +++ b/backend/src/Squidex/Config/Domain/ContentsServices.cs @@ -51,6 +51,9 @@ namespace Squidex.Config.Domain services.AddSingletonAs() .As().As(); + services.AddSingletonAs() + .AsSelf(); + services.AddSingletonAs>() .AsSelf(); } diff --git a/backend/src/Squidex/Config/Domain/StoreServices.cs b/backend/src/Squidex/Config/Domain/StoreServices.cs index 783788626..5bb0f26ef 100644 --- a/backend/src/Squidex/Config/Domain/StoreServices.cs +++ b/backend/src/Squidex/Config/Domain/StoreServices.cs @@ -13,6 +13,7 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Migrate_01.Migrations.MongoDb; using MongoDB.Driver; +using MongoDB.Driver.GridFS; using Squidex.Domain.Apps.Entities; using Squidex.Domain.Apps.Entities.Assets.Repositories; using Squidex.Domain.Apps.Entities.Assets.State; @@ -22,6 +23,7 @@ using Squidex.Domain.Apps.Entities.Contents.Text; using Squidex.Domain.Apps.Entities.History.Repositories; using Squidex.Domain.Apps.Entities.MongoDb.Assets; using Squidex.Domain.Apps.Entities.MongoDb.Contents; +using Squidex.Domain.Apps.Entities.MongoDb.FullText; using Squidex.Domain.Apps.Entities.MongoDb.History; using Squidex.Domain.Apps.Entities.MongoDb.Rules; using Squidex.Domain.Apps.Entities.Rules.Repositories; @@ -121,6 +123,18 @@ namespace Squidex.Config.Domain services.AddSingletonAs() .As(); } + + services.AddSingletonAs(c => + { + var database = c.GetRequiredService(); + + var mongoBucket = new GridFSBucket(database, new GridFSBucketOptions + { + BucketName = "fullText" + }); + + return new MongoDirectoryFactory(mongoBucket); + }).As(); } }); diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/GrainTextIndexerTests.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/GrainTextIndexerTests.cs index 6b15aa70b..18d886d09 100644 --- 
a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/GrainTextIndexerTests.cs +++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/GrainTextIndexerTests.cs @@ -16,7 +16,6 @@ using Squidex.Domain.Apps.Entities.TestHelpers; using Squidex.Domain.Apps.Events.Contents; using Squidex.Infrastructure; using Squidex.Infrastructure.EventSourcing; -using Squidex.Infrastructure.Orleans; using Xunit; namespace Squidex.Domain.Apps.Entities.Contents.Text @@ -29,7 +28,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private readonly Guid contentId = Guid.NewGuid(); private readonly NamedId appId = NamedId.Of(Guid.NewGuid(), "my-app"); private readonly NamedId schemaId = NamedId.Of(Guid.NewGuid(), "my-schema"); - private readonly NamedContentData data = new NamedContentData(); private readonly GrainTextIndexer sut; public GrainTextIndexerTests() @@ -54,27 +52,27 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text [Fact] public async Task Should_call_grain_when_content_created() { - await sut.On(E(new ContentCreated { Data = data })); + await sut.On(E(new ContentCreated())); - A.CallTo(() => grain.IndexAsync(A>.That.Matches(x => x.Value.Data == data && x.Value.Id == contentId && x.Value.OnlyDraft))) + A.CallTo(() => grain.IndexAsync(A.That.Matches(x => x.Text.Count == 0 && x.Id == contentId && x.OnlyDraft))) .MustHaveHappened(); } [Fact] public async Task Should_call_grain_when_content_updated() { - await sut.On(E(new ContentUpdated { Data = data })); + await sut.On(E(new ContentUpdated())); - A.CallTo(() => grain.IndexAsync(A>.That.Matches(x => x.Value.Data == data && x.Value.Id == contentId && !x.Value.OnlyDraft))) + A.CallTo(() => grain.IndexAsync(A.That.Matches(x => x.Text.Count == 0 && x.Id == contentId && !x.OnlyDraft))) .MustHaveHappened(); } [Fact] public async Task Should_call_grain_when_content_change_proposed() { - await sut.On(E(new ContentUpdateProposed { Data = data })); + await sut.On(E(new ContentUpdateProposed())); - 
A.CallTo(() => grain.IndexAsync(A>.That.Matches(x => x.Value.Data == data && x.Value.Id == contentId && x.Value.OnlyDraft))) + A.CallTo(() => grain.IndexAsync(A.That.Matches(x => x.Text.Count == 0 && x.Id == contentId && x.OnlyDraft))) .MustHaveHappened(); } diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerBenchmark.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerBenchmark.cs new file mode 100644 index 000000000..bc6624ecd --- /dev/null +++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerBenchmark.cs @@ -0,0 +1,59 @@ +// ========================================================================== +// Squidex Headless CMS +// ========================================================================== +// Copyright (c) Squidex UG (haftungsbeschraenkt) +// All rights reserved. Licensed under the MIT license. +// ========================================================================== + +using System; +using System.Threading.Tasks; +using FakeItEasy; +using Squidex.Infrastructure; +using Squidex.Infrastructure.Log; +using Xunit; + +namespace Squidex.Domain.Apps.Entities.Contents.Text +{ + public class TextIndexerBenchmark + { + private readonly Guid schemaId = Guid.NewGuid(); + private readonly TextIndexerGrain sut; + + public TextIndexerBenchmark() + { + var factory = new IndexHolderFactory(new FSDirectoryFactory(), A.Fake()); + + sut = new TextIndexerGrain(factory); + sut.ActivateAsync(schemaId).Wait(); + } + + [Fact(Skip = "Only used for benchmarks")] + public async Task Should_index_many_documents() + { + var text = new TextContent + { + ["iv"] = "Hallo Welt" + }; + + var ids = new Guid[10000]; + + for (var i = 0; i < ids.Length; i++) + { + ids[i] = Guid.NewGuid(); + } + + var watch = ValueStopwatch.StartNew(); + + foreach (var id in ids) + { + await sut.IndexAsync(new Update { Text = text, Id = id }); + } + + sut.OnDeactivateAsync().Wait(); + + var elapsed = 
watch.Stop(); + + Assert.InRange(elapsed, 0, 1); + } + } +} diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTestsBase.cs similarity index 80% rename from backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs rename to backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTestsBase.cs index f370f6190..127dbf74d 100644 --- a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs +++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTestsBase.cs @@ -8,30 +8,35 @@ using System; using System.Collections.Generic; using System.Threading.Tasks; -using Squidex.Domain.Apps.Core.Contents; -using Squidex.Infrastructure.Assets; +using FakeItEasy; +using Squidex.Infrastructure.Log; using Squidex.Infrastructure.Validation; using Xunit; +#pragma warning disable RECS0021 // Warns about calls to virtual member functions occuring in the constructor + namespace Squidex.Domain.Apps.Entities.Contents.Text { - public class TextIndexerGrainTests : IDisposable + public abstract class TextIndexerGrainTestsBase : IDisposable { private readonly Guid schemaId = Guid.NewGuid(); private readonly List ids1 = new List { Guid.NewGuid() }; private readonly List ids2 = new List { Guid.NewGuid() }; private readonly SearchContext context; - private readonly IAssetStore assetStore = new MemoryAssetStore(); private readonly TextIndexerGrain sut; - public TextIndexerGrainTests() + public abstract IDirectoryFactory DirectoryFactory { get; } + + protected TextIndexerGrainTestsBase() { context = new SearchContext { Languages = new HashSet { "de", "en" } }; - sut = new TextIndexerGrain(assetStore); + var factory = new IndexHolderFactory(DirectoryFactory, A.Fake()); + + sut = new TextIndexerGrain(factory); sut.ActivateAsync(schemaId).Wait(); } @@ -51,9 +56,9 
@@ namespace Squidex.Domain.Apps.Entities.Contents.Text { await AddInvariantContent("Hello", "World", false); - await sut.DeactivateAsync(true); + await sut.OnDeactivateAsync(); - var other = new TextIndexerGrain(assetStore); + var other = new TextIndexerGrain(new IndexHolderFactory(DirectoryFactory, A.Fake())); try { await other.ActivateAsync(schemaId); @@ -199,38 +204,34 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text private async Task AddLocalizedContent() { - var germanData = - new NamedContentData() - .AddField("localized", - new ContentFieldData() - .AddValue("de", "Stadt und Umgebung and whatever")); - - var englishData = - new NamedContentData() - .AddField("localized", - new ContentFieldData() - .AddValue("en", "City and Surroundings und sonstiges")); - - await sut.IndexAsync(new Update { Id = ids1[0], Data = germanData, OnlyDraft = true }); - await sut.IndexAsync(new Update { Id = ids2[0], Data = englishData, OnlyDraft = true }); + var germanText = new TextContent + { + ["de"] = "Stadt und Umgebung and whatever" + }; + + var englishText = new TextContent + { + ["en"] = "City and Surroundings und sonstiges" + }; + + await sut.IndexAsync(new Update { Id = ids1[0], Text = germanText, OnlyDraft = true }); + await sut.IndexAsync(new Update { Id = ids2[0], Text = englishText, OnlyDraft = true }); } private async Task AddInvariantContent(string text1, string text2, bool onlyDraft = false) { - var data1 = - new NamedContentData() - .AddField("test", - new ContentFieldData() - .AddValue("iv", text1)); - - var data2 = - new NamedContentData() - .AddField("test", - new ContentFieldData() - .AddValue("iv", text2)); - - await sut.IndexAsync(new Update { Id = ids1[0], Data = data1, OnlyDraft = onlyDraft }); - await sut.IndexAsync(new Update { Id = ids2[0], Data = data2, OnlyDraft = onlyDraft }); + var content1 = new TextContent + { + ["iv"] = text1 + }; + + var content2 = new TextContent + { + ["iv"] = text2 + }; + + await sut.IndexAsync(new Update { Id = 
ids1[0], Text = content1, OnlyDraft = onlyDraft }); + await sut.IndexAsync(new Update { Id = ids2[0], Text = content2, OnlyDraft = onlyDraft }); } private async Task DeleteAsync(Guid id) diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests_FS.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests_FS.cs new file mode 100644 index 000000000..f785a59ff --- /dev/null +++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests_FS.cs @@ -0,0 +1,21 @@ +// ========================================================================== +// Squidex Headless CMS +// ========================================================================== +// Copyright (c) Squidex UG (haftungsbeschraenkt) +// All rights reserved. Licensed under the MIT license. +// ========================================================================== + +namespace Squidex.Domain.Apps.Entities.Contents.Text +{ + public class TextIndexerGrainTests_FS : TextIndexerGrainTestsBase + { + public override IDirectoryFactory DirectoryFactory => CreateFactory(); + + private static IDirectoryFactory CreateFactory() + { + var directoryFactory = new FSDirectoryFactory(); + + return directoryFactory; + } + } +} diff --git a/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests_Mongo.cs b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests_Mongo.cs new file mode 100644 index 000000000..ac0572a7e --- /dev/null +++ b/backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests_Mongo.cs @@ -0,0 +1,33 @@ +// ========================================================================== +// Squidex Headless CMS +// ========================================================================== +// Copyright (c) Squidex UG (haftungsbeschraenkt) +// All rights reserved. Licensed under the MIT license. 
+// ========================================================================== + +using MongoDB.Driver; +using MongoDB.Driver.GridFS; +using Squidex.Domain.Apps.Entities.MongoDb.FullText; + +namespace Squidex.Domain.Apps.Entities.Contents.Text +{ + internal class TextIndexerGrainTests_Mongo : TextIndexerGrainTestsBase + { + public override IDirectoryFactory DirectoryFactory => CreateFactory(); + + private static IDirectoryFactory CreateFactory() + { + var mongoClient = new MongoClient("mongodb://localhost"); + var mongoDatabase = mongoClient.GetDatabase("FullText"); + + var mongoBucket = new GridFSBucket(mongoDatabase, new GridFSBucketOptions + { + BucketName = "fs" + }); + + var directoryFactory = new MongoDirectoryFactory(mongoBucket); + + return directoryFactory; + } + } +}