Browse Source

First working version of full text search.

pull/349/head
Sebastian 7 years ago
parent
commit
7f5ff73ef9
  1. 16
      src/Squidex.Domain.Apps.Entities.MongoDb/Contents/MongoContentRepository.cs
  2. 23
      src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs
  3. 2
      src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexer.cs
  4. 2
      src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs
  5. 3
      src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs
  6. 6
      src/Squidex.Domain.Apps.Entities/Contents/Text/SearchContext.cs
  7. 85
      src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs
  8. 1
      src/Squidex.Domain.Apps.Entities/Squidex.Domain.Apps.Entities.csproj
  9. 3
      src/Squidex/Config/Domain/StoreServices.cs
  10. 6
      src/Squidex/appsettings.json
  11. 53
      tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs
  12. 2
      tests/Squidex.Domain.Users.Tests/AssetUserPictureStoreTests.cs

16
src/Squidex.Domain.Apps.Entities.MongoDb/Contents/MongoContentRepository.cs

@ -55,18 +55,16 @@ namespace Squidex.Domain.Apps.Entities.MongoDb.Contents
{ {
using (Profiler.TraceMethod<MongoContentRepository>("QueryAsyncByQuery")) using (Profiler.TraceMethod<MongoContentRepository>("QueryAsyncByQuery"))
{ {
if (RequiresPublished(status)) var useDraft = RequiresPublished(status);
{
var ids = await indexer.SearchAsync(query.FullText, app, schema);
return await contents.QueryAsync(app, schema, query, ids); var fullTextIds = await indexer.SearchAsync(query.FullText, app, schema, useDraft);
}
else
{
var ids = await indexer.SearchAsync(query.FullText, app, schema, true);
return await contents.QueryAsync(app, schema, query, ids, status, true); if (fullTextIds?.Count == 0)
{
return ResultList.Create<IContentEntity>(0);
} }
return await contents.QueryAsync(app, schema, query, fullTextIds, status, true);
} }
} }

23
src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs

@ -14,6 +14,7 @@ using Squidex.Domain.Apps.Core.Contents;
using Squidex.Domain.Apps.Entities.Apps; using Squidex.Domain.Apps.Entities.Apps;
using Squidex.Domain.Apps.Entities.Schemas; using Squidex.Domain.Apps.Entities.Schemas;
using Squidex.Infrastructure; using Squidex.Infrastructure;
using Squidex.Infrastructure.Log;
namespace Squidex.Domain.Apps.Entities.Contents.Text namespace Squidex.Domain.Apps.Entities.Contents.Text
{ {
@ -41,12 +42,12 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
if (data != null) if (data != null)
{ {
await index.IndexAsync(id, new IndexData { }); await index.IndexAsync(id, new IndexData { Data = data });
} }
if (dataDraft != null) if (dataDraft != null)
{ {
await index.IndexAsync(id, new IndexData { IsDraft = true }); await index.IndexAsync(id, new IndexData { Data = dataDraft, IsDraft = true });
} }
} }
@ -59,17 +60,19 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
var index = grainFactory.GetGrain<ITextIndexerGrain>(schema.Id); var index = grainFactory.GetGrain<ITextIndexerGrain>(schema.Id);
var languages = app.LanguagesConfig.Select(x => x.Key).ToList(); using (Profiler.TraceMethod<GrainTextIndexer>("SearchAsync"))
var context = new SearchContext
{ {
AppVersion = app.Version, var context = CreateContext(app, useDraft);
AppLanguages = languages,
SchemaVersion = schema.Version,
IsDraft = useDraft
};
return await index.SearchAsync(queryText, context); return await index.SearchAsync(queryText, context);
} }
} }
/// <summary>
/// Builds the search context that is passed to the text indexer grain.
/// </summary>
/// <param name="app">The app whose language configuration keys become the searchable languages.</param>
/// <param name="useDraft">Copied to <c>IsDraft</c> on the resulting context.</param>
/// <returns>A new context holding the set of language keys and the draft flag.</returns>
private static SearchContext CreateContext(IAppEntity app, bool useDraft)
{
    // A set is used so that duplicate language keys collapse to one entry.
    return new SearchContext
    {
        Languages = new HashSet<string>(app.LanguagesConfig.Select(language => language.Key)),
        IsDraft = useDraft
    };
}
}
} }

2
src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexer.cs

@ -20,6 +20,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
Task IndexAsync(Guid schemaId, Guid id, NamedContentData data, NamedContentData dataDraft); Task IndexAsync(Guid schemaId, Guid id, NamedContentData data, NamedContentData dataDraft);
Task<List<Guid>> SearchAsync(string queryText, IAppEntity appEntity, ISchemaEntity schemaEntity, bool useDraft = false); Task<List<Guid>> SearchAsync(string queryText, IAppEntity app, ISchemaEntity schema, bool useDraft = false);
} }
} }

2
src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs

@ -19,6 +19,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
Task IndexAsync(Guid id, J<IndexData> data); Task IndexAsync(Guid id, J<IndexData> data);
Task<List<Guid>> SearchAsync(string queryText, J<SearchContext> context); Task<List<Guid>> SearchAsync(string queryText, SearchContext context);
} }
} }

3
src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs

@ -34,7 +34,8 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{ {
try try
{ {
if (!file.Name.Equals(ArchiveFile, StringComparison.OrdinalIgnoreCase) && !file.Name.Equals(LockFile, StringComparison.OrdinalIgnoreCase)) if (!file.Name.Equals(ArchiveFile, StringComparison.OrdinalIgnoreCase) &&
!file.Name.Equals(LockFile, StringComparison.OrdinalIgnoreCase))
{ {
zipArchive.CreateEntryFromFile(file.FullName, file.Name); zipArchive.CreateEntryFromFile(file.FullName, file.Name);
} }

6
src/Squidex.Domain.Apps.Entities/Contents/Text/SearchContext.cs

@ -13,10 +13,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{ {
public bool IsDraft { get; set; } public bool IsDraft { get; set; }
public long AppVersion { get; set; } public HashSet<string> Languages { get; set; }
public long SchemaVersion { get; set; }
public List<string> AppLanguages { get; set; }
} }
} }

85
src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs

@ -14,6 +14,7 @@ using System.Threading.Tasks;
using Lucene.Net.Analysis; using Lucene.Net.Analysis;
using Lucene.Net.Documents; using Lucene.Net.Documents;
using Lucene.Net.Index; using Lucene.Net.Index;
using Lucene.Net.Queries;
using Lucene.Net.QueryParsers.Classic; using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search; using Lucene.Net.Search;
using Lucene.Net.Store; using Lucene.Net.Store;
@ -30,15 +31,15 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
private const LuceneVersion Version = LuceneVersion.LUCENE_48; private const LuceneVersion Version = LuceneVersion.LUCENE_48;
private const int MaxResults = 2000; private const int MaxResults = 2000;
private const int MaxUpdates = 100; private const int MaxUpdates = 100;
private static readonly HashSet<string> IdFields = new HashSet<string>(); private static readonly TimeSpan CommitDelay = TimeSpan.FromSeconds(30);
private static readonly Analyzer Analyzer = new MultiLanguageAnalyzer(Version); private static readonly Analyzer Analyzer = new MultiLanguageAnalyzer(Version);
private readonly IAssetStore assetStore; private readonly IAssetStore assetStore;
private IDisposable timer;
private DirectoryInfo directory; private DirectoryInfo directory;
private IndexWriter indexWriter; private IndexWriter indexWriter;
private IndexReader indexReader; private IndexReader indexReader;
private QueryParser queryParser; private QueryParser queryParser;
private long currentAppVersion; private HashSet<string> currentLanguages;
private long currentSchemaVersion;
private long updates; private long updates;
public TextIndexerGrain(IAssetStore assetStore) public TextIndexerGrain(IAssetStore assetStore)
@ -48,13 +49,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
this.assetStore = assetStore; this.assetStore = assetStore;
} }
public override Task OnActivateAsync()
{
RegisterTimer(_ => FlushAsync(), null, TimeSpan.Zero, TimeSpan.FromMinutes(10));
return base.OnActivateAsync();
}
public override async Task OnDeactivateAsync() public override async Task OnDeactivateAsync()
{ {
await DeactivateAsync(true); await DeactivateAsync(true);
@ -79,16 +73,13 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
public Task IndexAsync(Guid id, J<IndexData> data) public Task IndexAsync(Guid id, J<IndexData> data)
{ {
string idString = id.ToString(), draft = data.Value.IsDraft.ToString(); var docId = id.ToString();
var docDraft = data.Value.IsDraft.ToString();
var docKey = $"{docId}_{docDraft}";
indexWriter.DeleteDocuments( var query = new BooleanQuery();
new Term("id", idString),
new Term("dd", draft));
var document = new Document(); indexWriter.DeleteDocuments(new Term("key", docKey));
document.AddStringField("id", idString, Field.Store.YES);
document.AddStringField("dd", draft, Field.Store.YES);
var languages = new Dictionary<string, StringBuilder>(); var languages = new Dictionary<string, StringBuilder>();
@ -117,12 +108,23 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
} }
} }
if (languages.Count > 0)
{
var document = new Document();
document.AddStringField("id", docId, Field.Store.YES);
document.AddStringField("key", docKey, Field.Store.YES);
document.AddStringField("draft", docDraft, Field.Store.YES);
foreach (var field in languages) foreach (var field in languages)
{ {
document.AddTextField(field.Key, field.Value.ToString(), Field.Store.NO); var fieldName = BuildFieldName(field.Key);
document.AddTextField(fieldName, field.Value.ToString(), Field.Store.NO);
} }
indexWriter.AddDocument(document); indexWriter.AddDocument(document);
}
return TryFlushAsync(); return TryFlushAsync();
} }
@ -131,9 +133,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{ {
if (value.Type == JsonValueType.String) if (value.Type == JsonValueType.String)
{ {
var text = value.ToString(); appendText(value.ToString());
appendText(text);
} }
else if (value is JsonArray array) else if (value is JsonArray array)
{ {
@ -151,7 +151,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
} }
} }
public Task<List<Guid>> SearchAsync(string queryText, J<SearchContext> context) public Task<List<Guid>> SearchAsync(string queryText, SearchContext context)
{ {
var result = new HashSet<Guid>(); var result = new HashSet<Guid>();
@ -161,9 +161,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
if (indexReader != null) if (indexReader != null)
{ {
var filter = new QueryWrapperFilter(new TermQuery(new Term("dd", context.Value.IsDraft.ToString()))); var filter = new TermsFilter(new Term("draft", context.IsDraft.ToString()));
var hits = new IndexSearcher(indexReader).Search(query, MaxResults).ScoreDocs; var hits = new IndexSearcher(indexReader).Search(query, filter, MaxResults).ScoreDocs;
foreach (var hit in hits) foreach (var hit in hits)
{ {
@ -184,18 +184,26 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
private Query BuildQuery(string query, SearchContext context) private Query BuildQuery(string query, SearchContext context)
{ {
if (queryParser == null || currentAppVersion != context.AppVersion || currentSchemaVersion != context.SchemaVersion) if (queryParser == null || !currentLanguages.SetEquals(context.Languages))
{ {
var fields = context.AppLanguages.Select(BuildFieldName).ToArray(); var fields =
context.Languages.Select(BuildFieldName)
.Union(Enumerable.Repeat(BuildFieldName("iv"), 1)).ToArray();
queryParser = new MultiFieldQueryParser(Version, fields, Analyzer); queryParser = new MultiFieldQueryParser(Version, fields, Analyzer);
currentAppVersion = context.AppVersion; currentLanguages = context.Languages;
currentSchemaVersion = context.SchemaVersion;
} }
try
{
return queryParser.Parse(query); return queryParser.Parse(query);
} }
catch (ParseException ex)
{
throw new ValidationException(ex.Message);
}
}
private async Task TryFlushAsync() private async Task TryFlushAsync()
{ {
@ -205,6 +213,19 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{ {
await FlushAsync(); await FlushAsync();
} }
else
{
timer?.Dispose();
try
{
timer = RegisterTimer(_ => FlushAsync(), null, CommitDelay, CommitDelay);
}
catch (InvalidOperationException)
{
return;
}
}
} }
public async Task FlushAsync() public async Task FlushAsync()
@ -221,6 +242,10 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
updates = 0; updates = 0;
} }
else
{
timer?.Dispose();
}
} }
public async Task DeactivateAsync(bool deleteFolder = false) public async Task DeactivateAsync(bool deleteFolder = false)
@ -241,7 +266,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
private static string BuildFieldName(string language) private static string BuildFieldName(string language)
{ {
return $"field_{language}"; return $"{language}_field";
} }
} }
} }

1
src/Squidex.Domain.Apps.Entities/Squidex.Domain.Apps.Entities.csproj

@ -18,6 +18,7 @@
<PackageReference Include="GraphQL" Version="2.4.0" /> <PackageReference Include="GraphQL" Version="2.4.0" />
<PackageReference Include="Lucene.Net" Version="4.8.0-beta00005" /> <PackageReference Include="Lucene.Net" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.Analysis.Common" Version="4.8.0-beta00005" /> <PackageReference Include="Lucene.Net.Analysis.Common" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.Queries" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.QueryParser" Version="4.8.0-beta00005" /> <PackageReference Include="Lucene.Net.QueryParser" Version="4.8.0-beta00005" />
<PackageReference Include="Microsoft.Orleans.CodeGenerator.MSBuild" Version="2.2.3"> <PackageReference Include="Microsoft.Orleans.CodeGenerator.MSBuild" Version="2.2.3">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>

3
src/Squidex/Config/Domain/StoreServices.cs

@ -18,6 +18,7 @@ using Squidex.Domain.Apps.Entities.Assets.Repositories;
using Squidex.Domain.Apps.Entities.Assets.State; using Squidex.Domain.Apps.Entities.Assets.State;
using Squidex.Domain.Apps.Entities.Contents.Repositories; using Squidex.Domain.Apps.Entities.Contents.Repositories;
using Squidex.Domain.Apps.Entities.Contents.State; using Squidex.Domain.Apps.Entities.Contents.State;
using Squidex.Domain.Apps.Entities.Contents.Text;
using Squidex.Domain.Apps.Entities.History.Repositories; using Squidex.Domain.Apps.Entities.History.Repositories;
using Squidex.Domain.Apps.Entities.MongoDb.Assets; using Squidex.Domain.Apps.Entities.MongoDb.Assets;
using Squidex.Domain.Apps.Entities.MongoDb.Contents; using Squidex.Domain.Apps.Entities.MongoDb.Contents;
@ -104,7 +105,7 @@ namespace Squidex.Config.Domain
c.GetRequiredService<IMongoClient>().GetDatabase(mongoContentDatabaseName), c.GetRequiredService<IMongoClient>().GetDatabase(mongoContentDatabaseName),
c.GetRequiredService<IAppProvider>(), c.GetRequiredService<IAppProvider>(),
c.GetRequiredService<IJsonSerializer>(), c.GetRequiredService<IJsonSerializer>(),
c.GetRequiredService<IOptions<MongoDbOptions>>())) c.GetRequiredService<ITextIndexer>()))
.AsOptional<IContentRepository>() .AsOptional<IContentRepository>()
.AsOptional<ISnapshotStore<ContentState, Guid>>() .AsOptional<ISnapshotStore<ContentState, Guid>>()
.AsOptional<IEventConsumer>(); .AsOptional<IEventConsumer>();

6
src/Squidex/appsettings.json

@ -283,7 +283,11 @@
/* /*
* The database for all your other read collections. * The database for all your other read collections.
*/ */
"database": "Squidex" "database": "Squidex",
/*
* Indicates whether the connection string is for Cosmos DB.
*/
"isCosmosDB": "false"
} }
}, },

53
tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs

@ -8,9 +8,8 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Threading.Tasks; using System.Threading.Tasks;
using Squidex.Domain.Apps.Core;
using Squidex.Domain.Apps.Core.Contents; using Squidex.Domain.Apps.Core.Contents;
using Squidex.Domain.Apps.Core.Schemas; using Squidex.Infrastructure;
using Squidex.Infrastructure.Assets; using Squidex.Infrastructure.Assets;
using Xunit; using Xunit;
@ -18,10 +17,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{ {
public class TextIndexerGrainTests : IDisposable public class TextIndexerGrainTests : IDisposable
{ {
private readonly Schema schema =
new Schema("test")
.AddString(1, "test", Partitioning.Invariant)
.AddString(2, "localized", Partitioning.Language);
private readonly Guid schemaId = Guid.NewGuid(); private readonly Guid schemaId = Guid.NewGuid();
private readonly List<Guid> ids1 = new List<Guid> { Guid.NewGuid() }; private readonly List<Guid> ids1 = new List<Guid> { Guid.NewGuid() };
private readonly List<Guid> ids2 = new List<Guid> { Guid.NewGuid() }; private readonly List<Guid> ids2 = new List<Guid> { Guid.NewGuid() };
@ -33,10 +28,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{ {
context = new SearchContext context = new SearchContext
{ {
AppVersion = 1, Languages = new HashSet<string> { "de", "en" }
Schema = schema,
SchemaVersion = 1,
AppLanguages = new List<string> { "de", "en" }
}; };
sut = new TextIndexerGrain(assetStore); sut = new TextIndexerGrain(assetStore);
@ -60,13 +52,11 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{ {
await other.ActivateAsync(schemaId); await other.ActivateAsync(schemaId);
var helloIds = await other.SearchAsync("Hello", context); var foundHello = await other.SearchAsync("Hello", context);
var foundWorld = await other.SearchAsync("World", context);
Assert.Equal(ids1, helloIds); Assert.Equal(ids1, foundHello);
Assert.Equal(ids2, foundWorld);
var worldIds = await other.SearchAsync("World", context);
Assert.Equal(ids2, worldIds);
} }
finally finally
{ {
@ -79,13 +69,23 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{ {
await AddInvariantContent(); await AddInvariantContent();
var helloIds = await sut.SearchAsync("Hello", context); var foundHello = await sut.SearchAsync("Hello", context);
var foundWorld = await sut.SearchAsync("World", context);
Assert.Equal(ids1, helloIds); Assert.Equal(ids1, foundHello);
Assert.Equal(ids2, foundWorld);
}
var worldIds = await sut.SearchAsync("World", context); [Fact]
public async Task Should_index_invariant_content_and_retrieve_with_fuzzy()
{
await AddInvariantContent();
Assert.Equal(ids2, worldIds); var foundHello = await sut.SearchAsync("helo~", context);
var foundWorld = await sut.SearchAsync("wold~", context);
Assert.Equal(ids1, foundHello);
Assert.Equal(ids2, foundWorld);
} }
[Fact] [Fact]
@ -98,10 +98,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
var helloIds = await sut.SearchAsync("Hello", context); var helloIds = await sut.SearchAsync("Hello", context);
Assert.Empty(helloIds);
var worldIds = await sut.SearchAsync("World", context); var worldIds = await sut.SearchAsync("World", context);
Assert.Empty(helloIds);
Assert.Equal(ids2, worldIds); Assert.Equal(ids2, worldIds);
} }
@ -117,7 +116,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
Assert.Equal(ids1, german1); Assert.Equal(ids1, german1);
Assert.Equal(ids1, german2); Assert.Equal(ids1, german2);
Assert.Equal(ids2, germanStopwordsIds); Assert.Equal(ids2, germanStopwordsIds);
var english1 = await sut.SearchAsync("City", context); var english1 = await sut.SearchAsync("City", context);
@ -127,10 +125,17 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
Assert.Equal(ids2, english1); Assert.Equal(ids2, english1);
Assert.Equal(ids2, english2); Assert.Equal(ids2, english2);
Assert.Equal(ids1, englishStopwordsIds); Assert.Equal(ids1, englishStopwordsIds);
} }
// Verifies that searching with the query text "~hello" surfaces a ValidationException.
[Fact]
public async Task Should_throw_exception_for_invalid_query()
{
    await AddInvariantContent();

    // The search is wrapped in a delegate so the assertion can invoke it and observe the failure.
    Func<Task> search = () => sut.SearchAsync("~hello", context);

    await Assert.ThrowsAsync<ValidationException>(search);
}
private async Task AddLocalizedContent() private async Task AddLocalizedContent()
{ {
var germanData = var germanData =

2
tests/Squidex.Domain.Users.Tests/AssetUserPictureStoreTests.cs

@ -35,7 +35,7 @@ namespace Squidex.Domain.Users
await sut.UploadAsync(userId, stream); await sut.UploadAsync(userId, stream);
A.CallTo(() => assetStore.UploadAsync(userId, 0, "picture", stream, CancellationToken.None)).MustHaveHappened(); A.CallTo(() => assetStore.UploadAsync(userId, 0, "picture", stream, false, CancellationToken.None)).MustHaveHappened();
} }
[Fact] [Fact]

Loading…
Cancel
Save