Browse Source

First working version of full text search.

pull/349/head
Sebastian 7 years ago
parent
commit
7f5ff73ef9
  1. 16
      src/Squidex.Domain.Apps.Entities.MongoDb/Contents/MongoContentRepository.cs
  2. 25
      src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs
  3. 2
      src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexer.cs
  4. 2
      src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs
  5. 3
      src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs
  6. 6
      src/Squidex.Domain.Apps.Entities/Contents/Text/SearchContext.cs
  7. 93
      src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs
  8. 1
      src/Squidex.Domain.Apps.Entities/Squidex.Domain.Apps.Entities.csproj
  9. 3
      src/Squidex/Config/Domain/StoreServices.cs
  10. 58
      src/Squidex/appsettings.json
  11. 53
      tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs
  12. 2
      tests/Squidex.Domain.Users.Tests/AssetUserPictureStoreTests.cs

16
src/Squidex.Domain.Apps.Entities.MongoDb/Contents/MongoContentRepository.cs

@ -55,18 +55,16 @@ namespace Squidex.Domain.Apps.Entities.MongoDb.Contents
{
using (Profiler.TraceMethod<MongoContentRepository>("QueryAsyncByQuery"))
{
if (RequiresPublished(status))
{
var ids = await indexer.SearchAsync(query.FullText, app, schema);
var useDraft = RequiresPublished(status);
return await contents.QueryAsync(app, schema, query, ids);
}
else
{
var ids = await indexer.SearchAsync(query.FullText, app, schema, true);
var fullTextIds = await indexer.SearchAsync(query.FullText, app, schema, useDraft);
return await contents.QueryAsync(app, schema, query, ids, status, true);
if (fullTextIds?.Count == 0)
{
return ResultList.Create<IContentEntity>(0);
}
return await contents.QueryAsync(app, schema, query, fullTextIds, status, true);
}
}

25
src/Squidex.Domain.Apps.Entities/Contents/Text/GrainTextIndexer.cs

@ -14,6 +14,7 @@ using Squidex.Domain.Apps.Core.Contents;
using Squidex.Domain.Apps.Entities.Apps;
using Squidex.Domain.Apps.Entities.Schemas;
using Squidex.Infrastructure;
using Squidex.Infrastructure.Log;
namespace Squidex.Domain.Apps.Entities.Contents.Text
{
@ -41,12 +42,12 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
if (data != null)
{
await index.IndexAsync(id, new IndexData { });
await index.IndexAsync(id, new IndexData { Data = data });
}
if (dataDraft != null)
{
await index.IndexAsync(id, new IndexData { IsDraft = true });
await index.IndexAsync(id, new IndexData { Data = dataDraft, IsDraft = true });
}
}
@ -59,17 +60,19 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
var index = grainFactory.GetGrain<ITextIndexerGrain>(schema.Id);
var languages = app.LanguagesConfig.Select(x => x.Key).ToList();
var context = new SearchContext
using (Profiler.TraceMethod<GrainTextIndexer>("SearchAsync"))
{
AppVersion = app.Version,
AppLanguages = languages,
SchemaVersion = schema.Version,
IsDraft = useDraft
};
var context = CreateContext(app, useDraft);
return await index.SearchAsync(queryText, context);
}
}
private static SearchContext CreateContext(IAppEntity app, bool useDraft)
{
var languages = new HashSet<string>(app.LanguagesConfig.Select(x => x.Key));
return await index.SearchAsync(queryText, context);
return new SearchContext { Languages = languages, IsDraft = useDraft };
}
}
}

2
src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexer.cs

@ -20,6 +20,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
Task IndexAsync(Guid schemaId, Guid id, NamedContentData data, NamedContentData dataDraft);
Task<List<Guid>> SearchAsync(string queryText, IAppEntity appEntity, ISchemaEntity schemaEntity, bool useDraft = false);
Task<List<Guid>> SearchAsync(string queryText, IAppEntity app, ISchemaEntity schema, bool useDraft = false);
}
}

2
src/Squidex.Domain.Apps.Entities/Contents/Text/ITextIndexerGrain.cs

@ -19,6 +19,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
Task IndexAsync(Guid id, J<IndexData> data);
Task<List<Guid>> SearchAsync(string queryText, J<SearchContext> context);
Task<List<Guid>> SearchAsync(string queryText, SearchContext context);
}
}

3
src/Squidex.Domain.Apps.Entities/Contents/Text/PersistenceHelper.cs

@ -34,7 +34,8 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{
try
{
if (!file.Name.Equals(ArchiveFile, StringComparison.OrdinalIgnoreCase) && !file.Name.Equals(LockFile, StringComparison.OrdinalIgnoreCase))
if (!file.Name.Equals(ArchiveFile, StringComparison.OrdinalIgnoreCase) &&
!file.Name.Equals(LockFile, StringComparison.OrdinalIgnoreCase))
{
zipArchive.CreateEntryFromFile(file.FullName, file.Name);
}

6
src/Squidex.Domain.Apps.Entities/Contents/Text/SearchContext.cs

@ -13,10 +13,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{
public bool IsDraft { get; set; }
public long AppVersion { get; set; }
public long SchemaVersion { get; set; }
public List<string> AppLanguages { get; set; }
public HashSet<string> Languages { get; set; }
}
}

93
src/Squidex.Domain.Apps.Entities/Contents/Text/TextIndexerGrain.cs

@ -14,6 +14,7 @@ using System.Threading.Tasks;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Queries;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search;
using Lucene.Net.Store;
@ -30,15 +31,15 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
private const LuceneVersion Version = LuceneVersion.LUCENE_48;
private const int MaxResults = 2000;
private const int MaxUpdates = 100;
private static readonly HashSet<string> IdFields = new HashSet<string>();
private static readonly TimeSpan CommitDelay = TimeSpan.FromSeconds(30);
private static readonly Analyzer Analyzer = new MultiLanguageAnalyzer(Version);
private readonly IAssetStore assetStore;
private IDisposable timer;
private DirectoryInfo directory;
private IndexWriter indexWriter;
private IndexReader indexReader;
private QueryParser queryParser;
private long currentAppVersion;
private long currentSchemaVersion;
private HashSet<string> currentLanguages;
private long updates;
public TextIndexerGrain(IAssetStore assetStore)
@ -48,13 +49,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
this.assetStore = assetStore;
}
public override Task OnActivateAsync()
{
RegisterTimer(_ => FlushAsync(), null, TimeSpan.Zero, TimeSpan.FromMinutes(10));
return base.OnActivateAsync();
}
public override async Task OnDeactivateAsync()
{
await DeactivateAsync(true);
@ -79,16 +73,13 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
public Task IndexAsync(Guid id, J<IndexData> data)
{
string idString = id.ToString(), draft = data.Value.IsDraft.ToString();
var docId = id.ToString();
var docDraft = data.Value.IsDraft.ToString();
var docKey = $"{docId}_{docDraft}";
indexWriter.DeleteDocuments(
new Term("id", idString),
new Term("dd", draft));
var query = new BooleanQuery();
var document = new Document();
document.AddStringField("id", idString, Field.Store.YES);
document.AddStringField("dd", draft, Field.Store.YES);
indexWriter.DeleteDocuments(new Term("key", docKey));
var languages = new Dictionary<string, StringBuilder>();
@ -117,12 +108,23 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
}
}
foreach (var field in languages)
if (languages.Count > 0)
{
document.AddTextField(field.Key, field.Value.ToString(), Field.Store.NO);
}
var document = new Document();
indexWriter.AddDocument(document);
document.AddStringField("id", docId, Field.Store.YES);
document.AddStringField("key", docKey, Field.Store.YES);
document.AddStringField("draft", docDraft, Field.Store.YES);
foreach (var field in languages)
{
var fieldName = BuildFieldName(field.Key);
document.AddTextField(fieldName, field.Value.ToString(), Field.Store.NO);
}
indexWriter.AddDocument(document);
}
return TryFlushAsync();
}
@ -131,9 +133,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{
if (value.Type == JsonValueType.String)
{
var text = value.ToString();
appendText(text);
appendText(value.ToString());
}
else if (value is JsonArray array)
{
@ -151,7 +151,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
}
}
public Task<List<Guid>> SearchAsync(string queryText, J<SearchContext> context)
public Task<List<Guid>> SearchAsync(string queryText, SearchContext context)
{
var result = new HashSet<Guid>();
@ -161,9 +161,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
if (indexReader != null)
{
var filter = new QueryWrapperFilter(new TermQuery(new Term("dd", context.Value.IsDraft.ToString())));
var filter = new TermsFilter(new Term("draft", context.IsDraft.ToString()));
var hits = new IndexSearcher(indexReader).Search(query, MaxResults).ScoreDocs;
var hits = new IndexSearcher(indexReader).Search(query, filter, MaxResults).ScoreDocs;
foreach (var hit in hits)
{
@ -184,17 +184,25 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
private Query BuildQuery(string query, SearchContext context)
{
if (queryParser == null || currentAppVersion != context.AppVersion || currentSchemaVersion != context.SchemaVersion)
if (queryParser == null || !currentLanguages.SetEquals(context.Languages))
{
var fields = context.AppLanguages.Select(BuildFieldName).ToArray();
var fields =
context.Languages.Select(BuildFieldName)
.Union(Enumerable.Repeat(BuildFieldName("iv"), 1)).ToArray();
queryParser = new MultiFieldQueryParser(Version, fields, Analyzer);
currentAppVersion = context.AppVersion;
currentSchemaVersion = context.SchemaVersion;
currentLanguages = context.Languages;
}
return queryParser.Parse(query);
try
{
return queryParser.Parse(query);
}
catch (ParseException ex)
{
throw new ValidationException(ex.Message);
}
}
private async Task TryFlushAsync()
@ -205,6 +213,19 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{
await FlushAsync();
}
else
{
timer?.Dispose();
try
{
timer = RegisterTimer(_ => FlushAsync(), null, CommitDelay, CommitDelay);
}
catch (InvalidOperationException)
{
return;
}
}
}
public async Task FlushAsync()
@ -221,6 +242,10 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
updates = 0;
}
else
{
timer?.Dispose();
}
}
public async Task DeactivateAsync(bool deleteFolder = false)
@ -241,7 +266,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
private static string BuildFieldName(string language)
{
return $"field_{language}";
return $"{language}_field";
}
}
}

1
src/Squidex.Domain.Apps.Entities/Squidex.Domain.Apps.Entities.csproj

@ -18,6 +18,7 @@
<PackageReference Include="GraphQL" Version="2.4.0" />
<PackageReference Include="Lucene.Net" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.Analysis.Common" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.Queries" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.QueryParser" Version="4.8.0-beta00005" />
<PackageReference Include="Microsoft.Orleans.CodeGenerator.MSBuild" Version="2.2.3">
<PrivateAssets>all</PrivateAssets>

3
src/Squidex/Config/Domain/StoreServices.cs

@ -18,6 +18,7 @@ using Squidex.Domain.Apps.Entities.Assets.Repositories;
using Squidex.Domain.Apps.Entities.Assets.State;
using Squidex.Domain.Apps.Entities.Contents.Repositories;
using Squidex.Domain.Apps.Entities.Contents.State;
using Squidex.Domain.Apps.Entities.Contents.Text;
using Squidex.Domain.Apps.Entities.History.Repositories;
using Squidex.Domain.Apps.Entities.MongoDb.Assets;
using Squidex.Domain.Apps.Entities.MongoDb.Contents;
@ -104,7 +105,7 @@ namespace Squidex.Config.Domain
c.GetRequiredService<IMongoClient>().GetDatabase(mongoContentDatabaseName),
c.GetRequiredService<IAppProvider>(),
c.GetRequiredService<IJsonSerializer>(),
c.GetRequiredService<IOptions<MongoDbOptions>>()))
c.GetRequiredService<ITextIndexer>()))
.AsOptional<IContentRepository>()
.AsOptional<ISnapshotStore<ContentState, Guid>>()
.AsOptional<IEventConsumer>();

58
src/Squidex/appsettings.json

@ -283,7 +283,11 @@
/*
* The database for all your other read collections.
*/
"database": "Squidex"
"database": "Squidex",
/*
* Indicates whether the connection string is for Cosmos DB.
*/
"isCosmosDB": "false"
}
},
@ -291,48 +295,48 @@
/*
* Enable password auth. Set this to false if you want to disable local login, leaving only 3rd party login options.
*/
"allowPasswordAuth": true,
/*
"allowPasswordAuth": true,
/*
* Initial admin user.
*/
"adminEmail": "",
"adminPassword": "",
/*
"adminEmail": "",
"adminPassword": "",
/*
* Client with all admin permissions.
*/
"adminClientId": "",
"adminClientSecret": "",
/*
"adminClientId": "",
"adminClientSecret": "",
/*
* Settings for Google auth (keep empty to disable).
*/
"googleClient": "1006817248705-t3lb3ge808m9am4t7upqth79hulk456l.apps.googleusercontent.com",
"googleSecret": "QsEi-fHqkGw2_PjJmtNHf2wg",
/*
"googleClient": "1006817248705-t3lb3ge808m9am4t7upqth79hulk456l.apps.googleusercontent.com",
"googleSecret": "QsEi-fHqkGw2_PjJmtNHf2wg",
/*
* Settings for Github auth (keep empty to disable).
*/
"githubClient": "211ea00e726baf754c78",
"githubSecret": "d0a0d0fe2c26469ae20987ac265b3a339fd73132",
/*
"githubClient": "211ea00e726baf754c78",
"githubSecret": "d0a0d0fe2c26469ae20987ac265b3a339fd73132",
/*
* Settings for Microsoft auth (keep empty to disable).
*/
"microsoftClient": "b55da740-6648-4502-8746-b9003f29d5f1",
"microsoftSecret": "idWbANxNYEF4cB368WXJhjN",
/*
"microsoftClient": "b55da740-6648-4502-8746-b9003f29d5f1",
"microsoftSecret": "idWbANxNYEF4cB368WXJhjN",
/*
* Settings for your custom oidc server.
*/
"oidcName": "OIDC",
"oidcAuthority": "",
"oidcClient": "",
"oidcSecret": "",
/*
"oidcName": "OIDC",
"oidcAuthority": "",
"oidcClient": "",
"oidcSecret": "",
/*
* Lock new users automatically, the administrator must unlock them.
*/
"lockAutomatically": false,
/*
"lockAutomatically": false,
/*
* The URL to your privacy statement, if you host Squidex yourself.
*/
"privacyUrl": "https://squidex.io/privacy"
},
"privacyUrl": "https://squidex.io/privacy"
},
"news": {
/*

53
tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerGrainTests.cs

@ -8,9 +8,8 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Squidex.Domain.Apps.Core;
using Squidex.Domain.Apps.Core.Contents;
using Squidex.Domain.Apps.Core.Schemas;
using Squidex.Infrastructure;
using Squidex.Infrastructure.Assets;
using Xunit;
@ -18,10 +17,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{
public class TextIndexerGrainTests : IDisposable
{
private readonly Schema schema =
new Schema("test")
.AddString(1, "test", Partitioning.Invariant)
.AddString(2, "localized", Partitioning.Language);
private readonly Guid schemaId = Guid.NewGuid();
private readonly List<Guid> ids1 = new List<Guid> { Guid.NewGuid() };
private readonly List<Guid> ids2 = new List<Guid> { Guid.NewGuid() };
@ -33,10 +28,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{
context = new SearchContext
{
AppVersion = 1,
Schema = schema,
SchemaVersion = 1,
AppLanguages = new List<string> { "de", "en" }
Languages = new HashSet<string> { "de", "en" }
};
sut = new TextIndexerGrain(assetStore);
@ -60,13 +52,11 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{
await other.ActivateAsync(schemaId);
var helloIds = await other.SearchAsync("Hello", context);
var foundHello = await other.SearchAsync("Hello", context);
var foundWorld = await other.SearchAsync("World", context);
Assert.Equal(ids1, helloIds);
var worldIds = await other.SearchAsync("World", context);
Assert.Equal(ids2, worldIds);
Assert.Equal(ids1, foundHello);
Assert.Equal(ids2, foundWorld);
}
finally
{
@ -79,13 +69,23 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
{
await AddInvariantContent();
var helloIds = await sut.SearchAsync("Hello", context);
var foundHello = await sut.SearchAsync("Hello", context);
var foundWorld = await sut.SearchAsync("World", context);
Assert.Equal(ids1, helloIds);
Assert.Equal(ids1, foundHello);
Assert.Equal(ids2, foundWorld);
}
var worldIds = await sut.SearchAsync("World", context);
[Fact]
public async Task Should_index_invariant_content_and_retrieve_with_fuzzy()
{
await AddInvariantContent();
Assert.Equal(ids2, worldIds);
var foundHello = await sut.SearchAsync("helo~", context);
var foundWorld = await sut.SearchAsync("wold~", context);
Assert.Equal(ids1, foundHello);
Assert.Equal(ids2, foundWorld);
}
[Fact]
@ -98,10 +98,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
var helloIds = await sut.SearchAsync("Hello", context);
Assert.Empty(helloIds);
var worldIds = await sut.SearchAsync("World", context);
Assert.Empty(helloIds);
Assert.Equal(ids2, worldIds);
}
@ -117,7 +116,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
Assert.Equal(ids1, german1);
Assert.Equal(ids1, german2);
Assert.Equal(ids2, germanStopwordsIds);
var english1 = await sut.SearchAsync("City", context);
@ -127,10 +125,17 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
Assert.Equal(ids2, english1);
Assert.Equal(ids2, english2);
Assert.Equal(ids1, englishStopwordsIds);
}
[Fact]
public async Task Should_throw_exception_for_invalid_query()
{
await AddInvariantContent();
await Assert.ThrowsAsync<ValidationException>(() => sut.SearchAsync("~hello", context));
}
private async Task AddLocalizedContent()
{
var germanData =

2
tests/Squidex.Domain.Users.Tests/AssetUserPictureStoreTests.cs

@ -35,7 +35,7 @@ namespace Squidex.Domain.Users
await sut.UploadAsync(userId, stream);
A.CallTo(() => assetStore.UploadAsync(userId, 0, "picture", stream, CancellationToken.None)).MustHaveHappened();
A.CallTo(() => assetStore.UploadAsync(userId, 0, "picture", stream, false, CancellationToken.None)).MustHaveHappened();
}
[Fact]

Loading…
Cancel
Save