Browse Source

Elastic search improvements.

pull/590/head
Sebastian 5 years ago
parent
commit
ce3d26911e
  1. 225
      backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Elastic/ElasticSearchMapping.cs
  2. 231
      backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Elastic/ElasticSearchTextIndex.cs
  3. 24
      backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs
  4. 17
      backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTestsBase.cs
  5. 7
      backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Elastic.cs
  6. 2
      backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Mongo.cs

225
backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Elastic/ElasticSearchMapping.cs

@ -0,0 +1,225 @@
// ==========================================================================
// Squidex Headless CMS
// ==========================================================================
// Copyright (c) Squidex UG (haftungsbeschraenkt)
// All rights reserved. Licensed under the MIT license.
// ==========================================================================
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Elasticsearch.Net;
namespace Squidex.Domain.Apps.Entities.Contents.Text.Elastic
{
/// <summary>
/// Applies the mapping for the language specific <c>texts.*</c> fields to an index.
/// </summary>
public static class ElasticSearchMapping
{
    // Maps the suffix of each "texts.<suffix>" field to the analyzer name that is assigned to it.
    // Several suffixes intentionally share one analyzer (zh/ja/ko and nb/nn/no).
    // NOTE(review): "br" is mapped to "brazilian" - confirm that this is the key the indexer
    // actually writes for Brazilian Portuguese content.
    private static readonly Dictionary<string, string> FieldAnalyzers = new Dictionary<string, string>
    {
        ["ar"] = "arabic",
        ["hy"] = "armenian",
        ["eu"] = "basque",
        ["bn"] = "bengali",
        ["br"] = "brazilian",
        ["bg"] = "bulgarian",
        ["ca"] = "catalan",
        ["zh"] = "cjk",
        ["ja"] = "cjk",
        ["ko"] = "cjk",
        ["cs"] = "czech",
        ["da"] = "danish",
        ["nl"] = "dutch",
        ["en"] = "english",
        ["fi"] = "finnish",
        ["fr"] = "french",
        ["gl"] = "galician",
        ["de"] = "german",
        ["el"] = "greek",
        ["hi"] = "hindi",
        ["hu"] = "hungarian",
        ["id"] = "indonesian",
        ["ga"] = "irish",
        ["it"] = "italian",
        ["lv"] = "latvian",
        ["lt"] = "lithuanian",
        ["nb"] = "norwegian",
        ["nn"] = "norwegian",
        ["no"] = "norwegian",
        ["pt"] = "portuguese",
        ["ro"] = "romanian",
        ["ru"] = "russian",
        ["ku"] = "sorani",
        ["es"] = "spanish",
        ["sv"] = "swedish",
        ["tr"] = "turkish",
        ["th"] = "thai"
    };

    /// <summary>
    /// Sends a put-mapping request that declares every <c>texts.&lt;language&gt;</c> field
    /// as a <c>text</c> field with its language specific analyzer.
    /// </summary>
    /// <param name="elastic">The low level client that is used to send the request.</param>
    /// <param name="indexName">The name of the index to apply the mapping to.</param>
    /// <param name="ct">The token to cancel the request.</param>
    /// <exception cref="ArgumentNullException"><paramref name="elastic"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="indexName"/> is null or whitespace.</exception>
    /// <exception cref="InvalidOperationException">The response does not indicate success.</exception>
    public static async Task ApplyAsync(IElasticLowLevelClient elastic, string indexName, CancellationToken ct = default)
    {
        if (elastic == null)
        {
            throw new ArgumentNullException(nameof(elastic));
        }

        if (string.IsNullOrWhiteSpace(indexName))
        {
            throw new ArgumentException("Index name must not be empty.", nameof(indexName));
        }

        var properties = new Dictionary<string, object>(FieldAnalyzers.Count);

        foreach (var pair in FieldAnalyzers)
        {
            var analyzer = pair.Value;

            // Same shape as a hand written { type = "text", analyzer = "..." } entry.
            properties[$"texts.{pair.Key}"] = new
            {
                type = "text",
                analyzer
            };
        }

        var query = new
        {
            properties
        };

        var result = await elastic.Indices.PutMappingAsync<StringResponse>(indexName, CreatePost(query), ctx: ct);

        if (!result.Success)
        {
            // C# interpolation is "{...}", a leading "$" would end up literally in the message.
            throw new InvalidOperationException($"Failed with {result.Body}", result.OriginalException);
        }
    }

    // Wraps the request object so that the client serializes it as the request body.
    private static PostData CreatePost<T>(T data)
    {
        return new SerializableData<T>(data);
    }
}
}

231
backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Elastic/ElasticSearchTextIndex.cs

@ -38,206 +38,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text.Elastic
this.waitForTesting = waitForTesting; this.waitForTesting = waitForTesting;
} }
public async Task InitializeAsync(CancellationToken ct = default) public Task InitializeAsync(CancellationToken ct = default)
{ {
var query = new return ElasticSearchMapping.ApplyAsync(client, indexName, ct);
{
properties = new Dictionary<string, object>
{
["texts.ar"] = new
{
type = "text",
analyzer = "arabic"
},
["texts.hy"] = new
{
type = "text",
analyzer = "armenian"
},
["texts.eu"] = new
{
type = "text",
analyzer = "basque"
},
["texts.bn"] = new
{
type = "text",
analyzer = "bengali"
},
["texts.br"] = new
{
type = "text",
analyzer = "brazilian"
},
["texts.bg"] = new
{
type = "text",
analyzer = "bulgarian"
},
["texts.ca"] = new
{
type = "text",
analyzer = "catalan"
},
["texts.zh"] = new
{
type = "text",
analyzer = "cjk"
},
["texts.ja"] = new
{
type = "text",
analyzer = "cjk"
},
["texts.ko"] = new
{
type = "text",
analyzer = "cjk"
},
["texts.cs"] = new
{
type = "text",
analyzer = "czech"
},
["texts.da"] = new
{
type = "text",
analyzer = "danish"
},
["texts.nl"] = new
{
type = "text",
analyzer = "dutch"
},
["texts.en"] = new
{
type = "text",
analyzer = "english"
},
["texts.fi"] = new
{
type = "text",
analyzer = "finnish"
},
["texts.fr"] = new
{
type = "text",
analyzer = "french"
},
["texts.gl"] = new
{
type = "text",
analyzer = "galician"
},
["texts.de"] = new
{
type = "text",
analyzer = "german"
},
["texts.el"] = new
{
type = "text",
analyzer = "greek"
},
["texts.hi"] = new
{
type = "text",
analyzer = "hindi"
},
["texts.hu"] = new
{
type = "text",
analyzer = "hungarian"
},
["texts.id"] = new
{
type = "text",
analyzer = "indonesian"
},
["texts.ga"] = new
{
type = "text",
analyzer = "irish"
},
["texts.it"] = new
{
type = "text",
analyzer = "italian"
},
["texts.lv"] = new
{
type = "text",
analyzer = "latvian"
},
["texts.lt"] = new
{
type = "text",
analyzer = "lithuanian"
},
["texts.nb"] = new
{
type = "text",
analyzer = "norwegian"
},
["texts.nn"] = new
{
type = "text",
analyzer = "norwegian"
},
["texts.no"] = new
{
type = "text",
analyzer = "norwegian"
},
["texts.pt"] = new
{
type = "text",
analyzer = "portuguese"
},
["texts.ro"] = new
{
type = "text",
analyzer = "romanian"
},
["texts.ru"] = new
{
type = "text",
analyzer = "russian"
},
["texts.ku"] = new
{
type = "text",
analyzer = "sorani"
},
["texts.es"] = new
{
type = "text",
analyzer = "spanish"
},
["texts.sv"] = new
{
type = "text",
analyzer = "swedish"
},
["texts.tr"] = new
{
type = "text",
analyzer = "turkish"
},
["texts.th"] = new
{
type = "text",
analyzer = "thai"
}
}
};
var result = await client.Indices.PutMappingAsync<StringResponse>(indexName, CreatePost(query));
if (!result.Success)
{
throw new InvalidOperationException($"Failed with ${result.Body}", result.OriginalException);
}
} }
public Task ClearAsync() public Task ClearAsync()
@ -315,11 +118,6 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text.Elastic
return client.DeleteAsync<StringResponse>(indexName, delete.DocId); return client.DeleteAsync<StringResponse>(indexName, delete.DocId);
} }
private static PostData CreatePost<T>(T data)
{
return new SerializableData<T>(data);
}
public async Task<List<DomainId>?> SearchAsync(string? queryText, IAppEntity app, SearchFilter? filter, SearchScope scope) public async Task<List<DomainId>?> SearchAsync(string? queryText, IAppEntity app, SearchFilter? filter, SearchScope scope)
{ {
if (string.IsNullOrWhiteSpace(queryText)) if (string.IsNullOrWhiteSpace(queryText))
@ -327,11 +125,25 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text.Elastic
return new List<DomainId>(); return new List<DomainId>();
} }
var isFuzzy = queryText.StartsWith("~", StringComparison.OrdinalIgnoreCase); var isFuzzy = queryText.EndsWith("~", StringComparison.OrdinalIgnoreCase);
if (isFuzzy) if (isFuzzy)
{ {
queryText = queryText.Substring(1); queryText = queryText[..^1];
}
var field = "texts.*";
if (queryText.Length >= 4 && queryText.IndexOf(":", StringComparison.OrdinalIgnoreCase) == 2)
{
var candidateLanguage = queryText.Substring(0, 2);
if (Language.IsValidLanguage(candidateLanguage))
{
field = $"texts.{candidateLanguage}";
queryText = queryText.Substring(3);
}
} }
var serveField = GetServeField(scope); var serveField = GetServeField(scope);
@ -365,7 +177,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text.Elastic
fuzziness = isFuzzy ? (object)"AUTO" : 0, fuzziness = isFuzzy ? (object)"AUTO" : 0,
fields = new[] fields = new[]
{ {
"texts.*" field
}, },
query = queryText query = queryText
} }
@ -427,5 +239,10 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text.Elastic
"servePublished" : "servePublished" :
"serveAll"; "serveAll";
} }
private static PostData CreatePost<T>(T data)
{
return new SerializableData<T>(data);
}
} }
} }

24
backend/src/Squidex.Domain.Apps.Entities/Contents/Text/Extensions.cs

@ -6,7 +6,6 @@
// ========================================================================== // ==========================================================================
using System.Collections.Generic; using System.Collections.Generic;
using System.Globalization;
using System.Text; using System.Text;
using Microsoft.Extensions.ObjectPool; using Microsoft.Extensions.ObjectPool;
using Squidex.Domain.Apps.Core.Contents; using Squidex.Domain.Apps.Core.Contents;
@ -94,29 +93,8 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
sb.Append(" "); sb.Append(" ");
} }
foreach (var c in text) sb.Append(text);
{
if (IsCJKLetter(c))
{
sb.Append(c);
sb.Append(" ");
}
else
{
sb.Append(c);
}
}
} }
} }
private static bool IsCJKLetter(char c)
{
return char.IsLetter(c) && char.GetUnicodeCategory(c) == UnicodeCategory.OtherLetter && !IsKatakana(c);
}
private static bool IsKatakana(char c)
{
return c >= '\u30A0' && c <= '\u30FF';
}
} }
} }

17
backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTestsBase.cs

@ -41,7 +41,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
public virtual bool SupportsCleanup { get; set; } = false; public virtual bool SupportsCleanup { get; set; } = false;
public virtual bool SupportsSearchSyntax { get; set; } = true; public virtual bool SupportssQuerySyntax { get; set; } = true;
public virtual InMemoryTextIndexerState State { get; } = new InMemoryTextIndexerState(); public virtual InMemoryTextIndexerState State { get; } = new InMemoryTextIndexerState();
@ -53,21 +53,10 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
Language.EN); Language.EN);
} }
[SkippableFact]
public async Task Should_throw_exception_for_invalid_query()
{
Skip.IfNot(SupportsSearchSyntax);
await Assert.ThrowsAsync<ValidationException>(async () =>
{
await TestCombinations(Search(expected: null, text: "~hello"));
});
}
[SkippableFact] [SkippableFact]
public async Task Should_index_invariant_content_and_retrieve_with_fuzzy() public async Task Should_index_invariant_content_and_retrieve_with_fuzzy()
{ {
Skip.IfNot(SupportsSearchSyntax); Skip.IfNot(SupportssQuerySyntax);
await TestCombinations( await TestCombinations(
Create(ids1[0], "iv", "Hello"), Create(ids1[0], "iv", "Hello"),
@ -81,7 +70,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
[SkippableFact] [SkippableFact]
public async Task Should_search_by_field() public async Task Should_search_by_field()
{ {
Skip.IfNot(SupportsSearchSyntax); Skip.IfNot(SupportssQuerySyntax);
await TestCombinations( await TestCombinations(
Create(ids1[0], "en", "City"), Create(ids1[0], "en", "City"),

7
backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Elastic.cs

@ -38,7 +38,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
public TextIndexerTests_Elastic() public TextIndexerTests_Elastic()
{ {
SupportsSearchSyntax = false; SupportssQuerySyntax = true;
} }
[Fact] [Fact]
@ -57,10 +57,9 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
public async Task Should_index_cjk_content_and_retrieve() public async Task Should_index_cjk_content_and_retrieve()
{ {
await TestCombinations( await TestCombinations(
Create(ids1[0], "zh", "可以将正向最大匹配方法和"), Create(ids1[0], "zh", "東京大学"),
Search(expected: ids1, text: "大"), Search(expected: ids1, text: "東京")
Search(expected: ids1, text: "匹")
); );
} }
} }

2
backend/tests/Squidex.Domain.Apps.Entities.Tests/Contents/Text/TextIndexerTests_Mongo.cs

@ -44,7 +44,7 @@ namespace Squidex.Domain.Apps.Entities.Contents.Text
public TextIndexerTests_Mongo() public TextIndexerTests_Mongo()
{ {
SupportsSearchSyntax = false; SupportssQuerySyntax = false;
} }
[Fact] [Fact]

Loading…
Cancel
Save