Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Delegate getMin/getMax methods for ExitableTerms ([#20775](https://github.com/opensearch-project/OpenSearch/pull/20775))
- Fix terms lookup subquery fetch limit reading from non-existent index setting instead of cluster `max_clause_count` ([#20823](https://github.com/opensearch-project/OpenSearch/pull/20823))
- Fix array_index_out_of_bounds_exception with wildcard and aggregations ([#20842](https://github.com/opensearch-project/OpenSearch/pull/20842))
- Handle dependencies between analyzers ([#19248](https://github.com/opensearch-project/OpenSearch/pull/19248))

### Dependencies
- Bump shadow-gradle-plugin from 8.3.9 to 9.3.1 ([#20569](https://github.com/opensearch-project/OpenSearch/pull/20569))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,18 @@ public TokenFilterFactory getChainAwareTokenFilterFactory(
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters, allFilters);
return getChainAwareTokenFilterFactory(tokenizer, charFilters, previousTokenFilters, allFilters, name -> null);
}

@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(
TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters,
Function<String, Analyzer> analyzersBuiltSoFar
) {
final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters, allFilters, analyzersBuiltSoFar);
final SynonymMap synonyms = buildSynonyms(analyzer, getRulesFromSettings(environment));
final String name = name();
return new TokenFilterFactory() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,18 @@ public TokenFilterFactory getChainAwareTokenFilterFactory(
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters
) {
final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters, allFilters);
return getChainAwareTokenFilterFactory(tokenizer, charFilters, previousTokenFilters, allFilters, name -> null);
}

@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(
TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters,
List<TokenFilterFactory> previousTokenFilters,
Function<String, TokenFilterFactory> allFilters,
Function<String, Analyzer> analyzersBuiltSoFar
) {
final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters, allFilters, analyzersBuiltSoFar);
final SynonymMap synonyms = buildSynonyms(analyzer, getRulesFromSettings(environment));
final String name = name();
return new TokenFilterFactory() {
Expand Down Expand Up @@ -147,10 +158,15 @@ Analyzer buildSynonymAnalyzer(
TokenizerFactory tokenizer,
List<CharFilterFactory> charFilters,
List<TokenFilterFactory> tokenFilters,
Function<String, TokenFilterFactory> allFilters
Function<String, TokenFilterFactory> allFilters,
Function<String, Analyzer> analyzersBuiltSoFar
) {
if (synonymAnalyzerName != null) {
Analyzer customSynonymAnalyzer;
// first, check settings analyzers
Analyzer customSynonymAnalyzer = analyzersBuiltSoFar.apply(synonymAnalyzerName);
if (customSynonymAnalyzer != null) {
return customSynonymAnalyzer;
}
try {
customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzerName);
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,9 @@

import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Collections;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.hasToString;

public class EdgeNGramTokenizerTests extends OpenSearchTokenStreamTestCase {

private IndexAnalyzers buildAnalyzers(Version version, String tokenizer) throws IOException {
Expand Down Expand Up @@ -99,7 +97,14 @@ public void testPreConfiguredTokenizer() throws IOException {
IllegalArgumentException.class,
() -> buildAnalyzers(VersionUtils.randomVersionBetween(random(), Version.V_3_0_0, Version.CURRENT), "edgeNGram")
);
assertThat(e, hasToString(containsString("The [edgeNGram] tokenizer name was deprecated pre 1.0.")));

assertTrue(
"expected deprecation message in suppressed causes",
Arrays.stream(e.getSuppressed())
.map(org.opensearch.ExceptionsHelper::unwrapCause)
.map(Throwable::getMessage)
.anyMatch(msg -> msg.contains("The [edgeNGram] tokenizer name was deprecated pre 1.0."))
);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ public void testSynonymWordDeleteByAnalyzer() throws IOException {
fail("fail! due to synonym word deleted by analyzer");
} catch (Exception e) {
assertThat(e, instanceOf(IllegalArgumentException.class));
assertThat(e.getMessage(), startsWith("Failed to build synonyms"));
assertThat(e.getMessage(), startsWith("Failed to build analyzers: [synonymAnalyzerWithStopSynonymBeforeSynonym]"));
}
}

Expand All @@ -141,7 +141,7 @@ public void testExpandSynonymWordDeleteByAnalyzer() throws IOException {
fail("fail! due to synonym word deleted by analyzer");
} catch (Exception e) {
assertThat(e, instanceOf(IllegalArgumentException.class));
assertThat(e.getMessage(), startsWith("Failed to build synonyms"));
assertThat(e.getMessage(), startsWith("Failed to build analyzers: [synonymAnalyzerExpandWithStopBeforeSynonym]"));
}
}

Expand Down Expand Up @@ -269,7 +269,7 @@ public void testTokenFiltersBypassSynonymAnalysis() throws IOException {
TokenFilterFactory tff = plugin.getTokenFilters(analysisModule).get(factory).get(idxSettings, environment, factory, settings);
TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings);
SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry);
Analyzer analyzer = stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null);
Analyzer analyzer = stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null, null);

try (TokenStream ts = analyzer.tokenStream("field", "text")) {
assertThat(ts, instanceOf(KeywordTokenizer.class));
Expand Down Expand Up @@ -350,7 +350,7 @@ public void testDisallowedTokenFilters() throws IOException {
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
"Expected IllegalArgumentException for factory " + factory,
() -> stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null)
() -> stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null, null)
);

assertEquals(factory, "Token filter [" + factory + "] cannot be used to parse synonyms", e.getMessage());
Expand Down Expand Up @@ -443,4 +443,91 @@ public void testSynonymAnalyzerWithWordDelimiter() throws IOException {
assertTokenStreamContents(ts, new String[] { "notebook" }, new int[] { 0 }, new int[] { 6 });
}
}

/**
* Test the core dependency resolution issue from GitHub #18037:
* synonym_graph with custom synonym_analyzer should work even when
* the main analyzer contains word_delimiter_graph that would normally
* cause "cannot be used to parse synonyms" error.
*
* This test intentionally declares the dependent analyzer before the
* synonym analyzer to ensure the system resolves dependencies
* automatically without requiring a manual "order" setting.
*/
public void testSynonymAnalyzerDependencyResolution() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)

// Declare dependent analyzer FIRST intentionally
.put("index.analysis.analyzer.main_analyzer.type", "custom")
.put("index.analysis.analyzer.main_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.main_analyzer.filter", "lowercase", "test_word_delimiter", "test_synonyms")

// Problematic filter for synonym parsing
.put("index.analysis.filter.test_word_delimiter.type", "word_delimiter_graph")
.put("index.analysis.filter.test_word_delimiter.generate_word_parts", true)

// Synonym filter referencing another analyzer
.put("index.analysis.filter.test_synonyms.type", "synonym_graph")
.putList("index.analysis.filter.test_synonyms.synonyms", "laptop,notebook")
.put("index.analysis.filter.test_synonyms.synonym_analyzer", "simple_synonym_analyzer")

// Define the synonym analyzer AFTER the main analyzer
.put("index.analysis.analyzer.simple_synonym_analyzer.type", "custom")
.put("index.analysis.analyzer.simple_synonym_analyzer.tokenizer", "standard")
.build();

IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test_index", settings);

// Should succeed with the fix (would fail before due to registration order)
IndexAnalyzers analyzers = new AnalysisModule(
TestEnvironment.newEnvironment(settings),
Collections.singletonList(new CommonAnalysisModulePlugin())
).getAnalysisRegistry().build(idxSettings);

assertNotNull("main_analyzer should be created", analyzers.get("main_analyzer"));
assertNotNull("simple_synonym_analyzer should be created", analyzers.get("simple_synonym_analyzer"));
}

/**
* Verifies that circular synonym_analyzer dependencies are detected
* and rejected during analyzer construction.
*/
public void testCircularSynonymAnalyzerDependency() throws IOException {
Settings settings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)

// Analyzer A depends on B
.put("index.analysis.analyzer.analyzer_a.type", "custom")
.put("index.analysis.analyzer.analyzer_a.tokenizer", "standard")
.putList("index.analysis.analyzer.analyzer_a.filter", "syn_filter_a")

.put("index.analysis.filter.syn_filter_a.type", "synonym_graph")
.putList("index.analysis.filter.syn_filter_a.synonyms", "foo,bar")
.put("index.analysis.filter.syn_filter_a.synonym_analyzer", "analyzer_b")

// Analyzer B depends on A
.put("index.analysis.analyzer.analyzer_b.type", "custom")
.put("index.analysis.analyzer.analyzer_b.tokenizer", "standard")
.putList("index.analysis.analyzer.analyzer_b.filter", "syn_filter_b")

.put("index.analysis.filter.syn_filter_b.type", "synonym_graph")
.putList("index.analysis.filter.syn_filter_b.synonyms", "baz,qux")
.put("index.analysis.filter.syn_filter_b.synonym_analyzer", "analyzer_a")

.build();

IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test_index", settings);

IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(new CommonAnalysisModulePlugin()))
.getAnalysisRegistry()
.build(idxSettings)
);

assertThat(e.getMessage(), startsWith("Circular analyzer dependency"));
}
}
Loading
Loading