Skip to content

Commit 9ece656

Browse files
committed
Add jvm aware setting and max num docs settings for batching docs for percolate queries (opensearch-project#1435)
* add jvm aware and max docs settings for batching docs for percolate queries Signed-off-by: Surya Sashank Nistala <snistala@amazon.com> * fix stats logging Signed-off-by: Surya Sashank Nistala <snistala@amazon.com> * add queryfieldnames field in findings mapping Signed-off-by: Surya Sashank Nistala <snistala@amazon.com> --------- Signed-off-by: Surya Sashank Nistala <snistala@amazon.com>
1 parent c1e2316 commit 9ece656

7 files changed

Lines changed: 300 additions & 87 deletions

File tree

alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ import org.opensearch.core.xcontent.XContentParser
9999
import org.opensearch.env.Environment
100100
import org.opensearch.env.NodeEnvironment
101101
import org.opensearch.index.IndexModule
102+
import org.opensearch.monitor.jvm.JvmStats
102103
import org.opensearch.painless.spi.PainlessExtension
103104
import org.opensearch.painless.spi.Whitelist
104105
import org.opensearch.painless.spi.WhitelistLoader
@@ -268,6 +269,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R
268269
.registerTriggerService(TriggerService(scriptService))
269270
.registerAlertService(AlertService(client, xContentRegistry, alertIndices))
270271
.registerDocLevelMonitorQueries(DocLevelMonitorQueries(client, clusterService))
272+
.registerJvmStats(JvmStats.jvmStats())
271273
.registerWorkflowService(WorkflowService(client, xContentRegistry))
272274
.registerConsumers()
273275
.registerDestinationSettings()
@@ -325,6 +327,8 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R
325327
AlertingSettings.ALERT_HISTORY_MAX_DOCS,
326328
AlertingSettings.ALERT_HISTORY_RETENTION_PERIOD,
327329
AlertingSettings.ALERTING_MAX_MONITORS,
330+
AlertingSettings.PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT,
331+
AlertingSettings.PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY,
328332
AlertingSettings.REQUEST_TIMEOUT,
329333
AlertingSettings.MAX_ACTION_THROTTLE_VALUE,
330334
AlertingSettings.FILTER_BY_BACKEND_ROLES,

alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt

Lines changed: 251 additions & 84 deletions
Large diffs are not rendered by default.

alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import org.opensearch.cluster.service.ClusterService
1818
import org.opensearch.common.settings.Settings
1919
import org.opensearch.common.unit.TimeValue
2020
import org.opensearch.core.xcontent.NamedXContentRegistry
21+
import org.opensearch.monitor.jvm.JvmStats
2122
import org.opensearch.script.ScriptService
2223
import org.opensearch.threadpool.ThreadPool
2324

@@ -36,6 +37,7 @@ data class MonitorRunnerExecutionContext(
3637
var alertService: AlertService? = null,
3738
var docLevelMonitorQueries: DocLevelMonitorQueries? = null,
3839
var workflowService: WorkflowService? = null,
40+
var jvmStats: JvmStats? = null,
3941

4042
@Volatile var retryPolicy: BackoffPolicy? = null,
4143
@Volatile var moveAlertsRetryPolicy: BackoffPolicy? = null,

alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ import org.opensearch.commons.alerting.model.Workflow
5050
import org.opensearch.commons.alerting.model.action.Action
5151
import org.opensearch.commons.alerting.util.isBucketLevelMonitor
5252
import org.opensearch.core.xcontent.NamedXContentRegistry
53+
import org.opensearch.monitor.jvm.JvmStats
5354
import org.opensearch.script.Script
5455
import org.opensearch.script.ScriptService
5556
import org.opensearch.script.TemplateScript
@@ -134,6 +135,11 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon
134135
return this
135136
}
136137

138+
fun registerJvmStats(jvmStats: JvmStats): MonitorRunnerService {
139+
this.monitorCtx.jvmStats = jvmStats
140+
return this
141+
}
142+
137143
// Must be called after registerClusterService and registerSettings in AlertingPlugin
138144
fun registerConsumers(): MonitorRunnerService {
139145
monitorCtx.retryPolicy = BackoffPolicy.constantBackoff(
@@ -258,11 +264,19 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon
258264
when (job) {
259265
is Workflow -> {
260266
launch {
267+
logger.debug(
268+
"PERF_DEBUG: executing workflow ${job.id} on node " +
269+
monitorCtx.clusterService!!.state().nodes().localNode.id
270+
)
261271
runJob(job, periodStart, periodEnd, false)
262272
}
263273
}
264274
is Monitor -> {
265275
launch {
276+
logger.debug(
277+
"PERF_DEBUG: executing ${job.monitorType} ${job.id} on node " +
278+
monitorCtx.clusterService!!.state().nodes().localNode.id
279+
)
266280
runJob(job, periodStart, periodEnd, false)
267281
}
268282
}
@@ -307,7 +321,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon
307321
val runResult = if (monitor.isBucketLevelMonitor()) {
308322
BucketLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId)
309323
} else if (monitor.isDocLevelMonitor()) {
310-
DocumentLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId)
324+
DocumentLevelMonitorRunner().runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId)
311325
} else {
312326
QueryLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId)
313327
}

alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,29 @@ class AlertingSettings {
2626
Setting.Property.Dynamic
2727
)
2828

29+
/** Defines the threshold percentage of heap size in bytes till which we accumulate docs in memory before we query against percolate query
30+
* index in document level monitor execution.
31+
*/
32+
val PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT = Setting.intSetting(
33+
"plugins.alerting.monitor.percolate_query_docs_size_memory_percentage_limit",
34+
10,
35+
0,
36+
100,
37+
Setting.Property.NodeScope, Setting.Property.Dynamic
38+
)
39+
40+
/** Defines the threshold of the maximum number of docs accumulated in memory to query against percolate query index in document
41+
* level monitor execution. The docs are being collected from searching on shards of indices mentioned in the
42+
* monitor input indices field. When the number of in-memory docs reaches or exceeds threshold we immediately perform percolate
43+
* query with the current set of docs and clear the cache and repeat the process till we have queried all indices in current
44+
* execution
45+
*/
46+
val PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY = Setting.intSetting(
47+
"plugins.alerting.monitor.percolate_query_max_num_docs_in_memory",
48+
300000, 1000,
49+
Setting.Property.NodeScope, Setting.Property.Dynamic
50+
)
51+
2952
val INPUT_TIMEOUT = Setting.positiveTimeSetting(
3053
"plugins.alerting.input_timeout",
3154
LegacyOpenDistroAlertingSettings.INPUT_TIMEOUT,

alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ object CompositeWorkflowRunner : WorkflowRunner() {
244244
executionId
245245
)
246246
} else if (delegateMonitor.isDocLevelMonitor()) {
247-
return DocumentLevelMonitorRunner.runMonitor(
247+
return DocumentLevelMonitorRunner().runMonitor(
248248
delegateMonitor,
249249
monitorCtx,
250250
periodStart,

alerting/src/main/resources/org/opensearch/alerting/alerts/finding_mapping.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"dynamic": "strict",
33
"_meta" : {
4-
"schema_version": 3
4+
"schema_version": 4
55
},
66
"properties": {
77
"schema_version": {
@@ -46,6 +46,9 @@
4646
"type" : "keyword"
4747
}
4848
}
49+
},
50+
"query_field_names": {
51+
"type": "keyword"
4952
}
5053
}
5154
},

0 commit comments

Comments
 (0)