Skip to content

Commit c369090

Browse files
authored
[Remote Store] Add Remote store stats api (opensearch-project#7441)
Signed-off-by: Varun Bansal <bansvaru@amazon.com>
1 parent be0047e commit c369090

File tree

20 files changed

+1446
-0
lines changed

20 files changed

+1446
-0
lines changed

client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,7 @@ public void testApiNamingConventions() throws Exception {
875875
"nodes.reload_secure_settings",
876876
"search_shards",
877877
"remote_store.restore",
878+
"remote_store.stats",
878879
"cluster.put_weighted_routing",
879880
"cluster.get_weighted_routing",
880881
"cluster.delete_weighted_routing",
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
{
2+
"remote_store.stats":{
3+
"documentation":{
4+
"url": "https://opensearch.org/docs/latest/tuning-your-cluster/availability-and-recovery/remote",
5+
"description":"Stats for remote store."
6+
},
7+
"stability":"experimental",
8+
"url":{
9+
"paths":[
10+
{
11+
"path":"/_remotestore/stats/{index}",
12+
"methods":[
13+
"GET"
14+
],
15+
"parts":{
16+
"index":{
17+
"type":"string",
18+
"description": "Index name to fetch stats"
19+
}
20+
}
21+
},
22+
{
23+
"path":"/_remotestore/stats/{index}/{shard_id}",
24+
"methods":[
25+
"GET"
26+
],
27+
"parts":{
28+
"index":{
29+
"type":"string",
30+
"description":"Index name to fetch stats"
31+
},
32+
"shard_id":{
33+
"type":"string",
34+
"description":"Specific shard to fetch stats"
35+
}
36+
}
37+
}
38+
]
39+
},
40+
"params":{
41+
"timeout":{
42+
"type":"time",
43+
"default":"10s",
44+
"description":"Max time each individual stats request should take."
45+
}
46+
}
47+
}
48+
}

server/src/main/java/org/opensearch/action/ActionModule.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@
5656
import org.opensearch.action.admin.cluster.node.reload.NodesReloadSecureSettingsAction;
5757
import org.opensearch.action.admin.cluster.node.reload.TransportNodesReloadSecureSettingsAction;
5858
import org.opensearch.action.admin.cluster.node.stats.NodesStatsAction;
59+
import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsAction;
5960
import org.opensearch.action.admin.cluster.node.stats.TransportNodesStatsAction;
61+
import org.opensearch.action.admin.cluster.remotestore.stats.TransportRemoteStoreStatsAction;
6062
import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksAction;
6163
import org.opensearch.action.admin.cluster.node.tasks.cancel.TransportCancelTasksAction;
6264
import org.opensearch.action.admin.cluster.node.tasks.get.GetTaskAction;
@@ -348,6 +350,7 @@
348350
import org.opensearch.rest.action.admin.cluster.RestPutStoredScriptAction;
349351
import org.opensearch.rest.action.admin.cluster.RestReloadSecureSettingsAction;
350352
import org.opensearch.rest.action.admin.cluster.RestRemoteClusterInfoAction;
353+
import org.opensearch.rest.action.admin.cluster.RestRemoteStoreStatsAction;
351354
import org.opensearch.rest.action.admin.cluster.RestRestoreRemoteStoreAction;
352355
import org.opensearch.rest.action.admin.cluster.RestRestoreSnapshotAction;
353356
import org.opensearch.rest.action.admin.cluster.RestSnapshotsStatusAction;
@@ -582,6 +585,7 @@ public <Request extends ActionRequest, Response extends ActionResponse> void reg
582585
actions.register(NodesInfoAction.INSTANCE, TransportNodesInfoAction.class);
583586
actions.register(RemoteInfoAction.INSTANCE, TransportRemoteInfoAction.class);
584587
actions.register(NodesStatsAction.INSTANCE, TransportNodesStatsAction.class);
588+
actions.register(RemoteStoreStatsAction.INSTANCE, TransportRemoteStoreStatsAction.class);
585589
actions.register(NodesUsageAction.INSTANCE, TransportNodesUsageAction.class);
586590
actions.register(NodesHotThreadsAction.INSTANCE, TransportNodesHotThreadsAction.class);
587591
actions.register(ListTasksAction.INSTANCE, TransportListTasksAction.class);
@@ -958,6 +962,7 @@ public void initRestHandlers(Supplier<DiscoveryNodes> nodesInCluster) {
958962

959963
// Remote Store APIs
960964
if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE)) {
965+
registerHandler.accept(new RestRemoteStoreStatsAction());
961966
registerHandler.accept(new RestRestoreRemoteStoreAction());
962967
}
963968
}
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.action.admin.cluster.remotestore.stats;
10+
11+
import org.opensearch.common.io.stream.StreamInput;
12+
import org.opensearch.common.io.stream.StreamOutput;
13+
import org.opensearch.common.io.stream.Writeable;
14+
import org.opensearch.core.xcontent.ToXContentFragment;
15+
import org.opensearch.core.xcontent.XContentBuilder;
16+
import org.opensearch.index.remote.RemoteRefreshSegmentTracker;
17+
18+
import java.io.IOException;
19+
20+
/**
21+
* Encapsulates all remote store stats
22+
*
23+
* @opensearch.internal
24+
*/
25+
public class RemoteStoreStats implements Writeable, ToXContentFragment {
26+
27+
private final RemoteRefreshSegmentTracker.Stats remoteSegmentUploadShardStats;
28+
29+
public RemoteStoreStats(RemoteRefreshSegmentTracker.Stats remoteSegmentUploadShardStats) {
30+
this.remoteSegmentUploadShardStats = remoteSegmentUploadShardStats;
31+
}
32+
33+
public RemoteStoreStats(StreamInput in) throws IOException {
34+
remoteSegmentUploadShardStats = in.readOptionalWriteable(RemoteRefreshSegmentTracker.Stats::new);
35+
}
36+
37+
public RemoteRefreshSegmentTracker.Stats getStats() {
38+
return remoteSegmentUploadShardStats;
39+
}
40+
41+
@Override
42+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
43+
builder.startObject()
44+
.field(Fields.SHARD_ID, remoteSegmentUploadShardStats.shardId)
45+
46+
.field(Fields.LOCAL_REFRESH_TIMESTAMP, remoteSegmentUploadShardStats.localRefreshTimeMs)
47+
.field(Fields.REMOTE_REFRESH_TIMESTAMP, remoteSegmentUploadShardStats.remoteRefreshTimeMs)
48+
.field(Fields.REFRESH_LAG, remoteSegmentUploadShardStats.localRefreshNumber - remoteSegmentUploadShardStats.remoteRefreshNumber)
49+
.field(Fields.BYTES_LAG, remoteSegmentUploadShardStats.bytesLag)
50+
51+
.field(Fields.BACKPRESSURE_REJECTION_COUNT, remoteSegmentUploadShardStats.rejectionCount)
52+
.field(Fields.CONSECUTIVE_FAILURE_COUNT, remoteSegmentUploadShardStats.consecutiveFailuresCount);
53+
54+
builder.startObject(Fields.TOTAL_REMOTE_REFRESH);
55+
builder.field(SubFields.STARTED, remoteSegmentUploadShardStats.totalUploadsStarted)
56+
.field(SubFields.SUCCEEDED, remoteSegmentUploadShardStats.totalUploadsSucceeded)
57+
.field(SubFields.FAILED, remoteSegmentUploadShardStats.totalUploadsFailed);
58+
builder.endObject();
59+
60+
builder.startObject(Fields.TOTAL_UPLOADS_IN_BYTES);
61+
builder.field(SubFields.STARTED, remoteSegmentUploadShardStats.uploadBytesStarted)
62+
.field(SubFields.SUCCEEDED, remoteSegmentUploadShardStats.uploadBytesSucceeded)
63+
.field(SubFields.FAILED, remoteSegmentUploadShardStats.uploadBytesFailed);
64+
builder.endObject();
65+
66+
builder.startObject(Fields.REMOTE_REFRESH_SIZE_IN_BYTES);
67+
builder.field(SubFields.LAST_SUCCESSFUL, remoteSegmentUploadShardStats.lastSuccessfulRemoteRefreshBytes);
68+
builder.field(SubFields.MOVING_AVG, remoteSegmentUploadShardStats.uploadBytesMovingAverage);
69+
builder.endObject();
70+
71+
builder.startObject(Fields.UPLOAD_LATENCY_IN_BYTES_PER_SEC);
72+
builder.field(SubFields.MOVING_AVG, remoteSegmentUploadShardStats.uploadBytesPerSecMovingAverage);
73+
builder.endObject();
74+
builder.startObject(Fields.REMOTE_REFRESH_LATENCY_IN_MILLIS);
75+
builder.field(SubFields.MOVING_AVG, remoteSegmentUploadShardStats.uploadTimeMovingAverage);
76+
builder.endObject();
77+
builder.endObject();
78+
79+
return builder;
80+
}
81+
82+
@Override
83+
public void writeTo(StreamOutput out) throws IOException {
84+
out.writeOptionalWriteable(remoteSegmentUploadShardStats);
85+
}
86+
87+
/**
88+
* Fields for remote store stats response
89+
*/
90+
static final class Fields {
91+
static final String SHARD_ID = "shard_id";
92+
93+
/**
94+
* Last successful local refresh timestamp in milliseconds
95+
*/
96+
static final String LOCAL_REFRESH_TIMESTAMP = "local_refresh_timestamp_in_millis";
97+
98+
/**
99+
* Last successful remote refresh timestamp in milliseconds
100+
*/
101+
static final String REMOTE_REFRESH_TIMESTAMP = "remote_refresh_timestamp_in_millis";
102+
103+
/**
104+
* Lag in terms of bytes b/w local and remote store
105+
*/
106+
static final String BYTES_LAG = "bytes_lag";
107+
108+
/**
109+
* No of refresh remote store is lagging behind local
110+
*/
111+
static final String REFRESH_LAG = "refresh_lag";
112+
113+
/**
114+
* Total write rejections due to remote store backpressure kick in
115+
*/
116+
static final String BACKPRESSURE_REJECTION_COUNT = "backpressure_rejection_count";
117+
118+
/**
119+
* No of consecutive remote refresh failures without a single success since the first failures
120+
*/
121+
static final String CONSECUTIVE_FAILURE_COUNT = "consecutive_failure_count";
122+
123+
/**
124+
* Represents the number of remote refreshes
125+
*/
126+
static final String TOTAL_REMOTE_REFRESH = "total_remote_refresh";
127+
128+
/**
129+
* Represents the total uploads to remote store in bytes
130+
*/
131+
static final String TOTAL_UPLOADS_IN_BYTES = "total_uploads_in_bytes";
132+
133+
/**
134+
* Represents the size of new data to be uploaded as part of a refresh
135+
*/
136+
static final String REMOTE_REFRESH_SIZE_IN_BYTES = "remote_refresh_size_in_bytes";
137+
138+
/**
139+
* Represents the speed of remote store uploads in bytes per sec
140+
*/
141+
static final String UPLOAD_LATENCY_IN_BYTES_PER_SEC = "upload_latency_in_bytes_per_sec";
142+
143+
/**
144+
* Time taken by a single remote refresh
145+
*/
146+
static final String REMOTE_REFRESH_LATENCY_IN_MILLIS = "remote_refresh_latency_in_millis";
147+
}
148+
149+
/**
150+
* Reusable sub fields for {@link Fields}
151+
*/
152+
static final class SubFields {
153+
static final String STARTED = "started";
154+
static final String SUCCEEDED = "succeeded";
155+
static final String FAILED = "failed";
156+
157+
/**
158+
* Moving avg over last N values stat for a {@link Fields}
159+
*/
160+
static final String MOVING_AVG = "moving_avg";
161+
162+
/**
163+
* Most recent successful attempt stat for a {@link Fields}
164+
*/
165+
static final String LAST_SUCCESSFUL = "last_successful";
166+
}
167+
168+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.action.admin.cluster.remotestore.stats;
10+
11+
import org.opensearch.action.ActionType;
12+
13+
/**
14+
* Remote store stats action
15+
*
16+
* @opensearch.internal
17+
*/
18+
public class RemoteStoreStatsAction extends ActionType<RemoteStoreStatsResponse> {
19+
20+
public static final RemoteStoreStatsAction INSTANCE = new RemoteStoreStatsAction();
21+
public static final String NAME = "cluster:monitor/_remotestore/stats";
22+
23+
private RemoteStoreStatsAction() {
24+
super(NAME, RemoteStoreStatsResponse::new);
25+
}
26+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.action.admin.cluster.remotestore.stats;
10+
11+
import org.opensearch.action.support.broadcast.BroadcastRequest;
12+
import org.opensearch.common.io.stream.StreamInput;
13+
import org.opensearch.common.io.stream.StreamOutput;
14+
15+
import java.io.IOException;
16+
17+
/**
18+
* Encapsulates all remote store stats
19+
*
20+
* @opensearch.internal
21+
*/
22+
public class RemoteStoreStatsRequest extends BroadcastRequest<RemoteStoreStatsRequest> {
23+
24+
private String[] shards;
25+
private boolean local = false;
26+
27+
public RemoteStoreStatsRequest() {
28+
super((String[]) null);
29+
shards = new String[0];
30+
}
31+
32+
public RemoteStoreStatsRequest(StreamInput in) throws IOException {
33+
super(in);
34+
shards = in.readStringArray();
35+
local = in.readBoolean();
36+
}
37+
38+
@Override
39+
public void writeTo(StreamOutput out) throws IOException {
40+
super.writeTo(out);
41+
out.writeStringArray(shards);
42+
out.writeBoolean(local);
43+
}
44+
45+
public RemoteStoreStatsRequest shards(String... shards) {
46+
this.shards = shards;
47+
return this;
48+
}
49+
50+
public String[] shards() {
51+
return this.shards;
52+
}
53+
54+
public void local(boolean local) {
55+
this.local = local;
56+
}
57+
58+
public boolean local() {
59+
return local;
60+
}
61+
}

0 commit comments

Comments
 (0)