Skip to content

Commit cd03525

Browse files
Enhancing FS stats to include read / write time, io time and queue size
Signed-off-by: Bharathwaj G <bharath78910@gmail.com>
1 parent 562e3b2 commit cd03525

6 files changed

Lines changed: 272 additions & 5 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1414
- Implement Visitor Design pattern in QueryBuilder to enable the capability to traverse through the complex QueryBuilder tree. ([#10110](https://github.com/opensearch-project/OpenSearch/pull/10110))
1515
- Provide service accounts tokens to extensions ([#9618](https://github.com/opensearch-project/OpenSearch/pull/9618))
1616
- Configurable merge policy for index with an option to choose from LogByteSize and Tiered merge policy ([#9992](https://github.com/opensearch-project/OpenSearch/pull/9992))
17+
- [AdmissionControl] Add enhancements to FS stats to include read/write time, queue size and IO time ([#10541](https://github.com/opensearch-project/OpenSearch/pull/10541))
1718

1819
### Dependencies
1920
- Bump `log4j-core` from 2.18.0 to 2.19.0

server/src/main/java/org/opensearch/monitor/fs/FsInfo.java

Lines changed: 161 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,14 @@ public static class DeviceStats implements Writeable, ToXContentFragment {
235235
final long previousWritesCompleted;
236236
final long currentSectorsWritten;
237237
final long previousSectorsWritten;
238+
final long currentReadTime;
239+
final long previousReadTime;
240+
final long currentWriteTime;
241+
final long previousWriteTime;
242+
final long currentQueueSize;
243+
final long previousQueueSize;
244+
final long currentIOTime;
245+
final long previousIOTime;
238246

239247
public DeviceStats(
240248
final int majorDeviceNumber,
@@ -244,6 +252,10 @@ public DeviceStats(
244252
final long currentSectorsRead,
245253
final long currentWritesCompleted,
246254
final long currentSectorsWritten,
255+
final long currentReadTime,
256+
final long currentWriteTime,
257+
final long currrentQueueSize,
258+
final long currentIOTime,
247259
final DeviceStats previousDeviceStats
248260
) {
249261
this(
@@ -257,7 +269,15 @@ public DeviceStats(
257269
currentSectorsRead,
258270
previousDeviceStats != null ? previousDeviceStats.currentSectorsRead : -1,
259271
currentWritesCompleted,
260-
previousDeviceStats != null ? previousDeviceStats.currentWritesCompleted : -1
272+
previousDeviceStats != null ? previousDeviceStats.currentWritesCompleted : -1,
273+
currentReadTime,
274+
previousDeviceStats != null ? previousDeviceStats.currentReadTime : -1,
275+
currentWriteTime,
276+
previousDeviceStats != null ? previousDeviceStats.currentWriteTime : -1,
277+
currrentQueueSize,
278+
previousDeviceStats != null ? previousDeviceStats.currentQueueSize : -1,
279+
currentIOTime,
280+
previousDeviceStats != null ? previousDeviceStats.currentIOTime : -1
261281
);
262282
}
263283

@@ -272,7 +292,15 @@ private DeviceStats(
272292
final long currentSectorsRead,
273293
final long previousSectorsRead,
274294
final long currentWritesCompleted,
275-
final long previousWritesCompleted
295+
final long previousWritesCompleted,
296+
final long currentReadTime,
297+
final long previousReadTime,
298+
final long currentWriteTime,
299+
final long previousWriteTime,
300+
final long currentQueueSize,
301+
final long previousQueueSize,
302+
final long currentIOTime,
303+
final long previousIOTime
276304
) {
277305
this.majorDeviceNumber = majorDeviceNumber;
278306
this.minorDeviceNumber = minorDeviceNumber;
@@ -285,6 +313,14 @@ private DeviceStats(
285313
this.previousSectorsRead = previousSectorsRead;
286314
this.currentSectorsWritten = currentSectorsWritten;
287315
this.previousSectorsWritten = previousSectorsWritten;
316+
this.currentReadTime = currentReadTime;
317+
this.previousReadTime = previousReadTime;
318+
this.currentWriteTime = currentWriteTime;
319+
this.previousWriteTime = previousWriteTime;
320+
this.currentQueueSize = currentQueueSize;
321+
this.previousQueueSize = previousQueueSize;
322+
this.currentIOTime = currentIOTime;
323+
this.previousIOTime = previousIOTime;
288324
}
289325

290326
public DeviceStats(StreamInput in) throws IOException {
@@ -299,6 +335,14 @@ public DeviceStats(StreamInput in) throws IOException {
299335
previousSectorsRead = in.readLong();
300336
currentSectorsWritten = in.readLong();
301337
previousSectorsWritten = in.readLong();
338+
currentReadTime = in.readLong();
339+
previousReadTime = in.readLong();
340+
currentWriteTime = in.readLong();
341+
previousWriteTime = in.readLong();
342+
currentQueueSize = in.readLong();
343+
previousQueueSize = in.readLong();
344+
currentIOTime = in.readLong();
345+
previousIOTime = in.readLong();
302346
}
303347

304348
@Override
@@ -314,6 +358,14 @@ public void writeTo(StreamOutput out) throws IOException {
314358
out.writeLong(previousSectorsRead);
315359
out.writeLong(currentSectorsWritten);
316360
out.writeLong(previousSectorsWritten);
361+
out.writeLong(currentReadTime);
362+
out.writeLong(previousReadTime);
363+
out.writeLong(currentWriteTime);
364+
out.writeLong(previousWriteTime);
365+
out.writeLong(currentQueueSize);
366+
out.writeLong(previousQueueSize);
367+
out.writeLong(currentIOTime);
368+
out.writeLong(previousIOTime);
317369
}
318370

319371
public long operations() {
@@ -346,6 +398,39 @@ public long writeKilobytes() {
346398
return (currentSectorsWritten - previousSectorsWritten) / 2;
347399
}
348400

401+
/**
402+
* Time spent on read operations since the previous sample (current minus previous), or -1 if there is no previous sample
403+
*/
404+
public long readTime() {
405+
if (previousReadTime == -1) return -1;
406+
return currentReadTime - previousReadTime;
407+
}
408+
409+
/**
410+
* Time spent on write operations since the previous sample (current minus previous), or -1 if there is no previous sample
411+
*/
412+
public long writeTime() {
413+
if (previousWriteTime == -1) return -1;
414+
return currentWriteTime - previousWriteTime;
415+
}
416+
417+
/**
418+
* Queue size indicator: change in the weighted time spent doing I/Os since the previous sample, or -1 if there is no previous sample
419+
*/
420+
public long queueSize() {
421+
if (previousQueueSize == -1) return -1;
422+
return currentQueueSize - previousQueueSize;
423+
}
424+
425+
/**
426+
* Time spent doing I/Os since the previous sample, in milliseconds, or -1 if there is no previous sample
427+
*/
428+
public long ioTimeInMillis() {
429+
if (previousIOTime == -1) return -1;
430+
431+
return (currentIOTime - previousIOTime);
432+
}
433+
349434
@Override
350435
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
351436
builder.field("device_name", deviceName);
@@ -354,9 +439,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
354439
builder.field(IoStats.WRITE_OPERATIONS, writeOperations());
355440
builder.field(IoStats.READ_KILOBYTES, readKilobytes());
356441
builder.field(IoStats.WRITE_KILOBYTES, writeKilobytes());
442+
builder.field(IoStats.READ_TIME, readTime());
443+
builder.field(IoStats.WRITE_TIME, writeTime());
444+
builder.field(IoStats.QUEUE_SIZE, queueSize());
445+
builder.field(IoStats.IO_TIME_MS, ioTimeInMillis());
357446
return builder;
358447
}
359-
360448
}
361449

362450
/**
@@ -371,13 +459,21 @@ public static class IoStats implements Writeable, ToXContentFragment {
371459
private static final String WRITE_OPERATIONS = "write_operations";
372460
private static final String READ_KILOBYTES = "read_kilobytes";
373461
private static final String WRITE_KILOBYTES = "write_kilobytes";
462+
private static final String READ_TIME = "read_time";
463+
private static final String WRITE_TIME = "write_time";
464+
private static final String QUEUE_SIZE = "queue_size";
465+
private static final String IO_TIME_MS = "io_time_in_millis";
374466

375467
final DeviceStats[] devicesStats;
376468
final long totalOperations;
377469
final long totalReadOperations;
378470
final long totalWriteOperations;
379471
final long totalReadKilobytes;
380472
final long totalWriteKilobytes;
473+
final long totalReadTime;
474+
final long totalWriteTime;
475+
final long totalQueueSize;
476+
final long totalIOTimeInMillis;
381477

382478
public IoStats(final DeviceStats[] devicesStats) {
383479
this.devicesStats = devicesStats;
@@ -386,18 +482,30 @@ public IoStats(final DeviceStats[] devicesStats) {
386482
long totalWriteOperations = 0;
387483
long totalReadKilobytes = 0;
388484
long totalWriteKilobytes = 0;
485+
long totalReadTime = 0;
486+
long totalWriteTime = 0;
487+
long totalQueueSize = 0;
488+
long totalIOTimeInMillis = 0;
389489
for (DeviceStats deviceStats : devicesStats) {
390490
totalOperations += deviceStats.operations() != -1 ? deviceStats.operations() : 0;
391491
totalReadOperations += deviceStats.readOperations() != -1 ? deviceStats.readOperations() : 0;
392492
totalWriteOperations += deviceStats.writeOperations() != -1 ? deviceStats.writeOperations() : 0;
393493
totalReadKilobytes += deviceStats.readKilobytes() != -1 ? deviceStats.readKilobytes() : 0;
394494
totalWriteKilobytes += deviceStats.writeKilobytes() != -1 ? deviceStats.writeKilobytes() : 0;
495+
totalReadTime += deviceStats.readTime() != -1 ? deviceStats.readTime() : 0;
496+
totalWriteTime += deviceStats.writeTime() != -1 ? deviceStats.writeTime() : 0;
497+
totalQueueSize += deviceStats.queueSize() != -1 ? deviceStats.queueSize() : 0;
498+
totalIOTimeInMillis += deviceStats.ioTimeInMillis() != -1 ? deviceStats.ioTimeInMillis() : 0;
395499
}
396500
this.totalOperations = totalOperations;
397501
this.totalReadOperations = totalReadOperations;
398502
this.totalWriteOperations = totalWriteOperations;
399503
this.totalReadKilobytes = totalReadKilobytes;
400504
this.totalWriteKilobytes = totalWriteKilobytes;
505+
this.totalReadTime = totalReadTime;
506+
this.totalWriteTime = totalWriteTime;
507+
this.totalQueueSize = totalQueueSize;
508+
this.totalIOTimeInMillis = totalIOTimeInMillis;
401509
}
402510

403511
public IoStats(StreamInput in) throws IOException {
@@ -412,6 +520,17 @@ public IoStats(StreamInput in) throws IOException {
412520
this.totalWriteOperations = in.readLong();
413521
this.totalReadKilobytes = in.readLong();
414522
this.totalWriteKilobytes = in.readLong();
523+
if (in.getVersion().onOrAfter(Version.V_3_0_0)) {
524+
this.totalReadTime = in.readLong();
525+
this.totalWriteTime = in.readLong();
526+
this.totalQueueSize = in.readLong();
527+
this.totalIOTimeInMillis = in.readLong();
528+
} else {
529+
this.totalReadTime = 0;
530+
this.totalWriteTime = 0;
531+
this.totalQueueSize = 0;
532+
this.totalIOTimeInMillis = 0;
533+
}
415534
}
416535

417536
@Override
@@ -425,6 +544,12 @@ public void writeTo(StreamOutput out) throws IOException {
425544
out.writeLong(totalWriteOperations);
426545
out.writeLong(totalReadKilobytes);
427546
out.writeLong(totalWriteKilobytes);
547+
if (out.getVersion().onOrAfter(Version.V_3_0_0)) {
548+
out.writeLong(totalReadTime);
549+
out.writeLong(totalWriteTime);
550+
out.writeLong(totalQueueSize);
551+
out.writeLong(totalIOTimeInMillis);
552+
}
428553
}
429554

430555
public DeviceStats[] getDevicesStats() {
@@ -451,6 +576,34 @@ public long getTotalWriteKilobytes() {
451576
return totalWriteKilobytes;
452577
}
453578

579+
/**
580+
* Sum of read time across all devices
581+
*/
582+
public long getTotalReadTime() {
583+
return totalReadTime;
584+
}
585+
586+
/**
587+
* Sum of write time across all devices
588+
*/
589+
public long getTotalWriteTime() {
590+
return totalWriteTime;
591+
}
592+
593+
/**
594+
* Sum of queue size across all devices
595+
*/
596+
public long getTotalQueueSize() {
597+
return totalQueueSize;
598+
}
599+
600+
/**
601+
* Sum of IO time across all devices
602+
*/
603+
public long getTotalIOTimeMillis() {
604+
return totalIOTimeInMillis;
605+
}
606+
454607
@Override
455608
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
456609
if (devicesStats.length > 0) {
@@ -468,11 +621,15 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
468621
builder.field(WRITE_OPERATIONS, totalWriteOperations);
469622
builder.field(READ_KILOBYTES, totalReadKilobytes);
470623
builder.field(WRITE_KILOBYTES, totalWriteKilobytes);
624+
625+
builder.field(READ_TIME, totalReadTime);
626+
builder.field(WRITE_TIME, totalWriteTime);
627+
builder.field(QUEUE_SIZE, totalQueueSize);
628+
builder.field(IO_TIME_MS, totalIOTimeInMillis);
471629
builder.endObject();
472630
}
473631
return builder;
474632
}
475-
476633
}
477634

478635
private final long timestamp;

server/src/main/java/org/opensearch/monitor/fs/FsProbe.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,25 @@ final FsInfo.IoStats ioStats(final Set<Tuple<Integer, Integer>> devicesNumbers,
109109

110110
List<FsInfo.DeviceStats> devicesStats = new ArrayList<>();
111111

112+
/**
113+
* The /proc/diskstats file displays the I/O statistics of block devices.
114+
* Each line contains the following 14 fields (additional fields may follow):
115+
*
116+
* 1 major number
117+
* 2 minor number
118+
* 3 device name
119+
* 4 reads completed successfully
120+
* 5 reads merged
121+
* 6 sectors read
122+
* 7 time spent reading (ms)
123+
* 8 writes completed
124+
* 9 writes merged
125+
* 10 sectors written
126+
* 11 time spent writing (ms)
127+
* 12 I/Os currently in progress
128+
* 13 time spent doing I/Os (ms) ---- IO use percent
129+
* 14 weighted time spent doing I/Os (ms) ---- Queue size
130+
*/
112131
List<String> lines = readProcDiskStats();
113132
if (!lines.isEmpty()) {
114133
for (String line : lines) {
@@ -123,6 +142,12 @@ final FsInfo.IoStats ioStats(final Set<Tuple<Integer, Integer>> devicesNumbers,
123142
final long sectorsRead = Long.parseLong(fields[5]);
124143
final long writesCompleted = Long.parseLong(fields[7]);
125144
final long sectorsWritten = Long.parseLong(fields[9]);
145+
// readTime and writeTime are the total time taken by read/write requests to complete
146+
// ioTime is the actual time the queue and disks were busy
147+
final long readTime = Long.parseLong(fields[6]);
148+
final long writeTime = Long.parseLong(fields[10]);
149+
final long ioTime = fields.length > 12 ? Long.parseLong(fields[12]) : 0;
150+
final long queueSize = fields.length > 13 ? Long.parseLong(fields[13]) : 0;
126151
final FsInfo.DeviceStats deviceStats = new FsInfo.DeviceStats(
127152
majorDeviceNumber,
128153
minorDeviceNumber,
@@ -131,6 +156,10 @@ final FsInfo.IoStats ioStats(final Set<Tuple<Integer, Integer>> devicesNumbers,
131156
sectorsRead,
132157
writesCompleted,
133158
sectorsWritten,
159+
readTime,
160+
writeTime,
161+
queueSize,
162+
ioTime,
134163
deviceMap.get(Tuple.tuple(majorDeviceNumber, minorDeviceNumber))
135164
);
136165
devicesStats.add(deviceStats);

server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,10 @@ public void testSerialization() throws IOException {
282282
assertEquals(ioStats.getTotalReadOperations(), deserializedIoStats.getTotalReadOperations());
283283
assertEquals(ioStats.getTotalWriteKilobytes(), deserializedIoStats.getTotalWriteKilobytes());
284284
assertEquals(ioStats.getTotalWriteOperations(), deserializedIoStats.getTotalWriteOperations());
285+
assertEquals(ioStats.getTotalReadTime(), deserializedIoStats.getTotalReadTime());
286+
assertEquals(ioStats.getTotalWriteTime(), deserializedIoStats.getTotalWriteTime());
287+
assertEquals(ioStats.getTotalQueueSize(), deserializedIoStats.getTotalQueueSize());
288+
assertEquals(ioStats.getTotalIOTimeMillis(), deserializedIoStats.getTotalIOTimeMillis());
285289
assertEquals(ioStats.getDevicesStats().length, deserializedIoStats.getDevicesStats().length);
286290
for (int i = 0; i < ioStats.getDevicesStats().length; i++) {
287291
FsInfo.DeviceStats deviceStats = ioStats.getDevicesStats()[i];
@@ -625,6 +629,10 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) {
625629
randomNonNegativeLong(),
626630
randomNonNegativeLong(),
627631
randomNonNegativeLong(),
632+
randomNonNegativeLong(),
633+
randomNonNegativeLong(),
634+
randomNonNegativeLong(),
635+
randomNonNegativeLong(),
628636
null
629637
);
630638
deviceStatsArray[i] = new FsInfo.DeviceStats(
@@ -635,6 +643,10 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) {
635643
randomNonNegativeLong(),
636644
randomNonNegativeLong(),
637645
randomNonNegativeLong(),
646+
randomNonNegativeLong(),
647+
randomNonNegativeLong(),
648+
randomNonNegativeLong(),
649+
randomNonNegativeLong(),
638650
previousDeviceStats
639651
);
640652
}

0 commit comments

Comments
 (0)