Skip to content

Commit 7ded5ca

Browse files
nits and tests
Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>
1 parent 9250579 commit 7ded5ca

9 files changed

Lines changed: 415 additions & 44 deletions

File tree

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public Long toStarTreeNumericTypeValue(Long value) {
6565
}
6666

6767
@Override
68-
public long getIdempotentMetricValue() {
68+
public long getIdentityMetricValue() {
6969
return 0L;
7070
}
7171
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public Double toStarTreeNumericTypeValue(Long value) {
7474
}
7575

7676
@Override
77-
public long getIdempotentMetricValue() {
77+
public long getIdentityMetricValue() {
7878
return Long.MIN_VALUE;
7979
}
8080
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public Double toStarTreeNumericTypeValue(Long value) {
7474
}
7575

7676
@Override
77-
public long getIdempotentMetricValue() {
77+
public long getIdentityMetricValue() {
7878
return Long.MAX_VALUE;
7979
}
8080
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public Double toStarTreeNumericTypeValue(Long value) {
9696
}
9797

9898
@Override
99-
public long getIdempotentMetricValue() {
99+
public long getIdentityMetricValue() {
100100
return 0;
101101
}
102102
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,5 +65,5 @@ public interface ValueAggregator<A> {
6565
/**
6666
* Fetches a value that does not alter the result of aggregations
6767
*/
68-
long getIdempotentMetricValue();
68+
long getIdentityMetricValue();
6969
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java

Lines changed: 2 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import org.apache.lucene.store.IndexOutput;
2424
import org.apache.lucene.util.Counter;
2525
import org.apache.lucene.util.NumericUtils;
26-
import org.opensearch.common.time.DateUtils;
2726
import org.opensearch.index.compositeindex.datacube.Dimension;
2827
import org.opensearch.index.compositeindex.datacube.Metric;
2928
import org.opensearch.index.compositeindex.datacube.MetricStat;
@@ -42,7 +41,6 @@
4241
import org.opensearch.index.mapper.NumberFieldMapper;
4342

4443
import java.io.IOException;
45-
import java.time.temporal.ChronoField;
4644
import java.util.ArrayList;
4745
import java.util.Arrays;
4846
import java.util.Collections;
@@ -269,35 +267,6 @@ public void build(
269267
serializeStarTree(numSegmentStarTreeDocument);
270268
}
271269

272-
private long getTimeStampVal(final String fieldName, final long val) {
273-
long roundedDate = 0;
274-
long ratio = 0;
275-
276-
switch (fieldName) {
277-
278-
case "@timestamp":
279-
ratio = ChronoField.MINUTE_OF_HOUR.getBaseUnit().getDuration().toMillis();
280-
roundedDate = DateUtils.roundFloor(val, ratio);
281-
return roundedDate;
282-
case "hour":
283-
ratio = ChronoField.HOUR_OF_DAY.getBaseUnit().getDuration().toMillis();
284-
roundedDate = DateUtils.roundFloor(val, ratio);
285-
return roundedDate;
286-
case "day":
287-
ratio = ChronoField.DAY_OF_MONTH.getBaseUnit().getDuration().toMillis();
288-
roundedDate = DateUtils.roundFloor(val, ratio);
289-
return roundedDate;
290-
case "month":
291-
roundedDate = DateUtils.roundMonthOfYear(val);
292-
return roundedDate;
293-
case "year":
294-
roundedDate = DateUtils.roundYear(val);
295-
return roundedDate;
296-
default:
297-
return val;
298-
}
299-
}
300-
301270
private void serializeStarTree(int numSegmentStarTreeDocument) throws IOException {
302271
// serialize the star tree data
303272
long dataFilePointer = dataOut.getFilePointer();
@@ -581,7 +550,7 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments(
581550
metrics[i] = metricValueAggregator.getInitialAggregatedValue(segmentDocument.metrics[i]);
582551
} else {
583552
metrics[i] = metricValueAggregator.getInitialAggregatedValueForSegmentDocValue(
584-
getLong(segmentDocument.metrics[i], metricValueAggregator.getIdempotentMetricValue()),
553+
getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue()),
585554
starTreeNumericType
586555
);
587556
}
@@ -605,7 +574,7 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments(
605574
} else {
606575
aggregatedSegmentDocument.metrics[i] = metricValueAggregator.mergeAggregatedValueAndSegmentValue(
607576
aggregatedSegmentDocument.metrics[i],
608-
getLong(segmentDocument.metrics[i], metricValueAggregator.getIdempotentMetricValue()),
577+
getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue()),
609578
starTreeNumericType
610579
);
611580
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,15 @@ public class StarTreeDataWriter {
4343
* @throws IOException if an I/O error occurs while writing the star-tree data
4444
*/
4545
public static long serializeStarTree(IndexOutput indexOutput, TreeNode rootNode, int numNodes) throws IOException {
46-
int headerSizeInBytes = computeStarTreeDataHeaderByteSize();
47-
long totalSizeInBytes = headerSizeInBytes + (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES;
48-
49-
logger.debug("Star tree size in bytes : {}", totalSizeInBytes);
46+
long totalDataSizeInBytes = (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES;
47+
if (logger.isDebugEnabled()) {
48+
int headerSizeInBytes = computeStarTreeDataHeaderByteSize();
49+
logger.debug("Star tree size in bytes : {}", headerSizeInBytes + totalDataSizeInBytes);
50+
}
5051

5152
writeStarTreeHeader(indexOutput, numNodes);
5253
writeStarTreeNodes(indexOutput, rootNode);
53-
return totalSizeInBytes;
54+
return totalDataSizeInBytes;
5455
}
5556

5657
/**
@@ -95,7 +96,7 @@ private static void writeStarTreeNodes(IndexOutput output, TreeNode rootNode) th
9596
while (!queue.isEmpty()) {
9697
TreeNode node = queue.remove();
9798

98-
if (node.children == null) {
99+
if (node.children == null || node.children.isEmpty()) {
99100
writeStarTreeNode(output, node, ALL, ALL);
100101
} else {
101102

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.compositeindex.datacube.startree.meta;
10+
11+
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
12+
import org.apache.lucene.index.DocValuesType;
13+
import org.apache.lucene.index.FieldInfo;
14+
import org.apache.lucene.index.FieldInfos;
15+
import org.apache.lucene.index.IndexOptions;
16+
import org.apache.lucene.index.SegmentInfo;
17+
import org.apache.lucene.index.SegmentWriteState;
18+
import org.apache.lucene.index.VectorEncoding;
19+
import org.apache.lucene.index.VectorSimilarityFunction;
20+
import org.apache.lucene.store.Directory;
21+
import org.apache.lucene.store.IOContext;
22+
import org.apache.lucene.store.IndexInput;
23+
import org.apache.lucene.store.IndexOutput;
24+
import org.apache.lucene.util.InfoStream;
25+
import org.apache.lucene.util.Version;
26+
import org.opensearch.index.compositeindex.CompositeIndexMetadata;
27+
import org.opensearch.index.compositeindex.datacube.Dimension;
28+
import org.opensearch.index.compositeindex.datacube.Metric;
29+
import org.opensearch.index.compositeindex.datacube.MetricStat;
30+
import org.opensearch.index.compositeindex.datacube.NumericDimension;
31+
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
32+
import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
33+
import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo;
34+
import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricEntry;
35+
import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeBuilderUtils;
36+
import org.opensearch.index.fielddata.IndexNumericFieldData;
37+
import org.opensearch.test.OpenSearchTestCase;
38+
import org.junit.Before;
39+
40+
import java.io.IOException;
41+
import java.nio.charset.StandardCharsets;
42+
import java.util.ArrayList;
43+
import java.util.Collections;
44+
import java.util.HashMap;
45+
import java.util.HashSet;
46+
import java.util.List;
47+
import java.util.UUID;
48+
49+
import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER;
50+
import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE;
51+
52+
public class StarTreeMetaTests extends OpenSearchTestCase {
53+
54+
private IndexOutput metaOut;
55+
private IndexInput metaIn;
56+
private StarTreeField starTreeField;
57+
private SegmentWriteState writeState;
58+
private Directory directory;
59+
private FieldInfo[] fieldsInfo;
60+
private List<Dimension> dimensionsOrder;
61+
private List<String> fields = List.of();
62+
private List<Metric> metrics;
63+
private List<MetricAggregatorInfo> metricAggregatorInfos = new ArrayList<>();
64+
private int segmentDocumentCount;
65+
private long dataFilePointer;
66+
private long dataFileLength;
67+
68+
@Before
69+
public void setup() throws IOException {
70+
fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10");
71+
directory = newFSDirectory(createTempDir());
72+
SegmentInfo segmentInfo = new SegmentInfo(
73+
directory,
74+
Version.LATEST,
75+
Version.LUCENE_9_11_0,
76+
"test_segment",
77+
6,
78+
false,
79+
false,
80+
new Lucene99Codec(),
81+
new HashMap<>(),
82+
UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8),
83+
new HashMap<>(),
84+
null
85+
);
86+
87+
fieldsInfo = new FieldInfo[fields.size()];
88+
for (int i = 0; i < fieldsInfo.length; i++) {
89+
fieldsInfo[i] = new FieldInfo(
90+
fields.get(i),
91+
i,
92+
false,
93+
false,
94+
true,
95+
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
96+
DocValuesType.SORTED_NUMERIC,
97+
-1,
98+
Collections.emptyMap(),
99+
0,
100+
0,
101+
0,
102+
0,
103+
VectorEncoding.FLOAT32,
104+
VectorSimilarityFunction.EUCLIDEAN,
105+
false,
106+
false
107+
);
108+
}
109+
FieldInfos fieldInfos = new FieldInfos(fieldsInfo);
110+
writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random()));
111+
}
112+
113+
public void test_starTreeMetadata() throws IOException {
114+
dimensionsOrder = List.of(
115+
new NumericDimension("field1"),
116+
new NumericDimension("field3"),
117+
new NumericDimension("field5"),
118+
new NumericDimension("field8")
119+
);
120+
metrics = List.of(
121+
new Metric("field2", List.of(MetricStat.SUM)),
122+
new Metric("field4", List.of(MetricStat.SUM)),
123+
new Metric("field6", List.of(MetricStat.COUNT)),
124+
new Metric("field9", List.of(MetricStat.MIN)),
125+
new Metric("field10", List.of(MetricStat.MAX))
126+
);
127+
int maxLeafDocs = randomInt();
128+
StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration(
129+
maxLeafDocs,
130+
new HashSet<>(),
131+
StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP
132+
);
133+
starTreeField = new StarTreeField("star_tree", dimensionsOrder, metrics, starTreeFieldConfiguration);
134+
135+
for (Metric metric : metrics) {
136+
for (MetricStat metricType : metric.getMetrics()) {
137+
MetricAggregatorInfo metricAggregatorInfo = new MetricAggregatorInfo(
138+
metricType,
139+
metric.getField(),
140+
starTreeField.getName(),
141+
IndexNumericFieldData.NumericType.DOUBLE
142+
);
143+
metricAggregatorInfos.add(metricAggregatorInfo);
144+
}
145+
}
146+
147+
dataFileLength = randomLong();
148+
dataFilePointer = randomLong();
149+
segmentDocumentCount = randomInt();
150+
metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT);
151+
StarTreeBuilderUtils.serializeStarTreeMetadata(
152+
metaOut,
153+
starTreeField,
154+
writeState,
155+
metricAggregatorInfos,
156+
segmentDocumentCount,
157+
dataFilePointer,
158+
dataFileLength
159+
);
160+
metaOut.close();
161+
metaIn = directory.openInput("star-tree-metadata", IOContext.READONCE);
162+
assertEquals(MAGIC_MARKER, metaIn.readLong());
163+
164+
CompositeIndexMetadata compositeIndexMetadata = new CompositeIndexMetadata(metaIn);
165+
assertEquals(starTreeField.getName(), compositeIndexMetadata.getCompositeFieldName());
166+
assertEquals(STAR_TREE, compositeIndexMetadata.getCompositeFieldType());
167+
168+
StarTreeMetadata starTreeMetadata = compositeIndexMetadata.getStarTreeMetadata();
169+
assertNotNull(starTreeMetadata);
170+
171+
for (int i = 0; i < dimensionsOrder.size(); i++) {
172+
assertEquals(
173+
writeState.fieldInfos.fieldInfo(dimensionsOrder.get(i).getField()).getFieldNumber(),
174+
starTreeMetadata.getDimensionFieldNumbers().get(i),
175+
0
176+
);
177+
}
178+
179+
for (int i = 0; i < metricAggregatorInfos.size(); i++) {
180+
MetricEntry metricEntry = starTreeMetadata.getMetricEntries().get(i);
181+
assertEquals(metricAggregatorInfos.get(i).getField(), metricEntry.getMetricName());
182+
assertEquals(metricAggregatorInfos.get(i).getMetricStat(), metricEntry.getMetricStat());
183+
}
184+
assertEquals(segmentDocumentCount, starTreeMetadata.getSegmentAggregatedDocCount(), 0);
185+
assertEquals(maxLeafDocs, starTreeMetadata.getMaxLeafDocs(), 0);
186+
assertEquals(
187+
starTreeFieldConfiguration.getSkipStarNodeCreationInDims().size(),
188+
starTreeMetadata.getSkipStarNodeCreationInDims().size()
189+
);
190+
for (String skipStarNodeCreationInDims : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) {
191+
Integer skipStarNodeCreationInDimsFieldNumber = writeState.fieldInfos.fieldInfo(skipStarNodeCreationInDims).getFieldNumber();
192+
assertTrue(starTreeMetadata.getSkipStarNodeCreationInDims().contains(skipStarNodeCreationInDimsFieldNumber));
193+
}
194+
assertEquals(starTreeFieldConfiguration.getBuildMode(), starTreeMetadata.getStarTreeBuildMode());
195+
assertEquals(dataFileLength, starTreeMetadata.getDataLength());
196+
assertEquals(dataFilePointer, starTreeMetadata.getDataStartFilePointer());
197+
198+
metaIn.close();
199+
200+
}
201+
202+
@Override
203+
public void tearDown() throws Exception {
204+
super.tearDown();
205+
metaOut.close();
206+
metaIn.close();
207+
directory.close();
208+
}
209+
210+
}

0 commit comments

Comments
 (0)