diff --git a/src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj b/src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj
index c4f0c453..524eb12d 100644
--- a/src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj
+++ b/src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj
@@ -18,6 +18,7 @@
+
diff --git a/src/Apache.Arrow.Operations/Shredding/ShredOptions.cs b/src/Apache.Arrow.Operations/Shredding/ShredOptions.cs
new file mode 100644
index 00000000..d69bdb9e
--- /dev/null
+++ b/src/Apache.Arrow.Operations/Shredding/ShredOptions.cs
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow.Operations.Shredding
+{
+ /// <summary>
+ /// Options controlling how <see cref="ShredSchemaInferer"/> infers a shredding schema.
+ /// </summary>
+ public sealed class ShredOptions
+ {
+ /// <summary>
+ /// Maximum nesting depth for shredded objects and arrays.
+ /// 0 means only top-level fields are shredded.
+ /// Default is 3.
+ /// </summary>
+ public int MaxDepth { get; set; } = 3;
+
+ /// <summary>
+ /// Minimum fraction of values (0.0–1.0) in which a field must appear
+ /// to be considered for shredding. Fields appearing less frequently
+ /// than this threshold are left in the binary residual.
+ /// Default is 0.5 (50%).
+ /// </summary>
+ public double MinFieldFrequency { get; set; } = 0.5;
+
+ /// <summary>
+ /// Minimum fraction of non-null values (0.0–1.0) for a field that must
+ /// share the same type for the field to be shredded as a typed column.
+ /// If the type consistency is below this threshold, the field gets a
+ /// <see cref="ShredType.None"/> schema (binary-only).
+ /// Default is 0.8 (80%).
+ /// </summary>
+ public double MinTypeConsistency { get; set; } = 0.8;
+
+ /// <summary>Default options. Each access constructs a new instance.</summary>
+ public static ShredOptions Default => new ShredOptions();
+ }
+}
diff --git a/src/Apache.Arrow.Operations/Shredding/ShredResult.cs b/src/Apache.Arrow.Operations/Shredding/ShredResult.cs
new file mode 100644
index 00000000..e013e55e
--- /dev/null
+++ b/src/Apache.Arrow.Operations/Shredding/ShredResult.cs
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System.Collections.Generic;
+
+namespace Apache.Arrow.Operations.Shredding
+{
+ /// <summary>
+ /// The result of shredding a single variant value: a (value, typed_value) pair.
+ /// <para>
+ /// Follows the Parquet variant shredding spec encoding matrix:
+ /// <list type="bullet">
+ /// <item><description>Both null → missing (only valid for object sub-fields)</description></item>
+ /// <item><description>value non-null, typed_value null → unshredded (value in binary)</description></item>
+ /// <item><description>value null, typed_value non-null → fully shredded into typed column</description></item>
+ /// <item><description>Both non-null → partially shredded object (typed_value has shredded fields, value has residual)</description></item>
+ /// </list>
+ /// </para>
+ /// </summary>
+ public sealed class ShredResult
+ {
+ /// <summary>
+ /// The residual variant value bytes. These reference the column-level
+ /// variant metadata rather than carrying their own;
+ /// they are NOT self-contained. Non-null when the value (or part of it) could
+ /// not be shredded into the typed column. For partially shredded objects this
+ /// contains only the unshredded fields.
+ /// </summary>
+ public byte[] Value { get; }
+
+ /// <summary>
+ /// The typed value extracted according to the schema. The runtime type depends
+ /// on the <see cref="ShredSchema.TypedValueType"/>:
+ /// <list type="bullet">
+ /// <item><description>Primitives: the corresponding CLR type (bool, int, long, double, string, etc.)</description></item>
+ /// <item><description>Object: <see cref="ShredObjectResult"/></description></item>
+ /// <item><description>Array: <see cref="ShredArrayResult"/></description></item>
+ /// </list>
+ /// Null when the value does not match the schema type (falls back to binary).
+ /// </summary>
+ public object TypedValue { get; }
+
+ /// <summary>
+ /// True when both <see cref="Value"/> and <see cref="TypedValue"/> are null,
+ /// indicating the field is missing (only valid for object sub-fields).
+ /// </summary>
+ public bool IsMissing => Value == null && TypedValue == null;
+
+ /// <summary>Creates a shred result.</summary>
+ /// <param name="value">Residual variant bytes, or null when nothing is left in the binary column.</param>
+ /// <param name="typedValue">The typed value, or null when nothing was shredded into the typed column.</param>
+ public ShredResult(byte[] value, object typedValue)
+ {
+ Value = value;
+ TypedValue = typedValue;
+ }
+
+ /// <summary>A missing result (both null).</summary>
+ public static readonly ShredResult Missing = new ShredResult(null, null);
+ }
+
+    /// <summary>
+    /// The typed_value result for a shredded object. Contains one <see cref="ShredResult"/>
+    /// per field defined in the object's <see cref="ShredSchema.ObjectFields"/>.
+    /// </summary>
+    public sealed class ShredObjectResult
+    {
+        /// <summary>
+        /// Shredded fields, keyed by field name matching the <see cref="ShredSchema.ObjectFields"/>.
+        /// Each entry is the shredded (value, typed_value) pair for that field.
+        /// </summary>
+        public IReadOnlyDictionary<string, ShredResult> Fields { get; }
+
+        /// <summary>Creates a shredded object result.</summary>
+        /// <param name="fields">The per-field shred results; stored as given (not copied).</param>
+        public ShredObjectResult(IReadOnlyDictionary<string, ShredResult> fields)
+        {
+            Fields = fields;
+        }
+    }
+
+    /// <summary>
+    /// The typed_value result for a shredded array. Contains one <see cref="ShredResult"/>
+    /// per element in the source array.
+    /// </summary>
+    public sealed class ShredArrayResult
+    {
+        /// <summary>
+        /// Shredded elements. Each entry is the shredded (value, typed_value) pair for that element.
+        /// Array elements are never missing — null elements are encoded as variant null in the value column.
+        /// </summary>
+        public IReadOnlyList<ShredResult> Elements { get; }
+
+        /// <summary>Creates a shredded array result.</summary>
+        /// <param name="elements">The per-element shred results; stored as given (not copied).</param>
+        public ShredArrayResult(IReadOnlyList<ShredResult> elements)
+        {
+            Elements = elements;
+        }
+    }
+}
diff --git a/src/Apache.Arrow.Operations/Shredding/ShredSchema.cs b/src/Apache.Arrow.Operations/Shredding/ShredSchema.cs
new file mode 100644
index 00000000..e1ea5e3c
--- /dev/null
+++ b/src/Apache.Arrow.Operations/Shredding/ShredSchema.cs
@@ -0,0 +1,293 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow;
+using Apache.Arrow.Scalars.Variant;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow.Operations.Shredding
+{
+    /// <summary>
+    /// Describes the shredding schema for a variant column — which fields
+    /// to extract into typed Parquet columns and at what types.
+    /// </summary>
+    public sealed class ShredSchema
+    {
+        /// <summary>
+        /// The type of the typed_value column. For primitives, this is the
+        /// expected scalar type. For objects, use <see cref="ShredType.Object"/>
+        /// and populate <see cref="ObjectFields"/>. For arrays, use <see cref="ShredType.Array"/>
+        /// and populate <see cref="ArrayElement"/>.
+        /// <see cref="ShredType.None"/> means no typed_value — everything goes to binary value.
+        /// </summary>
+        public ShredType TypedValueType { get; }
+
+        /// <summary>
+        /// For <see cref="ShredType.Object"/>: the shredding schemas for each named sub-field.
+        /// Null for non-object types.
+        /// </summary>
+        public IReadOnlyDictionary<string, ShredSchema> ObjectFields { get; }
+
+        /// <summary>
+        /// For <see cref="ShredType.Array"/>: the shredding schema applied to each element.
+        /// Null for non-array types.
+        /// </summary>
+        public ShredSchema ArrayElement { get; }
+
+        // Private: instances are only created through the factory methods below,
+        // which keep TypedValueType, ObjectFields and ArrayElement mutually consistent.
+        private ShredSchema(
+            ShredType typedValueType,
+            IReadOnlyDictionary<string, ShredSchema> objectFields,
+            ShredSchema arrayElement)
+        {
+            TypedValueType = typedValueType;
+            ObjectFields = objectFields;
+            ArrayElement = arrayElement;
+        }
+
+        /// <summary>Creates a schema that does no shredding (all values go to binary).</summary>
+        public static ShredSchema Unshredded() => new ShredSchema(ShredType.None, null, null);
+
+        /// <summary>
+        /// Creates a schema that shreds values into a typed primitive column.
+        /// Values not matching this type fall back to the binary value column.
+        /// </summary>
+        /// <param name="type">A primitive shred type.</param>
+        /// <exception cref="ArgumentException">
+        /// Thrown when <paramref name="type"/> is <see cref="ShredType.None"/>,
+        /// <see cref="ShredType.Object"/> or <see cref="ShredType.Array"/>.
+        /// </exception>
+        public static ShredSchema Primitive(ShredType type)
+        {
+            if (type == ShredType.None || type == ShredType.Object || type == ShredType.Array)
+            {
+                throw new ArgumentException($"Use the appropriate factory method for {type}.", nameof(type));
+            }
+            return new ShredSchema(type, null, null);
+        }
+
+        /// <summary>
+        /// Creates a schema that shreds object values by extracting named fields
+        /// into typed sub-columns. The supplied dictionary is copied, so later
+        /// changes to it do not affect the returned schema.
+        /// </summary>
+        /// <param name="fields">The sub-field schemas keyed by field name.</param>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="fields"/> is null.</exception>
+        public static ShredSchema ForObject(IDictionary<string, ShredSchema> fields)
+        {
+            if (fields == null) throw new ArgumentNullException(nameof(fields));
+            Dictionary<string, ShredSchema> copy = new Dictionary<string, ShredSchema>(fields);
+            return new ShredSchema(ShredType.Object, copy, null);
+        }
+
+        /// <summary>
+        /// Creates a schema that shreds array values by applying the element
+        /// schema to each element.
+        /// </summary>
+        /// <param name="elementSchema">The schema applied to every array element.</param>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="elementSchema"/> is null.</exception>
+        public static ShredSchema ForArray(ShredSchema elementSchema)
+        {
+            if (elementSchema == null) throw new ArgumentNullException(nameof(elementSchema));
+            return new ShredSchema(ShredType.Array, null, elementSchema);
+        }
+
+        /// <summary>
+        /// Maps a <see cref="VariantPrimitiveType"/> to the corresponding <see cref="ShredType"/>.
+        /// Primitive types without a listed mapping yield <see cref="ShredType.None"/>.
+        /// </summary>
+        public static ShredType ShredTypeFromPrimitive(VariantPrimitiveType primitiveType)
+        {
+            switch (primitiveType)
+            {
+                case VariantPrimitiveType.BooleanTrue:
+                case VariantPrimitiveType.BooleanFalse:
+                    return ShredType.Boolean;
+                case VariantPrimitiveType.Int8: return ShredType.Int8;
+                case VariantPrimitiveType.Int16: return ShredType.Int16;
+                case VariantPrimitiveType.Int32: return ShredType.Int32;
+                case VariantPrimitiveType.Int64: return ShredType.Int64;
+                case VariantPrimitiveType.Float: return ShredType.Float;
+                case VariantPrimitiveType.Double: return ShredType.Double;
+                case VariantPrimitiveType.Decimal4: return ShredType.Decimal4;
+                case VariantPrimitiveType.Decimal8: return ShredType.Decimal8;
+                case VariantPrimitiveType.Decimal16: return ShredType.Decimal16;
+                case VariantPrimitiveType.Date: return ShredType.Date;
+                case VariantPrimitiveType.Timestamp: return ShredType.Timestamp;
+                case VariantPrimitiveType.TimestampNtz: return ShredType.TimestampNtz;
+                case VariantPrimitiveType.TimeNtz: return ShredType.TimeNtz;
+                case VariantPrimitiveType.TimestampTzNanos: return ShredType.TimestampTzNanos;
+                case VariantPrimitiveType.TimestampNtzNanos: return ShredType.TimestampNtzNanos;
+                case VariantPrimitiveType.String: return ShredType.String;
+                case VariantPrimitiveType.Binary: return ShredType.Binary;
+                case VariantPrimitiveType.Uuid: return ShredType.Uuid;
+                default: return ShredType.None;
+            }
+        }
+
+        /// <summary>
+        /// Derives a <see cref="ShredSchema"/> from the Arrow type of a typed_value column.
+        /// </summary>
+        /// <param name="typedValueType">
+        /// The typed_value Arrow type, or null for a fully unshredded column.
+        /// </param>
+        /// <returns>A <see cref="ShredSchema"/> describing the shredding.</returns>
+        /// <exception cref="ArgumentException">
+        /// Thrown when <paramref name="typedValueType"/> is not a valid shredded type
+        /// per the Parquet variant shredding spec (for example, an unsigned integer or
+        /// a fixed-size binary that isn't UUID).
+        /// </exception>
+        public static ShredSchema FromArrowType(IArrowType typedValueType)
+        {
+            if (typedValueType == null) return Unshredded();
+            return MapArrowType(typedValueType);
+        }
+
+        // Translates one Arrow type into a schema, recursing through list and
+        // struct types via MapArrayType / MapObjectType. Case order matters:
+        // see the decimal comment below.
+        private static ShredSchema MapArrowType(IArrowType type)
+        {
+            switch (type)
+            {
+                case BooleanType _: return Primitive(ShredType.Boolean);
+                case Int8Type _: return Primitive(ShredType.Int8);
+                case Int16Type _: return Primitive(ShredType.Int16);
+                case Int32Type _: return Primitive(ShredType.Int32);
+                case Int64Type _: return Primitive(ShredType.Int64);
+                case FloatType _: return Primitive(ShredType.Float);
+                case DoubleType _: return Primitive(ShredType.Double);
+                case StringType _: return Primitive(ShredType.String);
+                case BinaryType _: return Primitive(ShredType.Binary);
+                case LargeBinaryType _: return Primitive(ShredType.Binary);
+                case LargeStringType _: return Primitive(ShredType.String);
+                case Date32Type _: return Primitive(ShredType.Date);
+
+                case Time64Type t when t.Unit == TimeUnit.Microsecond:
+                    return Primitive(ShredType.TimeNtz);
+
+                case TimestampType ts when ts.Unit == TimeUnit.Microsecond && ts.IsTimeZoneAware:
+                    return Primitive(ShredType.Timestamp);
+                case TimestampType ts when ts.Unit == TimeUnit.Microsecond && !ts.IsTimeZoneAware:
+                    return Primitive(ShredType.TimestampNtz);
+                case TimestampType ts when ts.Unit == TimeUnit.Nanosecond && ts.IsTimeZoneAware:
+                    return Primitive(ShredType.TimestampTzNanos);
+                case TimestampType ts when ts.Unit == TimeUnit.Nanosecond && !ts.IsTimeZoneAware:
+                    return Primitive(ShredType.TimestampNtzNanos);
+
+                // The Parquet variant spec allows any Arrow decimal representation
+                // whose precision fits in one of the variant's decimal widths
+                // (≤9 digits → 4-byte unscaled, ≤18 → 8-byte, ≤38 → 16-byte).
+                // Decimal128Type extends FixedSizeBinaryType with byte_width=16, so
+                // we MUST match the decimal cases before the UUID fallback below,
+                // and dispatch by precision inside the cases rather than via `when`
+                // guards that can fall through into the FSB(16) branch.
+                case Decimal32Type d32: return MapDecimalByPrecision(d32.Precision, type);
+                case Decimal64Type d64: return MapDecimalByPrecision(d64.Precision, type);
+                case Decimal128Type d128: return MapDecimalByPrecision(d128.Precision, type);
+
+                case ExtensionType ext when ext.Name == "arrow.uuid":
+                    return Primitive(ShredType.Uuid);
+
+                // When the Arrow IPC reader has no UUID extension registered, the
+                // column comes through as its storage type (16-byte fixed binary).
+                // Per the Parquet variant shredding spec, fixed_size_binary(16) is
+                // the only valid fixed-size binary type and represents UUID.
+                case FixedSizeBinaryType fsb when fsb.ByteWidth == 16:
+                    return Primitive(ShredType.Uuid);
+
+                case ListType list:
+                    return MapArrayType(list);
+
+                case StructType structType:
+                    return MapObjectType(structType);
+
+                default:
+                    throw new ArgumentException(
+                        $"Unsupported shredded value type: {type}",
+                        nameof(type));
+            }
+        }
+
+        // Picks the narrowest variant decimal width that can hold the given precision.
+        private static ShredSchema MapDecimalByPrecision(int precision, IArrowType type)
+        {
+            if (precision <= 9) return Primitive(ShredType.Decimal4);
+            if (precision <= 18) return Primitive(ShredType.Decimal8);
+            if (precision <= 38) return Primitive(ShredType.Decimal16);
+            throw new ArgumentException(
+                $"Unsupported decimal precision {precision} (max 38): {type}",
+                nameof(type));
+        }
+
+        // A shredded array is a list whose element type is a {value, typed_value} group.
+        private static ShredSchema MapArrayType(ListType list)
+        {
+            if (!(list.ValueDataType is StructType elementStruct) || !IsElementGroupStruct(elementStruct))
+            {
+                throw new ArgumentException(
+                    "Shredded array element must be a struct with 'value' and/or 'typed_value' fields.",
+                    nameof(list));
+            }
+            return ForArray(ParseElementGroup(elementStruct));
+        }
+
+        // A shredded object is a struct whose every field is a {value, typed_value} group.
+        private static ShredSchema MapObjectType(StructType structType)
+        {
+            Dictionary<string, ShredSchema> fields = new Dictionary<string, ShredSchema>(structType.Fields.Count);
+            foreach (Field field in structType.Fields)
+            {
+                if (!(field.DataType is StructType elementGroup) || !IsElementGroupStruct(elementGroup))
+                {
+                    throw new ArgumentException(
+                        $"Shredded object field '{field.Name}' must be a struct with 'value' and/or 'typed_value' fields.",
+                        nameof(structType));
+                }
+                fields[field.Name] = ParseElementGroup(elementGroup);
+            }
+            return ForObject(fields);
+        }
+
+        /// <summary>
+        /// Tests whether a struct type is a valid shredded "element group":
+        /// a struct with at least one of <c>value</c> (binary) or <c>typed_value</c>,
+        /// and no other fields.
+        /// </summary>
+        private static bool IsElementGroupStruct(StructType st)
+        {
+            int valueIdx = st.GetFieldIndex("value");
+            int typedIdx = st.GetFieldIndex("typed_value");
+
+            if (valueIdx < 0 && typedIdx < 0)
+            {
+                return false;
+            }
+
+            if (valueIdx >= 0)
+            {
+                // The residual column must be one of the binary representations.
+                IArrowType valueFieldType = st.Fields[valueIdx].DataType;
+                if (!(valueFieldType is BinaryType ||
+                      valueFieldType is LargeBinaryType ||
+                      valueFieldType is BinaryViewType))
+                {
+                    return false;
+                }
+            }
+
+            // Reject structs with unexpected extra fields.
+            foreach (Field f in st.Fields)
+            {
+                if (f.Name != "value" && f.Name != "typed_value")
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        // Derives the schema of an element group from its typed_value field;
+        // a group without typed_value is unshredded.
+        private static ShredSchema ParseElementGroup(StructType elementStruct)
+        {
+            int typedIdx = elementStruct.GetFieldIndex("typed_value");
+            if (typedIdx < 0)
+            {
+                return Unshredded();
+            }
+            return MapArrowType(elementStruct.Fields[typedIdx].DataType);
+        }
+    }
+}
diff --git a/src/Apache.Arrow.Operations/Shredding/ShredSchemaInferer.cs b/src/Apache.Arrow.Operations/Shredding/ShredSchemaInferer.cs
new file mode 100644
index 00000000..4537c3cb
--- /dev/null
+++ b/src/Apache.Arrow.Operations/Shredding/ShredSchemaInferer.cs
@@ -0,0 +1,201 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow.Scalars.Variant;
+
+namespace Apache.Arrow.Operations.Shredding
+{
+    /// <summary>
+    /// Analyzes a batch of <see cref="VariantValue"/>s and infers an optimal
+    /// <see cref="ShredSchema"/> for shredding them.
+    /// </summary>
+    public sealed class ShredSchemaInferer
+    {
+        /// <summary>
+        /// Infers a shredding schema by analyzing the given values.
+        /// </summary>
+        /// <param name="values">The variant values to analyze.</param>
+        /// <param name="options">Options controlling depth, frequency, and type consistency thresholds.</param>
+        /// <returns>An inferred <see cref="ShredSchema"/>.</returns>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="values"/> is null.</exception>
+        public ShredSchema Infer(IEnumerable<VariantValue> values, ShredOptions options = null)
+        {
+            if (values == null) throw new ArgumentNullException(nameof(values));
+            if (options == null) options = ShredOptions.Default;
+
+            TypeStats stats = new TypeStats();
+            int totalCount = 0;
+
+            foreach (VariantValue value in values)
+            {
+                CollectStats(value, stats, 0, options.MaxDepth);
+                totalCount++;
+            }
+
+            // Nothing to learn from an empty batch.
+            if (totalCount == 0)
+            {
+                return ShredSchema.Unshredded();
+            }
+
+            return BuildSchema(stats, totalCount, options, 0);
+        }
+
+        // Tallies the shred type of `value` into `stats` and, while depth <= maxDepth,
+        // recurses into object fields / array elements so nested statistics are
+        // available to BuildSchema.
+        private void CollectStats(VariantValue value, TypeStats stats, int depth, int maxDepth)
+        {
+            ShredType type = VariantShredder.GetShredType(value);
+            // TryGetValue leaves count at 0 when the type has not been seen yet.
+            stats.TypeCounts.TryGetValue(type, out int count);
+            stats.TypeCounts[type] = count + 1;
+
+            if (type == ShredType.Object && depth <= maxDepth && value.IsObject)
+            {
+                if (stats.ObjectFieldStats == null)
+                {
+                    stats.ObjectFieldStats = new Dictionary<string, TypeStats>();
+                }
+
+                foreach (KeyValuePair<string, VariantValue> field in value.AsObject())
+                {
+                    if (!stats.ObjectFieldStats.TryGetValue(field.Key, out TypeStats fieldStats))
+                    {
+                        fieldStats = new TypeStats();
+                        stats.ObjectFieldStats[field.Key] = fieldStats;
+                    }
+                    CollectStats(field.Value, fieldStats, depth + 1, maxDepth);
+                }
+            }
+            else if (type == ShredType.Array && depth <= maxDepth && value.IsArray)
+            {
+                if (stats.ArrayElementStats == null)
+                {
+                    stats.ArrayElementStats = new TypeStats();
+                }
+
+                // All elements of all arrays at this position share one stats bucket.
+                foreach (VariantValue element in value.AsArray())
+                {
+                    CollectStats(element, stats.ArrayElementStats, depth + 1, maxDepth);
+                }
+            }
+        }
+
+        // Turns collected statistics into a schema: picks the most frequent
+        // non-None type, requires it to meet MinTypeConsistency, then delegates
+        // to the object/array builders or emits a primitive schema.
+        // `totalCount` and `depth` are only forwarded; they do not affect decisions here.
+        private ShredSchema BuildSchema(TypeStats stats, int totalCount, ShredOptions options, int depth)
+        {
+            // Find the dominant type.
+            ShredType dominantType = ShredType.None;
+            int dominantCount = 0;
+            int nonNullCount = 0;
+
+            foreach (KeyValuePair<ShredType, int> entry in stats.TypeCounts)
+            {
+                if (entry.Key != ShredType.None)
+                {
+                    nonNullCount += entry.Value;
+                    if (entry.Value > dominantCount)
+                    {
+                        dominantCount = entry.Value;
+                        dominantType = entry.Key;
+                    }
+                }
+            }
+
+            if (nonNullCount == 0)
+            {
+                return ShredSchema.Unshredded();
+            }
+
+            // Check type consistency.
+            double consistency = (double)dominantCount / nonNullCount;
+            if (consistency < options.MinTypeConsistency)
+            {
+                return ShredSchema.Unshredded();
+            }
+
+            if (dominantType == ShredType.Object && stats.ObjectFieldStats != null)
+            {
+                return BuildObjectSchema(stats, totalCount, dominantCount, options, depth);
+            }
+
+            if (dominantType == ShredType.Array && stats.ArrayElementStats != null)
+            {
+                return BuildArraySchema(stats, dominantCount, options, depth);
+            }
+
+            // Object/Array without collected sub-stats (e.g., maxDepth reached) — can't shred further.
+            if (dominantType == ShredType.Object || dominantType == ShredType.Array)
+            {
+                return ShredSchema.Unshredded();
+            }
+
+            // Primitive type — shred as that type.
+            return ShredSchema.Primitive(dominantType);
+        }
+
+        // Builds an object schema from the fields that appear in at least
+        // MinFieldFrequency of the observed objects. `totalCount` is currently unused.
+        private ShredSchema BuildObjectSchema(TypeStats stats, int totalCount, int objectCount, ShredOptions options, int depth)
+        {
+            Dictionary<string, ShredSchema> fields = new Dictionary<string, ShredSchema>();
+
+            foreach (KeyValuePair<string, TypeStats> fieldEntry in stats.ObjectFieldStats)
+            {
+                // Check field frequency: how often does this field appear relative to the number of objects?
+                int fieldAppearances = 0;
+                foreach (KeyValuePair<ShredType, int> tc in fieldEntry.Value.TypeCounts)
+                {
+                    fieldAppearances += tc.Value;
+                }
+
+                double frequency = (double)fieldAppearances / objectCount;
+                if (frequency < options.MinFieldFrequency)
+                {
+                    continue;
+                }
+
+                ShredSchema fieldSchema = BuildSchema(fieldEntry.Value, fieldAppearances, options, depth + 1);
+                fields[fieldEntry.Key] = fieldSchema;
+            }
+
+            if (fields.Count == 0)
+            {
+                return ShredSchema.Unshredded();
+            }
+
+            return ShredSchema.ForObject(fields);
+        }
+
+        // Builds an array schema from the pooled element statistics.
+        // `arrayCount` is currently unused.
+        private ShredSchema BuildArraySchema(TypeStats stats, int arrayCount, ShredOptions options, int depth)
+        {
+            // Count total elements across all arrays.
+            int totalElements = 0;
+            foreach (KeyValuePair<ShredType, int> entry in stats.ArrayElementStats.TypeCounts)
+            {
+                totalElements += entry.Value;
+            }
+
+            // Only empty arrays were seen, so there is no element type to infer.
+            if (totalElements == 0)
+            {
+                return ShredSchema.Unshredded();
+            }
+
+            ShredSchema elementSchema = BuildSchema(stats.ArrayElementStats, totalElements, options, depth + 1);
+            return ShredSchema.ForArray(elementSchema);
+        }
+
+        // Per-position statistics: how often each shred type was seen, plus
+        // lazily created nested statistics for object fields and array elements.
+        private sealed class TypeStats
+        {
+            public Dictionary<ShredType, int> TypeCounts { get; } = new Dictionary<ShredType, int>();
+            public Dictionary<string, TypeStats> ObjectFieldStats { get; set; }
+            public TypeStats ArrayElementStats { get; set; }
+        }
+    }
+}
diff --git a/src/Apache.Arrow.Operations/Shredding/ShredType.cs b/src/Apache.Arrow.Operations/Shredding/ShredType.cs
new file mode 100644
index 00000000..19f1a98c
--- /dev/null
+++ b/src/Apache.Arrow.Operations/Shredding/ShredType.cs
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow.Operations.Shredding
+{
+ /// <summary>
+ /// Describes the type that a shredded typed_value column expects.
+ /// Maps variant primitive types to the logical Parquet types used for shredding.
+ /// </summary>
+ public enum ShredType : byte
+ {
+ /// <summary>No typed_value column — all values go to the binary value column.</summary>
+ None = 0,
+
+ /// <summary>Boolean (Parquet BOOLEAN).</summary>
+ Boolean,
+
+ /// <summary>8-bit signed integer (Parquet INT32 with INT_8 annotation).</summary>
+ Int8,
+
+ /// <summary>16-bit signed integer (Parquet INT32 with INT_16 annotation).</summary>
+ Int16,
+
+ /// <summary>32-bit signed integer (Parquet INT32).</summary>
+ Int32,
+
+ /// <summary>64-bit signed integer (Parquet INT64).</summary>
+ Int64,
+
+ /// <summary>32-bit float (Parquet FLOAT).</summary>
+ Float,
+
+ /// <summary>64-bit double (Parquet DOUBLE).</summary>
+ Double,
+
+ /// <summary>Decimal with 4-byte unscaled value.</summary>
+ Decimal4,
+
+ /// <summary>Decimal with 8-byte unscaled value.</summary>
+ Decimal8,
+
+ /// <summary>Decimal with 16-byte unscaled value.</summary>
+ Decimal16,
+
+ /// <summary>Date as days since epoch (Parquet DATE).</summary>
+ Date,
+
+ /// <summary>Timestamp with UTC microseconds (Parquet TIMESTAMP with isAdjustedToUTC=true, MICROS).</summary>
+ Timestamp,
+
+ /// <summary>Timestamp without timezone, microseconds (Parquet TIMESTAMP with isAdjustedToUTC=false, MICROS).</summary>
+ TimestampNtz,
+
+ /// <summary>Time without timezone, microseconds (Parquet TIME with MICROS).</summary>
+ TimeNtz,
+
+ /// <summary>Timestamp with UTC nanoseconds.</summary>
+ TimestampTzNanos,
+
+ /// <summary>Timestamp without timezone, nanoseconds.</summary>
+ TimestampNtzNanos,
+
+ /// <summary>UTF-8 string (Parquet BINARY with STRING logical type).</summary>
+ String,
+
+ /// <summary>Binary data (Parquet BINARY).</summary>
+ Binary,
+
+ /// <summary>UUID (Parquet FIXED_LEN_BYTE_ARRAY(16) with UUID logical type).</summary>
+ Uuid,
+
+ /// <summary>Shredded as an object group with named sub-fields.</summary>
+ Object,
+
+ /// <summary>Shredded as an array (Parquet LIST).</summary>
+ Array,
+ }
+}
diff --git a/src/Apache.Arrow.Operations/Shredding/ShreddedArray.cs b/src/Apache.Arrow.Operations/Shredding/ShreddedArray.cs
new file mode 100644
index 00000000..2dcea000
--- /dev/null
+++ b/src/Apache.Arrow.Operations/Shredding/ShreddedArray.cs
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow;
+using Apache.Arrow.Scalars.Variant;
+
+namespace Apache.Arrow.Operations.Shredding
+{
+    /// <summary>
+    /// Reader for a single row of a shredded-array slot. The underlying storage
+    /// is a list of element groups (each a <c>{value, typed_value}</c> struct).
+    /// </summary>
+    public ref struct ShreddedArray
+    {
+        private readonly ShredSchema _schema;
+        private readonly ReadOnlySpan<byte> _metadata;
+        // The typed_value list (elements are {value, typed_value} structs). May be null.
+        private readonly ListArray _list;
+        // The residual binary column at the array level (for unshredded arrays). May be null.
+        private readonly IArrowArray _residual;
+        private readonly int _row;
+
+        internal ShreddedArray(
+            ShredSchema schema,
+            ReadOnlySpan<byte> metadata,
+            ListArray list,
+            IArrowArray residual,
+            int row)
+        {
+            _schema = schema;
+            _metadata = metadata;
+            _list = list;
+            _residual = residual;
+            _row = row;
+        }
+
+        /// <summary>
+        /// True when the typed list is populated at this row (the array is stored
+        /// element-by-element in the shredded column).
+        /// </summary>
+        public bool IsTypedList => _list != null && !_list.IsNull(_row);
+
+        /// <summary>
+        /// The number of shredded elements at this row. Only valid when
+        /// <see cref="IsTypedList"/> is true.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">If the array is stored as a residual.</exception>
+        public int ElementCount
+        {
+            get
+            {
+                if (!IsTypedList)
+                {
+                    throw new InvalidOperationException(
+                        "Array at this row is stored as a residual (not a typed list). " +
+                        "Use TryGetResidualReader and iterate via VariantArrayReader.");
+                }
+                return _list.ValueOffsets[_row + 1] - _list.ValueOffsets[_row];
+            }
+        }
+
+        /// <summary>
+        /// Gets a <see cref="ShreddedVariant"/> reader for the element at position
+        /// <paramref name="index"/>. Only valid when <see cref="IsTypedList"/> is true.
+        /// </summary>
+        /// <param name="index">Zero-based element position within this row's list.</param>
+        /// <exception cref="InvalidOperationException">If the array is stored as a residual.</exception>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// If <paramref name="index"/> is negative or not less than the element count.
+        /// </exception>
+        public ShreddedVariant GetElement(int index)
+        {
+            if (!IsTypedList)
+            {
+                throw new InvalidOperationException(
+                    "Array at this row is stored as a residual (not a typed list).");
+            }
+            int start = _list.ValueOffsets[_row];
+            int end = _list.ValueOffsets[_row + 1];
+            // The unsigned comparison rejects negative indices and indices >= count in one test.
+            if ((uint)index >= (uint)(end - start))
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+            StructArray elementGroup = (StructArray)_list.Values;
+            return ShreddingHelpers.BuildSlot(_schema.ArrayElement, _metadata, elementGroup, start + index);
+        }
+
+        /// <summary>
+        /// If a residual value is present at this row, returns
+        /// a <see cref="VariantReader"/> over the residual bytes. Callers can then
+        /// inspect the array via <c>VariantArrayReader</c>.
+        /// </summary>
+        /// <param name="reader">The residual reader, or <c>default</c> when this method returns false.</param>
+        /// <returns>True when the residual column exists and is non-null at this row.</returns>
+        public bool TryGetResidualReader(out VariantReader reader)
+        {
+            if (_residual == null || _residual.IsNull(_row))
+            {
+                reader = default;
+                return false;
+            }
+            ReadOnlySpan<byte> bytes = ((BinaryArray)_residual).GetBytes(_row, out _);
+            reader = new VariantReader(_metadata, bytes);
+            return true;
+        }
+
+        /// <summary>
+        /// Materializes the array into a <see cref="VariantValue"/>. If the typed
+        /// list is null at this row, falls back to the residual binary (the array
+        /// was stored unshredded for this row). When neither is populated, the
+        /// slot encodes a variant null — consistent with
+        /// <see cref="ShreddedVariant.ToVariantValue"/>.
+        /// </summary>
+        public VariantValue ToVariantValue()
+        {
+            if (IsTypedList)
+            {
+                int start = _list.ValueOffsets[_row];
+                int end = _list.ValueOffsets[_row + 1];
+                int count = end - start;
+
+                StructArray elementGroup = (StructArray)_list.Values;
+                List<VariantValue> elements = new List<VariantValue>(count);
+                for (int i = start; i < end; i++)
+                {
+                    // For array elements, a both-null slot encodes a variant null
+                    // (arrays cannot contain "missing"). ShreddedVariant.ToVariantValue
+                    // already returns VariantValue.Null for a missing slot.
+                    ShreddedVariant slot = ShreddingHelpers.BuildSlot(_schema.ArrayElement, _metadata, elementGroup, i);
+                    elements.Add(slot.ToVariantValue());
+                }
+                return VariantValue.FromArray(elements);
+            }
+
+            // No typed list at this row — decode from the residual if present,
+            // otherwise the slot is variant null.
+            if (!TryGetResidualReader(out VariantReader reader))
+            {
+                return VariantValue.Null;
+            }
+            return reader.ToVariantValue();
+        }
+    }
+}
diff --git a/src/Apache.Arrow.Operations/Shredding/ShreddedObject.cs b/src/Apache.Arrow.Operations/Shredding/ShreddedObject.cs
new file mode 100644
index 00000000..07e23811
--- /dev/null
+++ b/src/Apache.Arrow.Operations/Shredding/ShreddedObject.cs
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow;
+using Apache.Arrow.Scalars.Variant;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow.Operations.Shredding
+{
+ /// <summary>
+ /// Reader for a single row of a shredded-object slot. Provides field-wise
+ /// access to both typed sub-columns and residual unshredded fields.
+ /// </summary>
+ public ref struct ShreddedObject
+ {
+        // Shredding schema describing this object slot; supplies the shredded field names.
+        private readonly ShredSchema _schema;
+        // Variant metadata bytes passed to VariantReader when decoding residual values.
+        private readonly ReadOnlySpan<byte> _metadata;
+        // The typed_value struct (one field per shredded field, each itself a {value, typed_value} struct).
+        // May be null if this row's typed_value column is null (i.e., the whole slot is in residual).
+        private readonly StructArray _fields;
+        // The residual value at this level (a binary column holding unshredded fields). May be null.
+        private readonly IArrowArray _residual;
+        // Row index used for every lookup into _fields and _residual.
+        private readonly int _index;
+
+        /// <summary>
+        /// Creates a reader over one row of a shredded-object slot. Arguments are
+        /// stored as given; no validation is performed here.
+        /// </summary>
+        /// <param name="schema">Schema describing the shredded fields of this object.</param>
+        /// <param name="metadata">Variant metadata bytes used to decode residual values.</param>
+        /// <param name="typedValueStruct">The typed_value struct column, or null.</param>
+        /// <param name="residualValue">The residual binary column, or null.</param>
+        /// <param name="index">Row index within the columns.</param>
+        internal ShreddedObject(
+            ShredSchema schema,
+            ReadOnlySpan<byte> metadata,
+            StructArray typedValueStruct,
+            IArrowArray residualValue,
+            int index)
+        {
+            _schema = schema;
+            _metadata = metadata;
+            _fields = typedValueStruct;
+            _residual = residualValue;
+            _index = index;
+        }
+
+        /// <summary>
+        /// The names of the shredded fields, in the order the schema's
+        /// <c>ObjectFields</c> collection enumerates its keys.
+        /// </summary>
+        public IEnumerable<string> FieldNames => _schema.ObjectFields.Keys;
+
+        /// <summary>
+        /// Returns the shredded reader for the named field, which must be one of
+        /// the schema's shredded fields.
+        /// </summary>
+        /// <param name="name">Name of the shredded field to look up.</param>
+        /// <returns>The slot reader produced by <see cref="TryGetField"/>.</returns>
+        /// <exception cref="KeyNotFoundException">
+        /// If <paramref name="name"/> is not a shredded field.
+        /// </exception>
+        public ShreddedVariant GetField(string name)
+        {
+            if (TryGetField(name, out ShreddedVariant found))
+            {
+                return found;
+            }
+
+            throw new KeyNotFoundException($"Field '{name}' is not in the shredded object schema.");
+        }
+
+        /// <summary>
+        /// Looks up a shredded sub-field by name and produces a reader for it.
+        /// Returns false when <paramref name="name"/> is not a shredded field
+        /// (it may still exist in the residual — use
+        /// <see cref="TryGetResidualReader"/> to inspect).
+        /// </summary>
+        /// <param name="name">Name of the field to look up in the schema.</param>
+        /// <param name="field">
+        /// On success, the slot reader for the field; when typed_value is null at
+        /// this row the reader has neither a residual nor a typed column attached.
+        /// </param>
+        /// <returns>True if the schema declares the field as shredded.</returns>
+        public bool TryGetField(string name, out ShreddedVariant field)
+        {
+            bool isShredded = _schema.ObjectFields.TryGetValue(name, out ShredSchema childSchema);
+            if (!isShredded)
+            {
+                field = default;
+                return false;
+            }
+
+            bool typedPresent = _fields != null && !_fields.IsNull(_index);
+            if (typedPresent)
+            {
+                // Locate the child {value, typed_value} group by field name.
+                StructType structType = (StructType)_fields.Data.DataType;
+                int childIndex = structType.GetFieldIndex(name);
+                StructArray childGroup = (StructArray)_fields.Fields[childIndex];
+                field = ShreddingHelpers.BuildSlot(childSchema, _metadata, childGroup, _index);
+            }
+            else
+            {
+                // typed_value is null at this row — the field is effectively missing
+                // from the typed column, so hand back a slot with no columns set.
+                field = new ShreddedVariant(childSchema, _metadata, null, null, _index);
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// If the object's residual binary is populated at this row, returns a
+        /// <see cref="VariantReader"/> over it. The residual holds whatever fields
+        /// were not shredded (or, for a non-object row, the whole value).
+        /// </summary>
+        /// <param name="reader">
+        /// On success, a reader over the residual bytes; otherwise the default value.
+        /// </param>
+        /// <returns>True if a residual value exists at this row.</returns>
+        public bool TryGetResidualReader(out VariantReader reader)
+        {
+            // No residual column, or a null residual slot: nothing to read.
+            if (_residual == null || _residual.IsNull(_index))
+            {
+                reader = default;
+                return false;
+            }
+
+            ReadOnlySpan<byte> bytes = ((BinaryArray)_residual).GetBytes(_index, out _);
+            reader = new VariantReader(_metadata, bytes);
+            return true;
+        }
+
+        /// <summary>
+        /// Materializes the whole shredded object into a <see cref="VariantValue"/>,
+        /// merging typed-column fields with residual unshredded fields. When the
+        /// typed_value column is null at this row, the residual is returned
+        /// as-is (it may be any variant type, not just an object). When neither
+        /// column is populated the result is <see cref="VariantValue.Null"/>.
+        /// </summary>
+        /// <returns>The logical variant value stored at this row.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// Both typed_value and the residual are populated at this row, but the
+        /// residual is not a variant object.
+        /// </exception>
+        public VariantValue ToVariantValue()
+        {
+            bool typedPopulated = _fields != null && !_fields.IsNull(_index);
+            bool residualPopulated = _residual != null && !_residual.IsNull(_index);
+
+            if (!typedPopulated && !residualPopulated)
+            {
+                return VariantValue.Null;
+            }
+
+            // No shredded fields at this row — whatever is in the residual IS the value.
+            if (!typedPopulated)
+            {
+                BinaryArray binary = (BinaryArray)_residual;
+                ReadOnlySpan<byte> bytes = binary.GetBytes(_index, out _);
+                return new VariantReader(_metadata, bytes).ToVariantValue();
+            }
+
+            Dictionary<string, VariantValue> fields = new Dictionary<string, VariantValue>();
+
+            // Shredded fields (from typed_value). Slots reported as missing are
+            // left out of the resulting object entirely.
+            StructType fieldsStructType = (StructType)_fields.Data.DataType;
+            foreach (KeyValuePair<string, ShredSchema> entry in _schema.ObjectFields)
+            {
+                int fieldIdx = fieldsStructType.GetFieldIndex(entry.Key);
+                StructArray elementGroup = (StructArray)_fields.Fields[fieldIdx];
+                ShreddedVariant slot = ShreddingHelpers.BuildSlot(entry.Value, _metadata, elementGroup, _index);
+                if (!slot.IsMissing)
+                {
+                    fields[entry.Key] = slot.ToVariantValue();
+                }
+            }
+
+            // Partially shredded object — merge residual unshredded fields.
+            if (residualPopulated)
+            {
+                BinaryArray residualBinary = (BinaryArray)_residual;
+                ReadOnlySpan<byte> residualBytes = residualBinary.GetBytes(_index, out _);
+                VariantReader residualReader = new VariantReader(_metadata, residualBytes);
+                if (!residualReader.IsObject)
+                {
+                    throw new InvalidOperationException(
+                        "Residual value for a partially shredded object must itself be a variant object.");
+                }
+
+                VariantValue residual = residualReader.ToVariantValue();
+                foreach (KeyValuePair<string, VariantValue> kv in residual.AsObject())
+                {
+                    // Indexer assignment: a residual entry replaces a typed entry
+                    // that happens to share the same key.
+                    fields[kv.Key] = kv.Value;
+                }
+            }
+
+            return VariantValue.FromObject(fields);
+        }
+ }
+}
diff --git a/src/Apache.Arrow.Operations/Shredding/ShreddedVariant.cs b/src/Apache.Arrow.Operations/Shredding/ShreddedVariant.cs
new file mode 100644
index 00000000..5ef35759
--- /dev/null
+++ b/src/Apache.Arrow.Operations/Shredding/ShreddedVariant.cs
@@ -0,0 +1,358 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Data.SqlTypes;
+using Apache.Arrow.Arrays;
+using Apache.Arrow.Scalars.Variant;
+
+namespace Apache.Arrow.Operations.Shredding
+{
+ /// <summary>
+ /// Zero-copy reader for a single row of a (possibly shredded) variant column.
+ /// Composes with the <see cref="ShredSchema"/> for this position to expose the
+ /// typed columns and residual bytes side-by-side, or to materialize the logical
+ /// value on demand.
+ /// <para>
+ /// A <see cref="ShreddedVariant"/> does not own any Arrow buffers; it is only
+ /// valid while the underlying Arrow arrays are alive.
+ /// </para>
+ /// </summary>
+ public ref struct ShreddedVariant
+ {
+        // Schema describing how this slot is shredded; never null for a constructed instance.
+        private readonly ShredSchema _schema;
+        // Variant metadata bytes passed to VariantReader when decoding residual values.
+        private readonly ReadOnlySpan<byte> _metadata;
+        // _valueArray is the residual binary column at this level (may be null).
+        private readonly IArrowArray _valueArray;
+        // _typedValueArray is the typed column at this level (may be null if no shredding here).
+        private readonly IArrowArray _typedValueArray;
+        // Row index used for every lookup into the two columns.
+        private readonly int _index;
+
+        /// <summary>
+        /// Creates a reader over one slot of a (possibly shredded) variant column.
+        /// </summary>
+        /// <param name="schema">Schema describing how this slot is shredded.</param>
+        /// <param name="metadata">Variant metadata bytes used to decode residual values.</param>
+        /// <param name="valueArray">The residual binary column, or null.</param>
+        /// <param name="typedValueArray">The typed column, or null.</param>
+        /// <param name="index">Row index within the columns.</param>
+        /// <exception cref="ArgumentNullException">
+        /// If <paramref name="schema"/> is null.
+        /// </exception>
+        internal ShreddedVariant(
+            ShredSchema schema,
+            ReadOnlySpan<byte> metadata,
+            IArrowArray valueArray,
+            IArrowArray typedValueArray,
+            int index)
+        {
+            _schema = schema ?? throw new ArgumentNullException(nameof(schema));
+            _metadata = metadata;
+            _valueArray = valueArray;
+            _typedValueArray = typedValueArray;
+            _index = index;
+        }
+
+        /// <summary>The schema describing how this slot is shredded.</summary>
+        public ShredSchema Schema
+        {
+            get { return _schema; }
+        }
+
+        /// <summary>
+        /// The column-level variant metadata bytes this slot was constructed with.
+        /// </summary>
+        public ReadOnlySpan<byte> Metadata => _metadata;
+
+        /// <summary>
+        /// True when the residual <c>value</c> column exists and is non-null at this index.
+        /// </summary>
+        public bool HasResidual
+        {
+            get
+            {
+                if (_valueArray == null)
+                {
+                    return false;
+                }
+
+                return !_valueArray.IsNull(_index);
+            }
+        }
+
+        /// <summary>
+        /// True when the <c>typed_value</c> column exists and is non-null at this index.
+        /// </summary>
+        public bool HasTypedValue
+        {
+            get
+            {
+                if (_typedValueArray == null)
+                {
+                    return false;
+                }
+
+                return !_typedValueArray.IsNull(_index);
+            }
+        }
+
+        /// <summary>
+        /// True when neither the residual nor the typed column is populated at this index
+        /// — valid only for sub-fields of shredded objects.
+        /// </summary>
+        public bool IsMissing => !(HasResidual || HasTypedValue);
+
+        /// <summary>
+        /// Materializes this slot into a logical <see cref="VariantValue"/>, merging
+        /// typed-column values with residual bytes per the shredding spec. A slot
+        /// with neither column populated yields <see cref="VariantValue.Null"/>.
+        /// </summary>
+        /// <returns>The logical variant value stored at this slot.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// A primitive slot has both <c>value</c> and <c>typed_value</c> populated,
+        /// or an unshredded slot has no residual value to read.
+        /// </exception>
+        public VariantValue ToVariantValue()
+        {
+            // Both-null at this slot means the logical value is variant null. (The
+            // "missing" encoding — omitting the field entirely from the output — is
+            // a choice made by the container: see ShreddedObject, which uses
+            // IsMissing to decide whether to drop a sub-field.)
+            if (IsMissing)
+            {
+                return VariantValue.Null;
+            }
+
+            ShredType kind = _schema.TypedValueType;
+            if (kind == ShredType.None)
+            {
+                return ReadResidual();
+            }
+            if (kind == ShredType.Object)
+            {
+                return GetObject().ToVariantValue();
+            }
+            if (kind == ShredType.Array)
+            {
+                return GetArray().ToVariantValue();
+            }
+
+            // Primitive shredding. Per the Parquet variant shredding spec, a
+            // primitive slot may have at most one of value / typed_value set.
+            if (!HasTypedValue)
+            {
+                return ReadResidual();
+            }
+
+            // Both populated at the same row: the shredded data is invalid and
+            // is rejected rather than silently picking one side.
+            if (HasResidual)
+            {
+                throw new InvalidOperationException(
+                    "Invalid shredded variant: primitive slot has both 'value' and 'typed_value' populated.");
+            }
+
+            return ReadTypedPrimitive();
+        }
+
+        /// <summary>
+        /// If the residual column has a value at this index, returns a
+        /// <see cref="VariantReader"/> over its bytes.
+        /// </summary>
+        /// <param name="reader">
+        /// On success, a reader over the residual bytes; otherwise the default value.
+        /// </param>
+        /// <returns>True if <see cref="HasResidual"/> is true for this slot.</returns>
+        public bool TryGetResidualReader(out VariantReader reader)
+        {
+            if (HasResidual)
+            {
+                BinaryArray binary = (BinaryArray)_valueArray;
+                ReadOnlySpan<byte> bytes = binary.GetBytes(_index, out _);
+                reader = new VariantReader(_metadata, bytes);
+                return true;
+            }
+
+            reader = default;
+            return false;
+        }
+
+        /// <summary>
+        /// Reader for a shredded object at this slot. Valid only when the schema's
+        /// <see cref="ShredSchema.TypedValueType"/> is <see cref="ShredType.Object"/>.
+        /// </summary>
+        /// <returns>An object reader over this slot's typed and residual columns.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot's schema type is not <see cref="ShredType.Object"/>.
+        /// </exception>
+        public ShreddedObject GetObject()
+        {
+            if (_schema.TypedValueType == ShredType.Object)
+            {
+                // A typed column that is not a StructArray is passed through as null.
+                StructArray typedStruct = _typedValueArray as StructArray;
+                return new ShreddedObject(_schema, _metadata, typedStruct, _valueArray, _index);
+            }
+
+            throw new InvalidOperationException(
+                $"Slot is not shredded as an object (schema type {_schema.TypedValueType}).");
+        }
+
+        /// <summary>
+        /// Reader for a shredded array at this slot. Valid only when the schema's
+        /// <see cref="ShredSchema.TypedValueType"/> is <see cref="ShredType.Array"/>.
+        /// </summary>
+        /// <returns>An array reader over this slot's typed and residual columns.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot's schema type is not <see cref="ShredType.Array"/>.
+        /// </exception>
+        public ShreddedArray GetArray()
+        {
+            if (_schema.TypedValueType == ShredType.Array)
+            {
+                // A typed column that is not a ListArray is passed through as null.
+                ListArray typedList = _typedValueArray as ListArray;
+                return new ShreddedArray(_schema, _metadata, typedList, _valueArray, _index);
+            }
+
+            throw new InvalidOperationException(
+                $"Slot is not shredded as an array (schema type {_schema.TypedValueType}).");
+        }
+
+        // Decodes the residual bytes at this index into a VariantValue.
+        // Throws InvalidOperationException when the residual column is absent
+        // or null at this index.
+        private VariantValue ReadResidual()
+        {
+            if (!HasResidual)
+            {
+                throw new InvalidOperationException("No residual value to read.");
+            }
+
+            BinaryArray binary = (BinaryArray)_valueArray;
+            ReadOnlySpan<byte> bytes = binary.GetBytes(_index, out _);
+            return new VariantReader(_metadata, bytes).ToVariantValue();
+        }
+
+ // ---------------------------------------------------------------
+ // Typed-column accessors — zero-copy access to the shredded value
+ // without materializing a VariantValue.
+ //
+ // Each getter requires:
+ // (a) the slot's schema to match the requested type, and
+ // (b) the typed_value column to be populated at this index.
+ // Otherwise it throws. Callers that want automatic residual fallback
+ // should use ToVariantValue instead.
+ // ---------------------------------------------------------------
+
+        /// <summary>Reads the shredded boolean value at this slot.</summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Boolean"/> or no typed value is present.
+        /// </exception>
+        public bool GetBoolean()
+        {
+            BooleanArray column = (BooleanArray)RequireTyped(ShredType.Boolean);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>Reads the shredded 8-bit signed integer at this slot.</summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Int8"/> or no typed value is present.
+        /// </exception>
+        public sbyte GetInt8()
+        {
+            Int8Array column = (Int8Array)RequireTyped(ShredType.Int8);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>Reads the shredded 16-bit signed integer at this slot.</summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Int16"/> or no typed value is present.
+        /// </exception>
+        public short GetInt16()
+        {
+            Int16Array column = (Int16Array)RequireTyped(ShredType.Int16);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>Reads the shredded 32-bit signed integer at this slot.</summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Int32"/> or no typed value is present.
+        /// </exception>
+        public int GetInt32()
+        {
+            Int32Array column = (Int32Array)RequireTyped(ShredType.Int32);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>Reads the shredded 64-bit signed integer at this slot.</summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Int64"/> or no typed value is present.
+        /// </exception>
+        public long GetInt64()
+        {
+            Int64Array column = (Int64Array)RequireTyped(ShredType.Int64);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>Reads the shredded 32-bit float at this slot.</summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Float"/> or no typed value is present.
+        /// </exception>
+        public float GetFloat()
+        {
+            FloatArray column = (FloatArray)RequireTyped(ShredType.Float);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>Reads the shredded 64-bit double at this slot.</summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Double"/> or no typed value is present.
+        /// </exception>
+        public double GetDouble()
+        {
+            DoubleArray column = (DoubleArray)RequireTyped(ShredType.Double);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>
+        /// Reads the shredded decimal value at this slot as a <see cref="decimal"/>.
+        /// Accepts the Decimal4, Decimal8, and Decimal16 shred types, whichever of
+        /// Decimal32Array, Decimal64Array, or Decimal128Array backs the Arrow column.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not a decimal type, no typed value is present, or
+        /// the backing array is not one of the supported decimal array types.
+        /// </exception>
+        public decimal GetDecimal()
+        {
+            RequireDecimalSchema();
+            if (!HasTypedValue)
+            {
+                ThrowNoTyped();
+            }
+
+            IArrowArray storage = UnwrapExtension(_typedValueArray);
+            if (storage is Decimal32Array narrow)
+            {
+                return narrow.GetValue(_index).Value;
+            }
+            if (storage is Decimal64Array medium)
+            {
+                return medium.GetValue(_index).Value;
+            }
+            if (storage is Decimal128Array wide)
+            {
+                return wide.GetValue(_index).Value;
+            }
+
+            throw new InvalidOperationException(
+                $"Shredded decimal column is backed by {storage.GetType().Name}, which is not a supported decimal array type.");
+        }
+
+        /// <summary>
+        /// Reads the shredded decimal value at this slot as a <see cref="SqlDecimal"/>.
+        /// Accepts the Decimal4, Decimal8, and Decimal16 shred types. Decimal32Array
+        /// and Decimal64Array values are read via <c>GetValue</c> and converted;
+        /// Decimal128Array values are read via <c>GetSqlDecimal</c>.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not a decimal type, no typed value is present, or
+        /// the backing array is not one of the supported decimal array types.
+        /// </exception>
+        public SqlDecimal GetSqlDecimal()
+        {
+            RequireDecimalSchema();
+            if (!HasTypedValue)
+            {
+                ThrowNoTyped();
+            }
+
+            IArrowArray storage = UnwrapExtension(_typedValueArray);
+            if (storage is Decimal32Array narrow)
+            {
+                return narrow.GetValue(_index).Value;
+            }
+            if (storage is Decimal64Array medium)
+            {
+                return medium.GetValue(_index).Value;
+            }
+            if (storage is Decimal128Array wide)
+            {
+                return wide.GetSqlDecimal(_index).Value;
+            }
+
+            throw new InvalidOperationException(
+                $"Shredded decimal column is backed by {storage.GetType().Name}, which is not a supported decimal array type.");
+        }
+
+        /// <summary>Reads the shredded date (days since epoch) at this slot.</summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Date"/> or no typed value is present.
+        /// </exception>
+        public int GetDateDays()
+        {
+            Date32Array column = (Date32Array)RequireTyped(ShredType.Date);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>
+        /// Reads the shredded timestamp (microseconds since epoch, UTC) at this slot.
+        /// The stored value is returned as-is; the column's unit is not re-checked here.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Timestamp"/> or no typed value is present.
+        /// </exception>
+        public long GetTimestampMicros()
+        {
+            TimestampArray column = (TimestampArray)RequireTyped(ShredType.Timestamp);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>
+        /// Reads the shredded timestamp-without-tz (microseconds since epoch) at this slot.
+        /// The stored value is returned as-is; the column's unit is not re-checked here.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.TimestampNtz"/> or no typed value is present.
+        /// </exception>
+        public long GetTimestampNtzMicros()
+        {
+            TimestampArray column = (TimestampArray)RequireTyped(ShredType.TimestampNtz);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>
+        /// Reads the shredded time-without-tz (microseconds since midnight) at this slot.
+        /// The stored value is returned as-is; the column's unit is not re-checked here.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.TimeNtz"/> or no typed value is present.
+        /// </exception>
+        public long GetTimeNtzMicros()
+        {
+            Time64Array column = (Time64Array)RequireTyped(ShredType.TimeNtz);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>
+        /// Reads the shredded timestamp-with-tz (nanoseconds since epoch) at this slot.
+        /// The stored value is returned as-is; the column's unit is not re-checked here.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.TimestampTzNanos"/> or no typed value is present.
+        /// </exception>
+        public long GetTimestampTzNanos()
+        {
+            TimestampArray column = (TimestampArray)RequireTyped(ShredType.TimestampTzNanos);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>
+        /// Reads the shredded timestamp-without-tz (nanoseconds since epoch) at this slot.
+        /// The stored value is returned as-is; the column's unit is not re-checked here.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.TimestampNtzNanos"/> or no typed value is present.
+        /// </exception>
+        public long GetTimestampNtzNanos()
+        {
+            TimestampArray column = (TimestampArray)RequireTyped(ShredType.TimestampNtzNanos);
+            return column.GetValue(_index).Value;
+        }
+
+        /// <summary>Reads the shredded string value at this slot.</summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.String"/> or no typed value is present.
+        /// </exception>
+        public string GetString()
+        {
+            StringArray column = (StringArray)RequireTyped(ShredType.String);
+            return column.GetString(_index);
+        }
+
+        /// <summary>
+        /// Reads the shredded binary value at this slot as a byte span obtained
+        /// from the typed binary column's <c>GetBytes</c>.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Binary"/> or no typed value is present.
+        /// </exception>
+        public ReadOnlySpan<byte> GetBinaryBytes() => ((BinaryArray)RequireTyped(ShredType.Binary)).GetBytes(_index);
+
+        /// <summary>
+        /// Reads the shredded UUID at this slot as a <see cref="Guid"/>. The stored
+        /// bytes are interpreted as big-endian.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Uuid"/> or no typed value is present.
+        /// </exception>
+        public Guid GetUuid()
+        {
+            FixedSizeBinaryArray arr = (FixedSizeBinaryArray)RequireTyped(ShredType.Uuid);
+            ReadOnlySpan<byte> raw = arr.GetBytes(_index);
+#if NET8_0_OR_GREATER
+            return new Guid(raw, bigEndian: true);
+#else
+            // Guid(byte[]) expects the first three groups (4, 2 and 2 bytes) in
+            // little-endian order, so those groups are byte-reversed; the final
+            // eight bytes are copied unchanged.
+            byte[] bytes = new byte[16];
+            bytes[0] = raw[3]; bytes[1] = raw[2]; bytes[2] = raw[1]; bytes[3] = raw[0];
+            bytes[4] = raw[5]; bytes[5] = raw[4];
+            bytes[6] = raw[7]; bytes[7] = raw[6];
+            raw.Slice(8, 8).CopyTo(bytes.AsSpan(8));
+            return new Guid(bytes);
+#endif
+        }
+
+        /// <summary>
+        /// Reads the shredded UUID at this slot as raw big-endian (RFC 4122) bytes,
+        /// exactly as returned by the typed column's <c>GetBytes</c>.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// If the slot schema is not <see cref="ShredType.Uuid"/> or no typed value is present.
+        /// </exception>
+        public ReadOnlySpan<byte> GetUuidBytes()
+            => ((FixedSizeBinaryArray)RequireTyped(ShredType.Uuid)).GetBytes(_index);
+
+ // ---------------------------------------------------------------
+ // Primitive dispatch for internal materialization. Delegates to the
+ // typed getters so the two paths stay in sync.
+ // ---------------------------------------------------------------
+
+        // Converts the typed primitive at this slot into a VariantValue by routing
+        // through the matching public getter for the schema's shred type. Throws
+        // InvalidOperationException for a shred type with no primitive mapping.
+        private VariantValue ReadTypedPrimitive()
+        {
+            ShredType kind = _schema.TypedValueType;
+            switch (kind)
+            {
+                case ShredType.Boolean:
+                    return VariantValue.FromBoolean(GetBoolean());
+                case ShredType.Int8:
+                    return VariantValue.FromInt8(GetInt8());
+                case ShredType.Int16:
+                    return VariantValue.FromInt16(GetInt16());
+                case ShredType.Int32:
+                    return VariantValue.FromInt32(GetInt32());
+                case ShredType.Int64:
+                    return VariantValue.FromInt64(GetInt64());
+                case ShredType.Float:
+                    return VariantValue.FromFloat(GetFloat());
+                case ShredType.Double:
+                    return VariantValue.FromDouble(GetDouble());
+                case ShredType.Decimal4:
+                    return VariantValue.FromDecimal4(GetDecimal());
+                case ShredType.Decimal8:
+                    return VariantValue.FromDecimal8(GetDecimal());
+                case ShredType.Decimal16:
+                    // Decimal16 goes through the SqlDecimal getter rather than decimal.
+                    return VariantValue.FromDecimal16(GetSqlDecimal());
+                case ShredType.Date:
+                    return VariantValue.FromDate(GetDateDays());
+                case ShredType.Timestamp:
+                    return VariantValue.FromTimestamp(GetTimestampMicros());
+                case ShredType.TimestampNtz:
+                    return VariantValue.FromTimestampNtz(GetTimestampNtzMicros());
+                case ShredType.TimeNtz:
+                    return VariantValue.FromTimeNtz(GetTimeNtzMicros());
+                case ShredType.TimestampTzNanos:
+                    return VariantValue.FromTimestampTzNanos(GetTimestampTzNanos());
+                case ShredType.TimestampNtzNanos:
+                    return VariantValue.FromTimestampNtzNanos(GetTimestampNtzNanos());
+                case ShredType.String:
+                    return VariantValue.FromString(GetString());
+                case ShredType.Binary:
+                    return VariantValue.FromBinary(GetBinaryBytes().ToArray());
+                case ShredType.Uuid:
+                    return VariantValue.FromUuid(GetUuid());
+                default:
+                    throw new InvalidOperationException(
+                        $"Unexpected primitive shred type {kind}.");
+            }
+        }
+
+        // Returns the typed column (with any extension wrapper removed) after
+        // checking that the schema type equals `expected` and that a typed value
+        // exists at this index; throws InvalidOperationException otherwise.
+        private IArrowArray RequireTyped(ShredType expected)
+        {
+            ShredType actual = _schema.TypedValueType;
+            if (actual != expected)
+            {
+                throw new InvalidOperationException(
+                    $"Slot schema is {actual}, not {expected}.");
+            }
+
+            if (!HasTypedValue)
+            {
+                ThrowNoTyped();
+            }
+
+            return UnwrapExtension(_typedValueArray);
+        }
+
+        // Throws InvalidOperationException unless the schema type is one of
+        // Decimal4, Decimal8, or Decimal16.
+        private void RequireDecimalSchema()
+        {
+            ShredType kind = _schema.TypedValueType;
+            bool isDecimal = kind == ShredType.Decimal4
+                || kind == ShredType.Decimal8
+                || kind == ShredType.Decimal16;
+            if (!isDecimal)
+            {
+                throw new InvalidOperationException(
+                    $"Slot schema is {kind}, not a decimal type.");
+            }
+        }
+
+        // Always throws: shared failure path for getters invoked on a slot whose
+        // typed_value is not populated.
+        private void ThrowNoTyped()
+        {
+            throw new InvalidOperationException(
+                "No typed_value at this index (check HasTypedValue first, or use ToVariantValue for residual fallback).");
+        }
+
+        // Returns the storage array of an ExtensionArray, or the array itself
+        // when it is not an extension wrapper.
+        private static IArrowArray UnwrapExtension(IArrowArray arr)
+        {
+            if (arr is ExtensionArray wrapper)
+            {
+                return wrapper.Storage;
+            }
+
+            return arr;
+        }
+ }
+}
diff --git a/src/Apache.Arrow.Operations/Shredding/ShreddedVariantArrayBuilder.cs b/src/Apache.Arrow.Operations/Shredding/ShreddedVariantArrayBuilder.cs
new file mode 100644
index 00000000..44b6de59
--- /dev/null
+++ b/src/Apache.Arrow.Operations/Shredding/ShreddedVariantArrayBuilder.cs
@@ -0,0 +1,513 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow;
+using Apache.Arrow.Arrays;
+using Apache.Arrow.Memory;
+using Apache.Arrow.Scalars.Variant;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow.Operations.Shredding
+{
+ /// <summary>
+ /// Assembles a shredded <see cref="VariantArray"/> from pre-shredded rows.
+ /// Produces an Arrow struct with shared metadata, residual value,
+ /// and the typed_value tree whose Arrow shape matches the <see cref="ShredSchema"/>.
+ /// </summary>
+ public static class ShreddedVariantArrayBuilder
+ {
+ ///
+ /// Builds a shredded from the output of
+ /// .
+ ///
+ /// The shredding schema applied to each row.
+ /// The column-level variant metadata (shared across rows).
+ /// Per-row shred results whose residual bytes reference .
+ /// Arrow memory allocator, or default if null.
+ public static VariantArray Build(
+ ShredSchema schema,
+ byte[] metadata,
+ IReadOnlyList rows,
+ MemoryAllocator allocator = null)
+ {
+ if (schema == null) throw new ArgumentNullException(nameof(schema));
+ if (metadata == null) throw new ArgumentNullException(nameof(metadata));
+ if (rows == null) throw new ArgumentNullException(nameof(rows));
+
+ int rowCount = rows.Count;
+
+ // metadata column: emit the shared bytes once per row. (A dictionary-encoded
+ // or run-end-encoded representation would compress this; VariantArray's reader
+ // already handles those, but for simplicity we emit the plain binary form.)
+ BinaryArray.Builder metadataBuilder = new BinaryArray.Builder();
+ for (int i = 0; i < rowCount; i++)
+ {
+ metadataBuilder.Append((ReadOnlySpan)metadata);
+ }
+ BinaryArray metadataArr = metadataBuilder.Build(allocator);
+
+ // value column: residual bytes (or null).
+ BinaryArray valueArr = BuildBinaryColumn(rows, allocator);
+
+ // typed_value column (if the schema has one).
+ List fields = new List
+ {
+ new Field("metadata", BinaryType.Default, false),
+ new Field("value", BinaryType.Default, true),
+ };
+ List children = new List { metadataArr, valueArr };
+
+ if (schema.TypedValueType != ShredType.None)
+ {
+ List