diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index 3d3f9975fad0..762d3d8721f8 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -28,6 +28,7 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.BiMap; @@ -54,9 +55,20 @@ public class Schema implements Serializable { private static final Joiner NEWLINE = Joiner.on('\n'); private static final String ALL_COLUMNS = "*"; private static final int DEFAULT_SCHEMA_ID = 0; - private static final int DEFAULT_VALUES_MIN_FORMAT_VERSION = 3; - private static final Map MIN_FORMAT_VERSIONS = - ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3, Type.TypeID.VARIANT, 3); + + @VisibleForTesting static final int DEFAULT_VALUES_MIN_FORMAT_VERSION = 3; + + @VisibleForTesting + static final Map MIN_FORMAT_VERSIONS = + ImmutableMap.of( + Type.TypeID.TIMESTAMP_NANO, + 3, + Type.TypeID.VARIANT, + 3, + Type.TypeID.GEOMETRY, + 3, + Type.TypeID.GEOGRAPHY, + 3); private final StructType struct; private final int schemaId; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Identity.java b/api/src/main/java/org/apache/iceberg/transforms/Identity.java index eb613f803a68..2b88222d6a81 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Identity.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Identity.java @@ -24,13 +24,14 @@ import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.UnboundPredicate; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; import org.apache.iceberg.util.SerializableFunction; class Identity implements Transform { - private static final Set UNSUPPORTED_TYPES = Set.of(Types.VariantType.get()); private static final Identity INSTANCE = new Identity<>(); + private static final Set UNSUPPORTED_TYPES = + ImmutableSet.of(Type.TypeID.VARIANT, Type.TypeID.GEOMETRY, Type.TypeID.GEOGRAPHY); private final Type type; @@ -42,7 +43,7 @@ class Identity implements Transform { @Deprecated public static Identity get(Type type) { Preconditions.checkArgument( - !UNSUPPORTED_TYPES.contains(type), "Unsupported type for identity: %s", type); + !UNSUPPORTED_TYPES.contains(type.typeId()), "Unsupported type for identity: %s", type); return new Identity<>(type); } @@ -88,6 +89,9 @@ public SerializableFunction bind(Type type) { @Override public boolean canTransform(Type maybePrimitive) { + if (UNSUPPORTED_TYPES.contains(maybePrimitive.typeId())) { + return false; + } return maybePrimitive.isPrimitiveType(); } diff --git a/api/src/main/java/org/apache/iceberg/types/AssignFreshIds.java b/api/src/main/java/org/apache/iceberg/types/AssignFreshIds.java index f6422671bacb..75055cddc197 100644 --- a/api/src/main/java/org/apache/iceberg/types/AssignFreshIds.java +++ b/api/src/main/java/org/apache/iceberg/types/AssignFreshIds.java @@ -125,8 +125,8 @@ public Type map(Types.MapType map, Supplier keyFuture, Supplier valu } @Override - public Type variant() { - return Types.VariantType.get(); + public Type variant(Types.VariantType variant) { + return variant; } @Override diff --git a/api/src/main/java/org/apache/iceberg/types/AssignIds.java b/api/src/main/java/org/apache/iceberg/types/AssignIds.java index 68588f581adc..b2f72751eb89 100644 --- a/api/src/main/java/org/apache/iceberg/types/AssignIds.java +++ b/api/src/main/java/org/apache/iceberg/types/AssignIds.java @@ -92,6 +92,11 @@ public Type map(Types.MapType map, Supplier keyFuture, Supplier valu } } + @Override + public Type variant(Types.VariantType variant) { + return variant; + } + @Override public Type primitive(Type.PrimitiveType primitive) { return primitive; diff --git a/api/src/main/java/org/apache/iceberg/types/EdgeAlgorithm.java b/api/src/main/java/org/apache/iceberg/types/EdgeAlgorithm.java new file mode 100644 index 000000000000..781c9f636d86 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/types/EdgeAlgorithm.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.types; + +import java.util.Locale; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +/** The algorithm for interpolating edges. */ +public enum EdgeAlgorithm { + /** Edges are interpolated as geodesics on a sphere. */ + SPHERICAL, + /** See Vincenty's formulae */ + VINCENTY, + /** + * Thomas, Paul D. Spheroidal geodesics, reference systems, & local geometry. US Naval + * Oceanographic Office, 1970. + */ + THOMAS, + /** + * Thomas, Paul D. Mathematical models for navigation systems. US Naval Oceanographic Office, + * 1965. + */ + ANDOYER, + /** + * Karney, Charles + * FF. "Algorithms for geodesics." Journal of Geodesy 87 (2013): 43-55 , and GeographicLib. + */ + KARNEY; + + public static EdgeAlgorithm fromName(String algorithmName) { + Preconditions.checkNotNull(algorithmName, "Edge interpolation algorithm cannot be null"); + try { + return EdgeAlgorithm.valueOf(algorithmName.toUpperCase(Locale.ENGLISH)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException( + String.format("Invalid edge interpolation algorithm: %s", algorithmName), e); + } + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ENGLISH); + } +} diff --git a/api/src/main/java/org/apache/iceberg/types/ReassignDoc.java b/api/src/main/java/org/apache/iceberg/types/ReassignDoc.java index 9ce04a7bd103..328d81c42885 100644 --- a/api/src/main/java/org/apache/iceberg/types/ReassignDoc.java +++ b/api/src/main/java/org/apache/iceberg/types/ReassignDoc.java @@ -96,6 +96,11 @@ public Type map(Types.MapType map, Supplier keyTypeFuture, Supplier } } + @Override + public Type variant(Types.VariantType variant) { + return variant; + } + @Override public Type primitive(Type.PrimitiveType primitive) { return primitive; diff --git a/api/src/main/java/org/apache/iceberg/types/ReassignIds.java b/api/src/main/java/org/apache/iceberg/types/ReassignIds.java index 24c99bff0fda..6e7b42a474cf 100644 --- a/api/src/main/java/org/apache/iceberg/types/ReassignIds.java +++ b/api/src/main/java/org/apache/iceberg/types/ReassignIds.java @@ -162,8 +162,8 @@ public Type map(Types.MapType map, Supplier keyTypeFuture, Supplier } @Override - public Type variant() { - return Types.VariantType.get(); + public Type variant(Types.VariantType variant) { + return variant; } @Override diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 306cf0eec625..b9492da75145 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -43,6 +43,8 @@ enum TypeID { FIXED(ByteBuffer.class), BINARY(ByteBuffer.class), DECIMAL(BigDecimal.class), + GEOMETRY(ByteBuffer.class), + GEOGRAPHY(ByteBuffer.class), STRUCT(StructLike.class), LIST(List.class), MAP(Map.class), @@ -81,6 +83,10 @@ default Types.MapType asMapType() { throw new IllegalArgumentException("Not a map type: " + this); } + default Types.VariantType asVariantType() { + throw new IllegalArgumentException("Not a variant type: " + this); + } + default boolean isNestedType() { return false; } diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java index 996556f171b9..1e55b5c9ad82 100644 --- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java +++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java @@ -395,6 +395,10 @@ public static Type find(Schema schema, Predicate predicate) { return visit(schema, new FindTypeVisitor(predicate)); } + public static Type find(Type type, Predicate predicate) { + return visit(type, new FindTypeVisitor(predicate)); + } + public static boolean isPromotionAllowed(Type from, Type.PrimitiveType to) { // Warning! Before changing this function, make sure that the type change doesn't introduce // compatibility problems in partitioning. @@ -536,6 +540,12 @@ private static int estimateSize(Type type) { case BINARY: case VARIANT: return 80; + case GEOMETRY: + case GEOGRAPHY: + // 80 bytes is an approximate size for a polygon or linestring with 4 to 5 coordinates. + // This is a reasonable estimate for the size of a geometry or geography object without + // additional details. + return 80; case DECIMAL: // 12 (header) + (12 + 12 + 4) (BigInteger) + 4 (scale) = 44 bytes return 44; @@ -709,8 +719,8 @@ public T map(Types.MapType map, Supplier keyResult, Supplier valueResult) return null; } - public T variant() { - return null; + public T variant(Types.VariantType variant) { + throw new UnsupportedOperationException("Unsupported type: variant"); } public T primitive(Type.PrimitiveType primitive) { @@ -790,7 +800,7 @@ public static T visit(Type type, CustomOrderSchemaVisitor visitor) { new VisitFuture<>(map.valueType(), visitor)); case VARIANT: - return visitor.variant(); + return visitor.variant(type.asVariantType()); default: return visitor.primitive(type.asPrimitiveType()); diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index d4a08ac27e49..dd1e1fc840a6 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -55,9 +55,16 @@ private Types() {} .put(StringType.get().toString(), StringType.get()) .put(UUIDType.get().toString(), UUIDType.get()) .put(BinaryType.get().toString(), BinaryType.get()) + .put(GeometryType.crs84().toString(), GeometryType.crs84()) + .put(GeographyType.crs84().toString(), GeographyType.crs84()) .buildOrThrow(); private static final Pattern FIXED = Pattern.compile("fixed\\[\\s*(\\d+)\\s*\\]"); + private static final Pattern GEOMETRY_PARAMETERS = + Pattern.compile("geometry\\s*(?:\\(\\s*([^,]+?)\\s*\\))?", Pattern.CASE_INSENSITIVE); + private static final Pattern GEOGRAPHY_PARAMETERS = + Pattern.compile( + "geography\\s*(?:\\(\\s*([^,]+)\\s*(?:,\\s*(\\w*)\\s*)?\\))?", Pattern.CASE_INSENSITIVE); private static final Pattern DECIMAL = Pattern.compile("decimal\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)"); @@ -75,6 +82,21 @@ public static PrimitiveType fromPrimitiveString(String typeString) { return TYPES.get(lowerTypeString); } + Matcher geometry = GEOMETRY_PARAMETERS.matcher(typeString); + if (geometry.matches()) { + String crs = geometry.group(1); + return GeometryType.of(crs != null ? crs.trim() : null); + } + + Matcher geography = GEOGRAPHY_PARAMETERS.matcher(typeString); + if (geography.matches()) { + String crs = geography.group(1); + String algorithmName = geography.group(2); + EdgeAlgorithm algorithm = + algorithmName == null ? null : EdgeAlgorithm.fromName(algorithmName.trim()); + return GeographyType.of(crs != null ? crs.trim() : null, algorithm); + } + Matcher fixed = FIXED.matcher(lowerTypeString); if (fixed.matches()) { return FixedType.ofLength(Integer.parseInt(fixed.group(1))); @@ -442,6 +464,11 @@ public boolean isVariantType() { return true; } + @Override + public VariantType asVariantType() { + return this; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -516,6 +543,134 @@ public int hashCode() { } } + public static class GeometryType extends PrimitiveType { + + private final String crs; + + private GeometryType(String crs) { + if (crs != null) { + Preconditions.checkArgument(!crs.isEmpty(), "Invalid CRS: (empty string)"); + Preconditions.checkArgument( + crs.trim().equals(crs), "CRS must not have leading or trailing spaces: '%s'", crs); + } + this.crs = crs; + } + + public static GeometryType crs84() { + return new GeometryType(null); + } + + public static GeometryType of(String crs) { + return new GeometryType(crs); + } + + @Override + public TypeID typeId() { + return TypeID.GEOMETRY; + } + + public String crs() { + return crs; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof GeometryType)) { + return false; + } + + GeometryType that = (GeometryType) o; + return Objects.equals(crs, that.crs); + } + + @Override + public int hashCode() { + return Objects.hash(GeometryType.class, crs); + } + + @Override + public String toString() { + if (crs == null) { + return "geometry"; + } + + return String.format("geometry(%s)", crs); + } + } + + public static class GeographyType extends PrimitiveType { + + public static final String DEFAULT_CRS = "OGC:CRS84"; + + private final String crs; + private final EdgeAlgorithm algorithm; + + private GeographyType(String crs, EdgeAlgorithm algorithm) { + if (crs != null) { + Preconditions.checkArgument(!crs.isEmpty(), "Invalid CRS: (empty string)"); + Preconditions.checkArgument( + crs.trim().equals(crs), "CRS must not have leading or trailing spaces: '%s'", crs); + } + this.crs = crs; + this.algorithm = algorithm; + } + + public static GeographyType crs84() { + return new GeographyType(null, null); + } + + public static GeographyType of(String crs) { + return new GeographyType(crs, null); + } + + public static GeographyType of(String crs, EdgeAlgorithm algorithm) { + return new GeographyType(crs, algorithm); + } + + @Override + public TypeID typeId() { + return TypeID.GEOGRAPHY; + } + + public String crs() { + return crs; + } + + public EdgeAlgorithm algorithm() { + return algorithm; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof GeographyType)) { + return false; + } + + GeographyType that = (GeographyType) o; + return Objects.equals(crs, that.crs) && Objects.equals(algorithm, that.algorithm); + } + + @Override + public int hashCode() { + return Objects.hash(GeographyType.class, crs, algorithm); + } + + @Override + public String toString() { + if (algorithm != null) { + return String.format("geography(%s, %s)", crs != null ? crs : DEFAULT_CRS, algorithm); + } else if (crs != null) { + return String.format("geography(%s)", crs); + } else { + return "geography"; + } + } + } + public static class NestedField implements Serializable { public static NestedField optional(int id, String name, Type type) { return new NestedField(true, id, name, type, null, null, null); diff --git a/api/src/test/java/org/apache/iceberg/TestHelpers.java b/api/src/test/java/org/apache/iceberg/TestHelpers.java index ca3b1a908ac6..003e7835ed4b 100644 --- a/api/src/test/java/org/apache/iceberg/TestHelpers.java +++ b/api/src/test/java/org/apache/iceberg/TestHelpers.java @@ -53,6 +53,9 @@ public class TestHelpers { private TestHelpers() {} + public static final int MAX_FORMAT_VERSION = 3; + public static final int[] ALL_VERSIONS = IntStream.rangeClosed(1, MAX_FORMAT_VERSION).toArray(); + /** Wait in a tight check loop until system clock is past {@code timestampMillis} */ public static long waitUntilAfter(long timestampMillis) { long current = System.currentTimeMillis(); diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java index 971f5a9e4510..5b7d4fceffbf 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java @@ -26,6 +26,8 @@ import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.NestedField; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; public class TestPartitionSpecValidation { private static final Schema SCHEMA = @@ -36,7 +38,9 @@ public class TestPartitionSpecValidation { NestedField.required(4, "d", Types.TimestampType.withZone()), NestedField.required(5, "another_d", Types.TimestampType.withZone()), NestedField.required(6, "s", Types.StringType.get()), - NestedField.required(7, "v", Types.VariantType.get())); + NestedField.required(7, "v", Types.VariantType.get()), + NestedField.required(8, "geom", Types.GeometryType.crs84()), + NestedField.required(9, "geog", Types.GeographyType.crs84())); @Test public void testMultipleTimestampPartitions() { @@ -315,14 +319,23 @@ public void testAddPartitionFieldsWithAndWithoutFieldIds() { assertThat(spec.lastAssignedFieldId()).isEqualTo(1006); } - @Test - public void testVariantUnsupported() { + @ParameterizedTest + @MethodSource("unsupportedFieldsProvider") + public void testUnsupported(int fieldId, String partitionName, String expectedErrorMessage) { assertThatThrownBy( () -> PartitionSpec.builderFor(SCHEMA) - .add(7, 1005, "variant_partition1", Transforms.bucket(5)) + .add(fieldId, 1005, partitionName, Transforms.bucket(5)) .build()) .isInstanceOf(ValidationException.class) - .hasMessage("Cannot partition by non-primitive source field: variant"); + .hasMessage(expectedErrorMessage); + } + + private static Object[][] unsupportedFieldsProvider() { + return new Object[][] { + {7, "variant_partition1", "Cannot partition by non-primitive source field: variant"}, + {8, "geom_partition1", "Invalid source type geometry for transform: bucket[5]"}, + {9, "geog_partition1", "Invalid source type geography for transform: bucket[5]"} + }; } } diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java index fec7343c5cbc..09733dcd6141 100644 --- a/api/src/test/java/org/apache/iceberg/TestSchema.java +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -18,15 +18,36 @@ */ package org.apache.iceberg; +import static org.apache.iceberg.Schema.DEFAULT_VALUES_MIN_FORMAT_VERSION; +import static org.apache.iceberg.Schema.MIN_FORMAT_VERSIONS; +import static org.apache.iceberg.TestHelpers.MAX_FORMAT_VERSION; import static org.assertj.core.api.Assertions.assertThatCode; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.util.List; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.types.EdgeAlgorithm; +import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; -import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.FieldSource; +import org.junit.jupiter.params.provider.MethodSource; public class TestSchema { + + private static final List TEST_TYPES = + ImmutableList.of( + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), + Types.VariantType.get(), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); + private static final Schema TS_NANO_CASES = new Schema( Types.NestedField.required(1, "id", Types.LongType.get()), @@ -64,27 +85,77 @@ public class TestSchema { .withWriteDefault("--") .build()); + private Schema generateTypeSchema(Type type) { + return new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.optional(2, "top", type), + Types.NestedField.optional(3, "arr", Types.ListType.ofRequired(4, type)), + Types.NestedField.required( + 5, + "struct", + Types.StructType.of( + Types.NestedField.optional(6, "inner_op", type), + Types.NestedField.required(7, "inner_req", type), + Types.NestedField.optional( + 8, + "struct_arr", + Types.StructType.of(Types.NestedField.optional(9, "deep", type)))))); + } + + private static Stream unsupportedTypes() { + return TEST_TYPES.stream() + .flatMap( + type -> + IntStream.range(1, MIN_FORMAT_VERSIONS.get(type.typeId())) + .mapToObj(unsupportedVersion -> Arguments.of(type, unsupportedVersion))); + } + @ParameterizedTest - @ValueSource(ints = {1, 2}) - public void testUnsupportedTimestampNano(int formatVersion) { - assertThatThrownBy(() -> Schema.checkCompatibility(TS_NANO_CASES, formatVersion)) + @MethodSource("unsupportedTypes") + public void testUnsupportedTypes(Type type, int unsupportedVersion) { + assertThatThrownBy( + () -> Schema.checkCompatibility(generateTypeSchema(type), unsupportedVersion)) .isInstanceOf(IllegalStateException.class) .hasMessage( "Invalid schema for v%s:\n" - + "- Invalid type for ts: timestamptz_ns is not supported until v3\n" - + "- Invalid type for arr.element: timestamp_ns is not supported until v3\n" - + "- Invalid type for struct.inner_ts: timestamptz_ns is not supported until v3\n" - + "- Invalid type for struct_arr.ts: timestamp_ns is not supported until v3", - formatVersion); + + "- Invalid type for top: %s is not supported until v%s\n" + + "- Invalid type for arr.element: %s is not supported until v%s\n" + + "- Invalid type for struct.inner_op: %s is not supported until v%s\n" + + "- Invalid type for struct.inner_req: %s is not supported until v%s\n" + + "- Invalid type for struct.struct_arr.deep: %s is not supported until v%s", + unsupportedVersion, + type, + MIN_FORMAT_VERSIONS.get(type.typeId()), + type, + MIN_FORMAT_VERSIONS.get(type.typeId()), + type, + MIN_FORMAT_VERSIONS.get(type.typeId()), + type, + MIN_FORMAT_VERSIONS.get(type.typeId()), + type, + MIN_FORMAT_VERSIONS.get(type.typeId())); } - @Test - public void testSupportedTimestampNano() { - assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES, 3)).doesNotThrowAnyException(); + private static Stream supportedTypes() { + return TEST_TYPES.stream() + .flatMap( + type -> + IntStream.rangeClosed(MIN_FORMAT_VERSIONS.get(type.typeId()), MAX_FORMAT_VERSION) + .mapToObj(supportedVersion -> Arguments.of(type, supportedVersion))); } @ParameterizedTest - @ValueSource(ints = {1, 2}) + @MethodSource("supportedTypes") + public void testTypeSupported(Type type, int supportedVersion) { + assertThatCode(() -> Schema.checkCompatibility(generateTypeSchema(type), supportedVersion)) + .doesNotThrowAnyException(); + } + + private static int[] unsupportedInitialDefault = + IntStream.range(1, DEFAULT_VALUES_MIN_FORMAT_VERSION).toArray(); + + @ParameterizedTest + @FieldSource("unsupportedInitialDefault") public void testUnsupportedInitialDefault(int formatVersion) { assertThatThrownBy(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, formatVersion)) .isInstanceOf(IllegalStateException.class) @@ -95,14 +166,18 @@ public void testUnsupportedInitialDefault(int formatVersion) { formatVersion); } - @Test - public void testSupportedInitialDefault() { - assertThatCode(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, 3)) + private static int[] supportedInitialDefault = + IntStream.rangeClosed(DEFAULT_VALUES_MIN_FORMAT_VERSION, MAX_FORMAT_VERSION).toArray(); + + @ParameterizedTest + @FieldSource("supportedInitialDefault") + public void testSupportedInitialDefault(int formatVersion) { + assertThatCode(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, formatVersion)) .doesNotThrowAnyException(); } @ParameterizedTest - @ValueSource(ints = {1, 2, 3}) + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") public void testSupportedWriteDefault(int formatVersion) { // only the initial default is a forward-incompatible change assertThatCode(() -> Schema.checkCompatibility(WRITE_DEFAULT_SCHEMA, formatVersion)) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index 5f0cac2b5e8c..c60b6ea6c54a 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -431,6 +431,34 @@ public void testVariantUnsupported() { assertThat(bucket.canTransform(Types.VariantType.get())).isFalse(); } + @Test + public void testGeometryUnsupported() { + assertThatThrownBy(() -> Transforms.bucket(Types.GeometryType.crs84(), 3)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot bucket by type: geometry"); + + Transform bucket = Transforms.bucket(3); + assertThatThrownBy(() -> bucket.bind(Types.GeometryType.crs84())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot bucket by type: geometry"); + + assertThat(bucket.canTransform(Types.GeometryType.crs84())).isFalse(); + } + + @Test + public void testGeographyUnsupported() { + assertThatThrownBy(() -> Transforms.bucket(Types.GeographyType.crs84(), 3)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot bucket by type: geography"); + + Transform bucket = Transforms.bucket(3); + assertThatThrownBy(() -> bucket.bind(Types.GeographyType.crs84())) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot bucket by type: geography"); + + assertThat(bucket.canTransform(Types.GeographyType.crs84())).isFalse(); + } + private byte[] randomBytes(int length) { byte[] bytes = new byte[length]; testRandom.nextBytes(bytes); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java index b5076e08a947..6b6c7ba16cd3 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java @@ -24,8 +24,11 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; import org.apache.iceberg.expressions.Literal; +import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; public class TestIdentity { @Test @@ -157,20 +160,27 @@ public void testBigDecimalToHumanString() { .isEqualTo(decimalString); } - @Test - public void testVariantUnsupported() { - assertThatThrownBy(() -> Transforms.identity().bind(Types.VariantType.get())) + @ParameterizedTest + @MethodSource("unsupportedTypesProvider") + public void testUnsupported(Type type) { + assertThatThrownBy(() -> Transforms.identity().bind(type)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Cannot bind to unsupported type: variant"); + .hasMessage("Cannot bind to unsupported type: " + type); - assertThatThrownBy(() -> Transforms.fromString(Types.VariantType.get(), "identity")) + assertThatThrownBy(() -> Transforms.fromString(type, "identity")) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Unsupported type for identity: variant"); + .hasMessage("Unsupported type for identity: " + type); - assertThatThrownBy(() -> Transforms.identity(Types.VariantType.get())) + assertThatThrownBy(() -> Transforms.identity(type)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Unsupported type for identity: variant"); + .hasMessage("Unsupported type for identity: " + type); + + assertThat(Transforms.identity().canTransform(type)).isFalse(); + } - assertThat(Transforms.identity().canTransform(Types.VariantType.get())).isFalse(); + private static Type[] unsupportedTypesProvider() { + return new Type[] { + Types.VariantType.get(), Types.GeometryType.crs84(), Types.GeographyType.crs84() + }; } } diff --git a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java index 2d02da5346a7..154c41d18001 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java +++ b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java @@ -48,7 +48,12 @@ public class TestReadabilityChecks { Types.BinaryType.get(), Types.DecimalType.of(9, 2), Types.DecimalType.of(11, 2), - Types.DecimalType.of(9, 3) + Types.DecimalType.of(9, 3), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269"), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY), }; @Test diff --git a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java index af2ebae7e1a8..61c587d5d4a7 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java @@ -62,7 +62,12 @@ public void testEqualTypes() throws Exception { Types.DecimalType.of(9, 3), Types.DecimalType.of(11, 0), Types.FixedType.ofLength(4), - Types.FixedType.ofLength(34) + Types.FixedType.ofLength(34), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269"), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY) }; for (Type type : equalityPrimitives) { diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java index 7d90e7cf6e24..0978157dbf28 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java @@ -23,11 +23,14 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.util.Map; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.types.Types.IntegerType; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -35,7 +38,13 @@ public class TestTypeUtil { private static Stream testTypes() { - return Stream.of(Arguments.of(Types.IntegerType.get()), Arguments.of(Types.VariantType.get())); + return Stream.of( + Arguments.of(Types.IntegerType.get()), + Arguments.of(Types.VariantType.get()), + Arguments.of(Types.GeometryType.crs84()), + Arguments.of(Types.GeometryType.of("srid:3857")), + Arguments.of(Types.GeographyType.crs84()), + Arguments.of(Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY))); } @ParameterizedTest @@ -652,4 +661,79 @@ public void testReassignOrRefreshIdsCaseInsensitive(Type testType) { required(1, "FIELD1", Types.IntegerType.get()), required(2, "FIELD2", testType))); assertThat(actualSchema.asStruct()).isEqualTo(expectedSchema.asStruct()); } + + @ParameterizedTest + @MethodSource("testTypes") + public void testAssignIdsWithType(Type testType) { + Types.StructType sourceType = + Types.StructType.of(required(0, "id", IntegerType.get()), optional(1, "data", testType)); + Type expectedType = + Types.StructType.of(required(10, "id", IntegerType.get()), optional(11, "data", testType)); + Type assignedType = TypeUtil.assignIds(sourceType, oldId -> oldId + 10); + assertThat(assignedType).isEqualTo(expectedType); + } + + @ParameterizedTest + @MethodSource("testTypes") + public void testAssignFreshIdsWithType(Type testType) { + Schema schema = new Schema(required(0, "id", IntegerType.get()), optional(1, "data", testType)); + Schema assignedSchema = TypeUtil.assignFreshIds(schema, new AtomicInteger(10)::incrementAndGet); + Schema expectedSchema = + new Schema(required(11, "id", IntegerType.get()), optional(12, "data", testType)); + assertThat(assignedSchema.asStruct()).isEqualTo(expectedSchema.asStruct()); + } + + @ParameterizedTest + @MethodSource("testTypes") + public void testReassignIdsWithType(Type testType) { + Schema schema = new Schema(required(0, "id", IntegerType.get()), optional(1, "data", testType)); + Schema sourceSchema = + new Schema(required(1, "id", IntegerType.get()), optional(2, "data", testType)); + Schema reassignedSchema = TypeUtil.reassignIds(schema, sourceSchema); + assertThat(reassignedSchema.asStruct()).isEqualTo(sourceSchema.asStruct()); + } + + @ParameterizedTest + @MethodSource("testTypes") + public void testIndexByIdWithType(Type testType) { + Schema schema = new Schema(required(0, "id", IntegerType.get()), optional(1, "data", testType)); + Map indexByIds = TypeUtil.indexById(schema.asStruct()); + assertThat(indexByIds.get(1).type()).isEqualTo(testType); + } + + @ParameterizedTest + @MethodSource("testTypes") + public void testIndexNameByIdWithType(Type testType) { + Schema schema = new Schema(required(0, "id", IntegerType.get()), optional(1, "data", testType)); + Map indexNameByIds = TypeUtil.indexNameById(schema.asStruct()); + assertThat(indexNameByIds.get(1)).isEqualTo("data"); + } + + @ParameterizedTest + @MethodSource("testTypes") + public void testProjectWithType(Type testType) { + Schema schema = new Schema(required(0, "id", IntegerType.get()), optional(1, "data", testType)); + Schema expectedSchema = new Schema(optional(1, "data", testType)); + Schema projectedSchema = TypeUtil.project(schema, Sets.newHashSet(1)); + assertThat(projectedSchema.asStruct()).isEqualTo(expectedSchema.asStruct()); + } + + @ParameterizedTest + @MethodSource("testTypes") + public void testGetProjectedIdsWithType(Type testType) { + Schema schema = new Schema(required(0, "id", IntegerType.get()), optional(1, "data", testType)); + Set projectedIds = TypeUtil.getProjectedIds(schema); + assertThat(Set.of(0, 1)).isEqualTo(projectedIds); + } + + @ParameterizedTest + @MethodSource("testTypes") + public void testReassignDocWithType(Type testType) { + Schema schema = new Schema(required(0, "id", IntegerType.get()), optional(1, "data", testType)); + Schema docSourceSchema = + new Schema( + required(0, "id", IntegerType.get(), "id"), optional(1, "data", testType, "data")); + Schema reassignedSchema = TypeUtil.reassignDoc(schema, docSourceSchema); + assertThat(reassignedSchema.asStruct()).isEqualTo(docSourceSchema.asStruct()); + } } diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index 226c53f1e9ce..6246df9e9395 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -47,4 +47,78 @@ public void fromPrimitiveString() { .isThrownBy(() -> Types.fromPrimitiveString("Unknown")) .withMessageContaining("Unknown"); } + + @Test + public void geospatialTypeFromTypeName() { + assertThat(Types.fromPrimitiveString("geometry")).isEqualTo(Types.GeometryType.crs84()); + assertThat(Types.fromPrimitiveString("Geometry")).isEqualTo(Types.GeometryType.crs84()); + assertThat(Types.fromPrimitiveString("geometry(srid:3857)")) + .isEqualTo(Types.GeometryType.of("srid:3857")); + assertThat(Types.fromPrimitiveString("geometry( srid:3857 )")) + .isEqualTo(Types.GeometryType.of("srid:3857")); + assertThat(Types.fromPrimitiveString("geometry( srid: 3857 )")) + .isEqualTo(Types.GeometryType.of("srid: 3857")); + assertThat(Types.fromPrimitiveString("Geometry( projjson:TestIdentifier )")) + .isEqualTo(Types.GeometryType.of("projjson:TestIdentifier")); + + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geometry()")) + .withMessageContaining("Cannot parse type string to primitive"); + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geometry( )")) + .withMessageContaining("Invalid CRS: (empty string)"); + + assertThat(Types.fromPrimitiveString("geography")).isEqualTo(Types.GeographyType.crs84()); + assertThat(Types.fromPrimitiveString("Geography")).isEqualTo(Types.GeographyType.crs84()); + assertThat(Types.fromPrimitiveString("geography(srid:4269)")) + .isEqualTo(Types.GeographyType.of("srid:4269")); + assertThat(Types.fromPrimitiveString("geography(srid: 4269)")) + .isEqualTo(Types.GeographyType.of("srid: 4269")); + assertThat(Types.fromPrimitiveString("geography(srid:4269, spherical)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.SPHERICAL)); + assertThat(Types.fromPrimitiveString("geography(srid:4269, vincenty)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.VINCENTY)); + assertThat(Types.fromPrimitiveString("geography(srid:4269, thomas)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.THOMAS)); + assertThat(Types.fromPrimitiveString("geography(srid:4269, andoyer)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.ANDOYER)); + assertThat(Types.fromPrimitiveString("geography(srid:4269, karney)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); + assertThat(Types.fromPrimitiveString("geography(srid: 4269, karney)")) + .isEqualTo(Types.GeographyType.of("srid: 4269", EdgeAlgorithm.KARNEY)); + assertThat(Types.fromPrimitiveString("Geography(projjson: TestIdentifier, karney)")) + .isEqualTo(Types.GeographyType.of("projjson: TestIdentifier", EdgeAlgorithm.KARNEY)); + + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geography()")) + .withMessageContaining("Cannot parse type string to primitive"); + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geography( , spherical)")) + .withMessageContaining("Invalid CRS: (empty string)"); + assertThatExceptionOfType(IllegalArgumentException.class) + .isThrownBy(() -> Types.fromPrimitiveString("geography(srid:4269, BadAlgorithm)")) + .withMessageContaining("Invalid edge interpolation algorithm") + .withMessageContaining("BadAlgorithm"); + + // Test geography type with various spacing + assertThat(Types.fromPrimitiveString("geography( srid:4269 )")) + .isEqualTo(Types.GeographyType.of("srid:4269")); + assertThat(Types.fromPrimitiveString("geography( srid:4269 , spherical )")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.SPHERICAL)); + assertThat(Types.fromPrimitiveString("geography(srid:4269,vincenty)")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.VINCENTY)); + assertThat(Types.fromPrimitiveString("geography( srid:4269 , karney )")) + .isEqualTo(Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); + } + + @Test + public void testGeospatialTypeToString() { + assertThat(Types.GeometryType.crs84().toString()).isEqualTo("geometry"); + assertThat(Types.GeometryType.of("srid:4326").toString()).isEqualTo("geometry(srid:4326)"); + assertThat(Types.GeographyType.crs84().toString()).isEqualTo("geography"); + assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.KARNEY).toString()) + .isEqualTo("geography(srid:4326, karney)"); + assertThat(Types.GeographyType.of(null, EdgeAlgorithm.KARNEY).toString()) + .isEqualTo("geography(OGC:CRS84, karney)"); + } } diff --git a/core/src/main/java/org/apache/iceberg/SchemaParser.java b/core/src/main/java/org/apache/iceberg/SchemaParser.java index db67ad9f709a..32ecb19d2871 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaParser.java +++ b/core/src/main/java/org/apache/iceberg/SchemaParser.java @@ -125,14 +125,8 @@ static void toJson(Types.MapType map, JsonGenerator generator) throws IOExceptio generator.writeEndObject(); } - static void toJson(Type.PrimitiveType primitive, JsonGenerator generator) throws IOException { - generator.writeString(primitive.toString()); - } - static void toJson(Type type, JsonGenerator generator) throws IOException { - if (type.isPrimitiveType()) { - toJson(type.asPrimitiveType(), generator); - } else if (type.isVariantType()) { + if (type.isPrimitiveType() || type.isVariantType()) { generator.writeString(type.toString()); } else { Type.NestedType nested = type.asNestedType(); diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaParser.java b/core/src/test/java/org/apache/iceberg/TestSchemaParser.java new file mode 100644 index 000000000000..abbbac898569 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestSchemaParser.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.IOException; +import org.apache.iceberg.data.DataTest; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestSchemaParser extends DataTest { + + @Override + protected boolean supportsTimestampNanos() { + return true; + } + + @Override + protected boolean supportsVariant() { + return true; + } + + @Override + protected boolean supportsGeospatial() { + return true; + } + + @Override + protected void writeAndValidate(Schema schema) throws IOException { + Schema serialized = SchemaParser.fromJson(SchemaParser.toJson(schema)); + assertThat(serialized.asStruct()).isEqualTo(schema.asStruct()); + } + + @Test + public void testSchemaId() { + Schema schema = new Schema(34, required(1, "id", Types.LongType.get())); + + Schema serialized = SchemaParser.fromJson(SchemaParser.toJson(schema)); + assertThat(serialized.schemaId()).isEqualTo(schema.schemaId()); + } + + @Test + public void testIdentifierColumns() { + Schema schema = + new Schema( + Lists.newArrayList( + required(1, "id-1", Types.LongType.get()), + required(2, "id-2", Types.LongType.get()), + optional(3, "data", Types.StringType.get())), + Sets.newHashSet(1, 2)); + + Schema serialized = SchemaParser.fromJson(SchemaParser.toJson(schema)); + assertThat(serialized.identifierFieldIds()).isEqualTo(Sets.newHashSet(1, 2)); + } + + @Test + public void testDocStrings() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get(), "unique identifier"), + Types.NestedField.optional("data") + .withId(2) + .ofType(Types.StringType.get()) + .withDoc("payload") + .build()); + + Schema serialized = SchemaParser.fromJson(SchemaParser.toJson(schema)); + assertThat(serialized.findField("id").doc()).isEqualTo("unique identifier"); + assertThat(serialized.findField("data").doc()).isEqualTo("payload"); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java index bda76469e1fa..56a83bcd0a85 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.EdgeAlgorithm; import org.apache.iceberg.types.Type.PrimitiveType; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.BinaryType; @@ -35,6 +36,8 @@ import org.apache.iceberg.types.Types.DoubleType; import org.apache.iceberg.types.Types.FixedType; import org.apache.iceberg.types.Types.FloatType; +import org.apache.iceberg.types.Types.GeographyType; +import org.apache.iceberg.types.Types.GeometryType; import org.apache.iceberg.types.Types.IntegerType; import org.apache.iceberg.types.Types.ListType; import org.apache.iceberg.types.Types.LongType; @@ -63,7 +66,11 @@ private static List primitiveTypes() { FixedType.ofLength(10), DecimalType.of(10, 2), LongType.get(), - FloatType.get()); + FloatType.get(), + GeometryType.crs84(), + GeometryType.of("srid:3857"), + GeographyType.crs84(), + GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); } private static NestedField[] primitiveFields( diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java index 2b91a408850e..8a5db3d30bd4 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java @@ -28,6 +28,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.types.EdgeAlgorithm; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; @@ -284,7 +285,11 @@ public void testUpdateFailure() { Types.FixedType.ofLength(4), Types.DecimalType.of(9, 2), Types.DecimalType.of(9, 3), - Types.DecimalType.of(18, 2)); + Types.DecimalType.of(18, 2), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY)); for (Type.PrimitiveType fromType : primitives) { for (Type.PrimitiveType toType : primitives) { diff --git a/core/src/test/java/org/apache/iceberg/TestSortOrder.java b/core/src/test/java/org/apache/iceberg/TestSortOrder.java index 1044ed83fb38..f557df515743 100644 --- a/core/src/test/java/org/apache/iceberg/TestSortOrder.java +++ b/core/src/test/java/org/apache/iceberg/TestSortOrder.java @@ -353,6 +353,22 @@ public void testVariantUnsupported() { .hasMessage("Unsupported type for identity: variant"); } + @TestTemplate + public void testGeospatialUnsupported() { + Schema v3Schema = + new Schema( + Types.NestedField.required(3, "id", Types.LongType.get()), + Types.NestedField.required(4, "geom", Types.GeometryType.crs84()), + Types.NestedField.required(5, "geog", Types.GeographyType.crs84())); + + assertThatThrownBy(() -> SortOrder.builderFor(v3Schema).withOrderId(10).asc("geom").build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Unsupported type for identity: geometry"); + assertThatThrownBy(() -> SortOrder.builderFor(v3Schema).withOrderId(10).asc("geog").build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Unsupported type for identity: geography"); + } + @TestTemplate public void testPreservingOrderSortedColumnNames() { SortOrder order = diff --git a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java index 5402a13d7d4b..c57f0d7f8271 100644 --- a/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java +++ b/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java @@ -64,6 +64,8 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.relocated.com.google.common.collect.Streams; +import org.apache.iceberg.types.EdgeAlgorithm; +import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.CharSequenceSet; import org.junit.jupiter.api.Assumptions; @@ -517,6 +519,45 @@ public void testBasicCreateTable() { assertThat(table.properties()).as("Should have table properties").isNotNull(); } + @Test + public void testCreateGeospatialTable() { + Schema schema = + new Schema( + required(3, "id", Types.IntegerType.get(), "unique ID"), + required(4, "data", Types.StringType.get()), + required(5, "geom", Types.GeometryType.of("srid:3857"), "geometry column"), + required( + 6, + "geog", + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY), + "geography column")); + + TableIdentifier identifier = TableIdentifier.of("ns", "geos_table"); + + C catalog = catalog(); + assertThat(catalog.tableExists(identifier)).as("Table should not exist").isFalse(); + + if (requiresNamespaceCreate()) { + catalog.createNamespace(identifier.namespace()); + } + + Map properties = ImmutableMap.of(TableProperties.FORMAT_VERSION, "3"); + catalog.createTable(identifier, schema, PartitionSpec.unpartitioned(), properties); + Table table = catalog.loadTable(identifier); + + Types.NestedField geomField = table.schema().findField("geom"); + assertThat(geomField.type().typeId()).isEqualTo(Type.TypeID.GEOMETRY); + Types.GeometryType geomType = (Types.GeometryType) geomField.type(); + assertThat(geomType.crs()).isEqualTo("srid:3857"); + + Types.NestedField geogField = table.schema().findField("geog"); + assertThat(geogField.type().typeId()).isEqualTo(Type.TypeID.GEOGRAPHY); + Types.GeographyType geogType = (Types.GeographyType) geogField.type(); + assertThat(geogType.crs()).isEqualTo("srid:4269"); + assertThat(geogType.algorithm()).isEqualTo(EdgeAlgorithm.KARNEY); + assertThat(catalog.dropTable(identifier)).isTrue(); + } + @Test public void testTableNameWithSlash() { Assumptions.assumeTrue(supportsNamesWithSlashes()); diff --git a/data/src/test/java/org/apache/iceberg/data/DataTest.java b/core/src/test/java/org/apache/iceberg/data/DataTest.java similarity index 74% rename from data/src/test/java/org/apache/iceberg/data/DataTest.java rename to core/src/test/java/org/apache/iceberg/data/DataTest.java index 638a344cd2bc..3f275570f7d6 100644 --- a/data/src/test/java/org/apache/iceberg/data/DataTest.java +++ b/core/src/test/java/org/apache/iceberg/data/DataTest.java @@ -25,14 +25,19 @@ import java.nio.file.Path; import java.util.concurrent.atomic.AtomicInteger; import org.apache.iceberg.Schema; +import org.apache.iceberg.types.EdgeAlgorithm; +import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.ListType; import org.apache.iceberg.types.Types.LongType; import org.apache.iceberg.types.Types.MapType; import org.apache.iceberg.types.Types.StructType; +import org.assertj.core.api.Assumptions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.FieldSource; public abstract class DataTest { @@ -60,6 +65,71 @@ public abstract class DataTest { @TempDir protected Path temp; + private static final Type[] SIMPLE_TYPES = + new Type[] { + Types.BooleanType.get(), + Types.IntegerType.get(), + LongType.get(), + Types.FloatType.get(), + Types.DoubleType.get(), + Types.DateType.get(), + Types.TimeType.get(), + Types.TimestampType.withZone(), + Types.TimestampType.withoutZone(), + Types.TimestampNanoType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.StringType.get(), + Types.FixedType.ofLength(7), + Types.BinaryType.get(), + Types.DecimalType.of(9, 0), + Types.DecimalType.of(11, 2), + Types.DecimalType.of(38, 10), + Types.VariantType.get(), + Types.GeometryType.crs84(), + Types.GeometryType.of("srid:3857"), + Types.GeographyType.crs84(), + Types.GeographyType.of("srid:4269"), + Types.GeographyType.of("srid:4269", EdgeAlgorithm.KARNEY), + }; + + protected boolean supportsTimestampNanos() { + return false; + } + + protected boolean supportsVariant() { + return false; + } + + protected boolean supportsGeospatial() { + return false; + } + + @ParameterizedTest + @FieldSource("SIMPLE_TYPES") + public void testTypeSchema(Type type) throws IOException { + if (!supportsTimestampNanos()) { + Assumptions.assumeThat( + TypeUtil.find(type, t -> t.typeId() == Type.TypeID.TIMESTAMP_NANO) == null) + .as("timestamp_ns is not yet implemented") + .isTrue(); + } + if (!supportsVariant()) { + Assumptions.assumeThat(TypeUtil.find(type, t -> t.typeId() == Type.TypeID.VARIANT) == null) + .as("variant is not yet implemented") + .isTrue(); + } + if (!supportsGeospatial()) { + Assumptions.assumeThat(TypeUtil.find(type, t -> t.typeId() == Type.TypeID.GEOMETRY) == null) + .as("geometry is not yet implemented") + .isTrue(); + Assumptions.assumeThat(TypeUtil.find(type, t -> t.typeId() == Type.TypeID.GEOGRAPHY) == null) + .as("geography is not yet implemented") + .isTrue(); + } + + writeAndValidate(new Schema(required(1, "id", LongType.get()), optional(2, "test_type", type))); + } + @Test public void testSimpleStruct() throws IOException { writeAndValidate(new Schema(SUPPORTED_PRIMITIVES.fields())); diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java index 20f9eb7f616e..e4ed1f086d8b 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java @@ -166,6 +166,8 @@ private static String convertToTypeString(Type type) { return "timestamp"; case FIXED: case BINARY: + case GEOMETRY: + case GEOGRAPHY: return "binary"; case DECIMAL: final Types.DecimalType decimalType = (Types.DecimalType) type;