From e889d0efc18e4b1463fd14372fef4c201494756b Mon Sep 17 00:00:00 2001 From: bodduv Date: Wed, 6 May 2026 13:00:38 +0200 Subject: [PATCH 1/3] Handle empty union list readers after IPC deser --- .../complex/impl/UnionLargeListReader.java | 19 ++++- .../vector/complex/impl/UnionListReader.java | 24 ++++-- .../impl/UnionListReaderBoundsChecker.java | 47 +++++++++++ .../arrow/vector/ipc/TestRoundTrip.java | 79 +++++++++++++++++++ 4 files changed, 160 insertions(+), 9 deletions(-) create mode 100644 vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderBoundsChecker.java diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java index be236c3166..ade455e73e 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java @@ -53,9 +53,24 @@ public boolean isSet() { @Override public void setPosition(int index) { + int valueCount = vector.getValueCount(); + if (UnionListReaderBoundsChecker.isEmptyVectorPosition(index, valueCount)) { + setEmptyPosition(index); + return; + } + + UnionListReaderBoundsChecker.checkIndex(index, valueCount); + UnionListReaderBoundsChecker.checkOffsetBuffer(vector.getOffsetBuffer(), index, OFFSET_WIDTH); + + super.setPosition(index); + currentOffset = vector.getElementStartIndex(index) - 1; + maxOffset = vector.getElementEndIndex(index); + } + + private void setEmptyPosition(int index) { super.setPosition(index); - currentOffset = vector.getOffsetBuffer().getLong((long) index * OFFSET_WIDTH) - 1; - maxOffset = vector.getOffsetBuffer().getLong(((long) index + 1L) * OFFSET_WIDTH); + currentOffset = 0; + maxOffset = 0; } @Override diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java index 014608afee..7036bba77d 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java @@ -52,14 +52,24 @@ public boolean isSet() { @Override public void setPosition(int index) { - super.setPosition(index); - if (vector.getOffsetBuffer().capacity() == 0) { - currentOffset = 0; - maxOffset = 0; - } else { - currentOffset = vector.getOffsetBuffer().getInt(index * (long) OFFSET_WIDTH) - 1; - maxOffset = vector.getOffsetBuffer().getInt((index + 1) * (long) OFFSET_WIDTH); + int valueCount = vector.getValueCount(); + if (UnionListReaderBoundsChecker.isEmptyVectorPosition(index, valueCount)) { + setEmptyPosition(index); + return; } + + UnionListReaderBoundsChecker.checkIndex(index, valueCount); + UnionListReaderBoundsChecker.checkOffsetBuffer(vector.getOffsetBuffer(), index, OFFSET_WIDTH); + + super.setPosition(index); + currentOffset = vector.getElementStartIndex(index) - 1; + maxOffset = vector.getElementEndIndex(index); + } + + private void setEmptyPosition(int index) { + super.setPosition(index); + currentOffset = 0; + maxOffset = 0; } @Override diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderBoundsChecker.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderBoundsChecker.java new file mode 100644 index 0000000000..41f9f3b9c6 --- /dev/null +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderBoundsChecker.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.vector.complex.impl; + +import org.apache.arrow.memory.ArrowBuf; + +/** Shared position validation for union list readers backed by offset buffers. */ +final class UnionListReaderBoundsChecker { + + private UnionListReaderBoundsChecker() {} + + static boolean isEmptyVectorPosition(int index, int valueCount) { + return valueCount == 0 && index == 0; + } + + static void checkIndex(int index, int valueCount) { + if (index < 0 || index >= valueCount) { + throw new IndexOutOfBoundsException( + String.format("index: %s, expected range (0, %s)", index, valueCount)); + } + } + + static void checkOffsetBuffer(ArrowBuf offsetBuffer, int index, long offsetWidth) { + long requiredBytes = ((long) index + 2L) * offsetWidth; + long capacity = offsetBuffer.capacity(); + if (capacity < requiredBytes) { + throw new IndexOutOfBoundsException( + String.format( + "Offset buffer has capacity %s but reading index %s requires %s bytes", + capacity, index, requiredBytes)); + } + } +} diff --git a/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java b/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java index 65a3791dd4..3074885acb 100644 --- a/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java +++ b/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java @@ -50,8 +50,11 @@ import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.VectorUnloader; +import org.apache.arrow.vector.complex.BaseRepeatedValueVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.ipc.message.ArrowBlock; import org.apache.arrow.vector.ipc.message.ArrowBuffer; @@ -326,6 +329,82 @@ public void testMetadata(String name, IpcOption writeOption) throws Exception { } } + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testEmptyUnionListReadersAfterIpc(String name, IpcOption writeOption) + throws Exception { + Field structField = + new Field( + "struct", + FieldType.nullable(ArrowType.Struct.INSTANCE), + Collections2.asImmutableList( + listField("list", ArrowType.List.INSTANCE), + listField("largeList", ArrowType.LargeList.INSTANCE), + mapField("map"))); + Schema schema = new Schema(Collections2.asImmutableList(structField)); + + try (final BufferAllocator originalVectorAllocator = + allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); + final StructVector vector = (StructVector) structField.createVector(originalVectorAllocator)) { + vector.allocateNewSafe(); + vector.setValueCount(0); + + List vectors = Collections2.asImmutableList(vector); + VectorSchemaRoot root = new VectorSchemaRoot(schema, vectors, 0); + roundTrip( + name, + writeOption, + root, + /* dictionaryProvider */ null, + TestRoundTrip::writeSingleBatch, + validateFileBatches(new int[] {0}, this::validateEmptyUnionListReaders), + validateStreamBatches(new int[] {0}, this::validateEmptyUnionListReaders)); + } + } + + private Field listField(String name, ArrowType type) { + return new Field( + name, + FieldType.nullable(type), + Collections2.asImmutableList( + new Field( + BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Int(32, true)), + null))); + } + + private Field mapField(String name) { + Field keyField = + new Field(MapVector.KEY_NAME, FieldType.notNullable(new ArrowType.Int(32, true)), null); + Field valueField = + new Field(MapVector.VALUE_NAME, FieldType.nullable(new ArrowType.Int(32, true)), null); + Field entriesField = + new Field( + MapVector.DATA_VECTOR_NAME, + FieldType.notNullable(ArrowType.Struct.INSTANCE), + Collections2.asImmutableList(keyField, valueField)); + + return new Field( + name, + FieldType.nullable(new ArrowType.Map(false)), + Collections2.asImmutableList(entriesField)); + } + + private void validateEmptyUnionListReaders(int expectedCount, VectorSchemaRoot root) { + assertEquals(0, expectedCount); + + FieldReader structReader = root.getVector("struct").getReader(); + assertEmptyUnionListReader(structReader.reader("list")); + assertEmptyUnionListReader(structReader.reader("largeList")); + assertEmptyUnionListReader(structReader.reader("map")); + } + + private void assertEmptyUnionListReader(FieldReader reader) { + assertEquals(0, reader.size()); + assertFalse(reader.next()); + assertThrows(IndexOutOfBoundsException.class, () -> reader.setPosition(1)); + } + private Map metadata(int i) { Map map = new HashMap<>(); map.put("k_" + i, "v_" + i); From 3321c855abb72af2c2e03ed3a12404ffa3810c16 Mon Sep 17 00:00:00 2001 From: bodduv Date: Thu, 7 May 2026 14:12:09 +0200 Subject: [PATCH 2/3] Address union list reader review feedback --- .../complex/impl/UnionLargeListReader.java | 7 +-- .../vector/complex/impl/UnionListReader.java | 7 +-- ... => UnionListReaderPositionValidator.java} | 10 ++-- .../complex/writer/TestComplexWriter.java | 46 +++++++++++++++++++ .../arrow/vector/ipc/TestRoundTrip.java | 5 +- 5 files changed, 61 insertions(+), 14 deletions(-) rename vector/src/main/java/org/apache/arrow/vector/complex/impl/{UnionListReaderBoundsChecker.java => UnionListReaderPositionValidator.java} (84%) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java index ade455e73e..cb7f9352ff 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java @@ -54,13 +54,14 @@ public boolean isSet() { @Override public void setPosition(int index) { int valueCount = vector.getValueCount(); - if (UnionListReaderBoundsChecker.isEmptyVectorPosition(index, valueCount)) { + if (valueCount == 0 && index == 0) { setEmptyPosition(index); return; } - UnionListReaderBoundsChecker.checkIndex(index, valueCount); - UnionListReaderBoundsChecker.checkOffsetBuffer(vector.getOffsetBuffer(), index, OFFSET_WIDTH); + UnionListReaderPositionValidator.checkIndex(index, valueCount); + UnionListReaderPositionValidator.checkOffsetBufferReadable( + vector.getOffsetBuffer(), index, OFFSET_WIDTH); super.setPosition(index); currentOffset = vector.getElementStartIndex(index) - 1; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java index 7036bba77d..b4ff29e5b5 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java @@ -53,13 +53,14 @@ public boolean isSet() { @Override public void setPosition(int index) { int valueCount = vector.getValueCount(); - if (UnionListReaderBoundsChecker.isEmptyVectorPosition(index, valueCount)) { + if (valueCount == 0 && index == 0) { setEmptyPosition(index); return; } - UnionListReaderBoundsChecker.checkIndex(index, valueCount); - UnionListReaderBoundsChecker.checkOffsetBuffer(vector.getOffsetBuffer(), index, OFFSET_WIDTH); + UnionListReaderPositionValidator.checkIndex(index, valueCount); + UnionListReaderPositionValidator.checkOffsetBufferReadable( + vector.getOffsetBuffer(), index, OFFSET_WIDTH); super.setPosition(index); currentOffset = vector.getElementStartIndex(index) - 1; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderBoundsChecker.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderPositionValidator.java similarity index 84% rename from vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderBoundsChecker.java rename to vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderPositionValidator.java index 41f9f3b9c6..67aaae02ee 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderBoundsChecker.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderPositionValidator.java @@ -19,13 +19,9 @@ import org.apache.arrow.memory.ArrowBuf; /** Shared position validation for union list readers backed by offset buffers. */ -final class UnionListReaderBoundsChecker { +final class UnionListReaderPositionValidator { - private UnionListReaderBoundsChecker() {} - - static boolean isEmptyVectorPosition(int index, int valueCount) { - return valueCount == 0 && index == 0; - } + private UnionListReaderPositionValidator() {} static void checkIndex(int index, int valueCount) { if (index < 0 || index >= valueCount) { @@ -34,7 +30,7 @@ static void checkIndex(int index, int valueCount) { } } - static void checkOffsetBuffer(ArrowBuf offsetBuffer, int index, long offsetWidth) { + static void checkOffsetBufferReadable(ArrowBuf offsetBuffer, int index, long offsetWidth) { long requiredBytes = ((long) index + 2L) * offsetWidth; long capacity = offsetBuffer.capacity(); if (capacity < requiredBytes) { diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 80d03cae6d..1c8076ff48 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -22,6 +22,7 @@ import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; @@ -29,6 +30,7 @@ import java.nio.charset.StandardCharsets; import java.time.LocalDateTime; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -48,6 +50,8 @@ import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.ViewVarCharVector; +import org.apache.arrow.vector.complex.BaseRepeatedValueVector; +import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; @@ -59,6 +63,7 @@ import org.apache.arrow.vector.complex.impl.NullableStructWriter; import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl; import org.apache.arrow.vector.complex.impl.SingleStructWriter; +import org.apache.arrow.vector.complex.impl.UnionLargeListReader; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListViewReader; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; @@ -89,6 +94,7 @@ import org.apache.arrow.vector.holders.NullableUuidHolder; import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; import org.apache.arrow.vector.holders.UuidHolder; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -2054,6 +2060,46 @@ public void testListViewOfListViewOfListViewWriterWithNulls() { } } + @Test + public void testUnionListReaderSetPositionOnEmptyIpcOffsetBuffer() { + try (ListVector listVector = ListVector.empty("list", allocator); + ArrowBuf validityBuffer = allocator.buffer(0); + ArrowBuf offsetBuffer = allocator.buffer(BaseRepeatedValueVector.OFFSET_WIDTH)) { + offsetBuffer.setInt(0, 0); + offsetBuffer.writerIndex(BaseRepeatedValueVector.OFFSET_WIDTH); + listVector.loadFieldBuffers( + new ArrowFieldNode(0, 0), Arrays.asList(validityBuffer, offsetBuffer)); + + assertEquals(0, listVector.getValueCount()); + assertEquals(BaseRepeatedValueVector.OFFSET_WIDTH, listVector.getOffsetBuffer().capacity()); + assertEmptyPosition(new UnionListReader(listVector)); + } + } + + @Test + public void testUnionLargeListReaderSetPositionOnEmptyIpcOffsetBuffer() { + try (LargeListVector listVector = LargeListVector.empty("largeList", allocator); + ArrowBuf validityBuffer = allocator.buffer(0); + ArrowBuf offsetBuffer = allocator.buffer(LargeListVector.OFFSET_WIDTH)) { + offsetBuffer.setLong(0, 0); + offsetBuffer.writerIndex(LargeListVector.OFFSET_WIDTH); + listVector.loadFieldBuffers( + new ArrowFieldNode(0, 0), Arrays.asList(validityBuffer, offsetBuffer)); + + assertEquals(0, listVector.getValueCount()); + assertEquals(LargeListVector.OFFSET_WIDTH, listVector.getOffsetBuffer().capacity()); + assertEmptyPosition(new UnionLargeListReader(listVector)); + } + } + + private void assertEmptyPosition(FieldReader reader) { + reader.setPosition(0); + assertEquals(0, reader.size()); + assertFalse(reader.next()); + assertThrows(IndexOutOfBoundsException.class, () -> reader.setPosition(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> reader.setPosition(1)); + } + @Test public void testStructOfList() { try (StructVector structVector = StructVector.empty("struct1", allocator)) { diff --git a/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java b/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java index 3074885acb..c61ad65026 100644 --- a/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java +++ b/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java @@ -345,7 +345,8 @@ public void testEmptyUnionListReadersAfterIpc(String name, IpcOption writeOption try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final StructVector vector = (StructVector) structField.createVector(originalVectorAllocator)) { + final StructVector vector = + (StructVector) structField.createVector(originalVectorAllocator)) { vector.allocateNewSafe(); vector.setValueCount(0); @@ -400,8 +401,10 @@ private void validateEmptyUnionListReaders(int expectedCount, VectorSchemaRoot r } private void assertEmptyUnionListReader(FieldReader reader) { + reader.setPosition(0); assertEquals(0, reader.size()); assertFalse(reader.next()); + assertThrows(IndexOutOfBoundsException.class, () -> reader.setPosition(-1)); assertThrows(IndexOutOfBoundsException.class, () -> reader.setPosition(1)); } From ff04d15d9a79d0978df79640597e90ae7f13e985 Mon Sep 17 00:00:00 2001 From: bodduv Date: Thu, 7 May 2026 14:42:29 +0200 Subject: [PATCH 3/3] Apply reader bounds checks to list view readers --- .../complex/impl/UnionLargeListReader.java | 2 +- .../impl/UnionLargeListViewReader.java | 37 +++-- .../vector/complex/impl/UnionListReader.java | 2 +- .../UnionListReaderPositionValidator.java | 35 +++- .../complex/impl/UnionListViewReader.java | 31 +++- .../complex/writer/TestComplexWriter.java | 155 ++++++++++++++++++ 6 files changed, 238 insertions(+), 24 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java index cb7f9352ff..a919f10297 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java @@ -60,7 +60,7 @@ public void setPosition(int index) { } UnionListReaderPositionValidator.checkIndex(index, valueCount); - UnionListReaderPositionValidator.checkOffsetBufferReadable( + UnionListReaderPositionValidator.checkListBufferReadable( vector.getOffsetBuffer(), index, OFFSET_WIDTH); super.setPosition(index); diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListViewReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListViewReader.java index 4bcd028de3..6f5e1da917 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListViewReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListViewReader.java @@ -56,18 +56,33 @@ public boolean isSet() { @Override public void setPosition(int index) { - super.setPosition(index); - if (vector.getOffsetBuffer().capacity() == 0) { - currentOffset = 0; - size = 0; - } else { - currentOffset = - vector - .getOffsetBuffer() - .getInt(index * (long) BaseLargeRepeatedValueViewVector.OFFSET_WIDTH); - size = - vector.getSizeBuffer().getInt(index * (long) BaseLargeRepeatedValueViewVector.SIZE_WIDTH); + int valueCount = vector.getValueCount(); + if (valueCount == 0 && index == 0) { + setEmptyPosition(index); + return; } + + UnionListReaderPositionValidator.checkIndex(index, valueCount); + UnionListReaderPositionValidator.checkListViewBufferReadable( + vector.getOffsetBuffer(), + vector.getSizeBuffer(), + index, + BaseLargeRepeatedValueViewVector.OFFSET_WIDTH, + BaseLargeRepeatedValueViewVector.SIZE_WIDTH); + + super.setPosition(index); + currentOffset = + vector + .getOffsetBuffer() + .getInt(index * (long) BaseLargeRepeatedValueViewVector.OFFSET_WIDTH); + size = + vector.getSizeBuffer().getInt(index * (long) BaseLargeRepeatedValueViewVector.SIZE_WIDTH); + } + + private void setEmptyPosition(int index) { + super.setPosition(index); + currentOffset = 0; + size = 0; } @Override diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java index b4ff29e5b5..4273290efa 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java @@ -59,7 +59,7 @@ public void setPosition(int index) { } UnionListReaderPositionValidator.checkIndex(index, valueCount); - UnionListReaderPositionValidator.checkOffsetBufferReadable( + UnionListReaderPositionValidator.checkListBufferReadable( vector.getOffsetBuffer(), index, OFFSET_WIDTH); super.setPosition(index); diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderPositionValidator.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderPositionValidator.java index 67aaae02ee..7b7eeb0879 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderPositionValidator.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReaderPositionValidator.java @@ -18,19 +18,26 @@ import org.apache.arrow.memory.ArrowBuf; -/** Shared position validation for union list readers backed by offset buffers. */ +/** + * Utility class for validating list and list view reader positions. + * + *

A list vector contains an offset buffer that denotes lists boundaries. A list view vector + * contains an offset buffer and a size buffer. + */ final class UnionListReaderPositionValidator { private UnionListReaderPositionValidator() {} + /** Check if the given index is within the current value count of the vector. */ static void checkIndex(int index, int valueCount) { if (index < 0 || index >= valueCount) { throw new IndexOutOfBoundsException( - String.format("index: %s, expected range (0, %s)", index, valueCount)); + String.format("index: %s, expected range [0, %s)", index, valueCount)); } } - static void checkOffsetBufferReadable(ArrowBuf offsetBuffer, int index, long offsetWidth) { + /** Check that the list offset buffer contains entries for {@code index} and {@code index + 1}. */ + static void checkListBufferReadable(ArrowBuf offsetBuffer, int index, long offsetWidth) { long requiredBytes = ((long) index + 2L) * offsetWidth; long capacity = offsetBuffer.capacity(); if (capacity < requiredBytes) { @@ -40,4 +47,26 @@ static void checkOffsetBufferReadable(ArrowBuf offsetBuffer, int index, long off capacity, index, requiredBytes)); } } + + /** Check that the list view offset and size buffers contain entries for {@code index}. */ + static void checkListViewBufferReadable( + ArrowBuf offsetBuffer, ArrowBuf sizeBuffer, int index, long offsetWidth, long sizeWidth) { + long requiredOffsetBytes = ((long) index + 1L) * offsetWidth; + long offsetCapacity = offsetBuffer.capacity(); + if (offsetCapacity < requiredOffsetBytes) { + throw new IndexOutOfBoundsException( + String.format( + "Offset buffer has capacity %s but reading index %s requires %s bytes", + offsetCapacity, index, requiredOffsetBytes)); + } + + long requiredSizeBytes = ((long) index + 1L) * sizeWidth; + long sizeCapacity = sizeBuffer.capacity(); + if (sizeCapacity < requiredSizeBytes) { + throw new IndexOutOfBoundsException( + String.format( + "Size buffer has capacity %s but reading index %s requires %s bytes", + sizeCapacity, index, requiredSizeBytes)); + } + } } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java index 17ac1150fd..0486f9dec2 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java @@ -54,15 +54,30 @@ public boolean isSet() { @Override public void setPosition(int index) { - super.setPosition(index); - if (vector.getOffsetBuffer().capacity() == 0) { - currentOffset = 0; - size = 0; - } else { - currentOffset = - vector.getOffsetBuffer().getInt(index * (long) BaseRepeatedValueViewVector.OFFSET_WIDTH); - size = vector.getSizeBuffer().getInt(index * (long) BaseRepeatedValueViewVector.SIZE_WIDTH); + int valueCount = vector.getValueCount(); + if (valueCount == 0 && index == 0) { + setEmptyPosition(index); + return; } + + UnionListReaderPositionValidator.checkIndex(index, valueCount); + UnionListReaderPositionValidator.checkListViewBufferReadable( + vector.getOffsetBuffer(), + vector.getSizeBuffer(), + index, + BaseRepeatedValueViewVector.OFFSET_WIDTH, + BaseRepeatedValueViewVector.SIZE_WIDTH); + + super.setPosition(index); + currentOffset = + vector.getOffsetBuffer().getInt(index * (long) BaseRepeatedValueViewVector.OFFSET_WIDTH); + size = vector.getSizeBuffer().getInt(index * (long) BaseRepeatedValueViewVector.SIZE_WIDTH); + } + + private void setEmptyPosition(int index) { + super.setPosition(index); + currentOffset = 0; + size = 0; } @Override diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 1c8076ff48..d64c766eef 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -52,6 +52,7 @@ import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; import org.apache.arrow.vector.complex.LargeListVector; +import org.apache.arrow.vector.complex.LargeListViewVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; @@ -64,6 +65,7 @@ import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl; import org.apache.arrow.vector.complex.impl.SingleStructWriter; import org.apache.arrow.vector.complex.impl.UnionLargeListReader; +import org.apache.arrow.vector.complex.impl.UnionLargeListViewReader; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListViewReader; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; @@ -2092,6 +2094,159 @@ public void testUnionLargeListReaderSetPositionOnEmptyIpcOffsetBuffer() { } } + @Test + public void testUnionListViewReaderSetPositionOnEmptyIpcBuffers() { + try (ListViewVector listViewVector = ListViewVector.empty("listView", allocator); + ArrowBuf validityBuffer = allocator.buffer(0); + ArrowBuf offsetBuffer = allocator.buffer(0); + ArrowBuf sizeBuffer = allocator.buffer(0)) { + listViewVector.loadFieldBuffers( + new ArrowFieldNode(0, 0), Arrays.asList(validityBuffer, offsetBuffer, sizeBuffer)); + + assertEquals(0, listViewVector.getValueCount()); + assertEquals(0, listViewVector.getOffsetBuffer().capacity()); + assertEquals(0, listViewVector.getSizeBuffer().capacity()); + assertEmptyPosition(new UnionListViewReader(listViewVector)); + } + } + + @Test + public void testUnionLargeListViewReaderSetPositionOnEmptyIpcBuffers() { + try (LargeListViewVector listViewVector = + LargeListViewVector.empty("largeListView", allocator); + ArrowBuf validityBuffer = allocator.buffer(0); + ArrowBuf offsetBuffer = allocator.buffer(0); + ArrowBuf sizeBuffer = allocator.buffer(0)) { + listViewVector.loadFieldBuffers( + new ArrowFieldNode(0, 0), Arrays.asList(validityBuffer, offsetBuffer, sizeBuffer)); + + assertEquals(0, listViewVector.getValueCount()); + assertEquals(0, listViewVector.getOffsetBuffer().capacity()); + assertEquals(0, listViewVector.getSizeBuffer().capacity()); + assertEmptyPosition(new UnionLargeListViewReader(listViewVector)); + } + } + + @Test + public void testUnionListReaderSetPositionChecksIpcOffsetBuffer() { + try (ListVector listVector = ListVector.empty("list", allocator); + ArrowBuf validityBuffer = allocator.buffer(1); + ArrowBuf offsetBuffer = allocator.buffer(BaseRepeatedValueVector.OFFSET_WIDTH)) { + validityBuffer.setByte(0, 1); + validityBuffer.writerIndex(1); + offsetBuffer.setInt(0, 0); + offsetBuffer.writerIndex(BaseRepeatedValueVector.OFFSET_WIDTH); + listVector.loadFieldBuffers( + new ArrowFieldNode(1, 0), Arrays.asList(validityBuffer, offsetBuffer)); + + IndexOutOfBoundsException exception = + assertThrows( + IndexOutOfBoundsException.class, + () -> new UnionListReader(listVector).setPosition(0)); + assertTrue(exception.getMessage().contains("Offset buffer has capacity")); + } + } + + @Test + public void testUnionLargeListReaderSetPositionChecksIpcOffsetBuffer() { + try (LargeListVector listVector = LargeListVector.empty("largeList", allocator); + ArrowBuf validityBuffer = allocator.buffer(1); + ArrowBuf offsetBuffer = allocator.buffer(LargeListVector.OFFSET_WIDTH)) { + validityBuffer.setByte(0, 1); + validityBuffer.writerIndex(1); + offsetBuffer.setLong(0, 0); + offsetBuffer.writerIndex(LargeListVector.OFFSET_WIDTH); + listVector.loadFieldBuffers( + new ArrowFieldNode(1, 0), Arrays.asList(validityBuffer, offsetBuffer)); + + IndexOutOfBoundsException exception = + assertThrows( + IndexOutOfBoundsException.class, + () -> new UnionLargeListReader(listVector).setPosition(0)); + assertTrue(exception.getMessage().contains("Offset buffer has capacity")); + } + } + + @Test + public void testUnionListViewReaderSetPositionChecksIpcBuffers() { + try (ListViewVector listViewVector = ListViewVector.empty("listView", allocator); + ArrowBuf validityBuffer = allocator.buffer(1); + ArrowBuf offsetBuffer = allocator.buffer(0); + ArrowBuf sizeBuffer = allocator.buffer(ListViewVector.SIZE_WIDTH)) { + validityBuffer.setByte(0, 1); + validityBuffer.writerIndex(1); + sizeBuffer.setInt(0, 0); + sizeBuffer.writerIndex(ListViewVector.SIZE_WIDTH); + listViewVector.loadFieldBuffers( + new ArrowFieldNode(1, 0), Arrays.asList(validityBuffer, offsetBuffer, sizeBuffer)); + + IndexOutOfBoundsException exception = + assertThrows( + IndexOutOfBoundsException.class, + () -> new UnionListViewReader(listViewVector).setPosition(0)); + assertTrue(exception.getMessage().contains("Offset buffer has capacity")); + } + + try (ListViewVector listViewVector = ListViewVector.empty("listView", allocator); + ArrowBuf validityBuffer = allocator.buffer(1); + ArrowBuf offsetBuffer = allocator.buffer(ListViewVector.OFFSET_WIDTH); + ArrowBuf sizeBuffer = allocator.buffer(0)) { + validityBuffer.setByte(0, 1); + validityBuffer.writerIndex(1); + offsetBuffer.setInt(0, 0); + offsetBuffer.writerIndex(ListViewVector.OFFSET_WIDTH); + listViewVector.loadFieldBuffers( + new ArrowFieldNode(1, 0), Arrays.asList(validityBuffer, offsetBuffer, sizeBuffer)); + + IndexOutOfBoundsException exception = + assertThrows( + IndexOutOfBoundsException.class, + () -> new UnionListViewReader(listViewVector).setPosition(0)); + assertTrue(exception.getMessage().contains("Size buffer has capacity")); + } + } + + @Test + public void testUnionLargeListViewReaderSetPositionChecksIpcBuffers() { + try (LargeListViewVector listViewVector = + LargeListViewVector.empty("largeListView", allocator); + ArrowBuf validityBuffer = allocator.buffer(1); + ArrowBuf offsetBuffer = allocator.buffer(0); + ArrowBuf sizeBuffer = allocator.buffer(LargeListViewVector.SIZE_WIDTH)) { + validityBuffer.setByte(0, 1); + validityBuffer.writerIndex(1); + sizeBuffer.setLong(0, 0); + sizeBuffer.writerIndex(LargeListViewVector.SIZE_WIDTH); + listViewVector.loadFieldBuffers( + new ArrowFieldNode(1, 0), Arrays.asList(validityBuffer, offsetBuffer, sizeBuffer)); + + IndexOutOfBoundsException exception = + assertThrows( + IndexOutOfBoundsException.class, + () -> new UnionLargeListViewReader(listViewVector).setPosition(0)); + assertTrue(exception.getMessage().contains("Offset buffer has capacity")); + } + + try (LargeListViewVector listViewVector = + LargeListViewVector.empty("largeListView", allocator); + ArrowBuf validityBuffer = allocator.buffer(1); + ArrowBuf offsetBuffer = allocator.buffer(LargeListViewVector.OFFSET_WIDTH); + ArrowBuf sizeBuffer = allocator.buffer(0)) { + validityBuffer.setByte(0, 1); + validityBuffer.writerIndex(1); + offsetBuffer.setLong(0, 0); + offsetBuffer.writerIndex(LargeListViewVector.OFFSET_WIDTH); + listViewVector.loadFieldBuffers( + new ArrowFieldNode(1, 0), Arrays.asList(validityBuffer, offsetBuffer, sizeBuffer)); + + IndexOutOfBoundsException exception = + assertThrows( + IndexOutOfBoundsException.class, + () -> new UnionLargeListViewReader(listViewVector).setPosition(0)); + assertTrue(exception.getMessage().contains("Size buffer has capacity")); + } + } + private void assertEmptyPosition(FieldReader reader) { reader.setPosition(0); assertEquals(0, reader.size());