Skip to content

Commit

Permalink
fix: For failure in reading dictionary encoded parquet strings (deeph…
Browse files Browse the repository at this point in the history
  • Loading branch information
malhotrashivam authored Aug 15, 2024
1 parent c74e570 commit 9ca4332
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -267,9 +267,7 @@ public ColumnPageReader next(@NotNull final SeekableChannelContext channelContex
final long dataOffset = ch.position();
nextHeaderOffset = dataOffset + pageHeader.getCompressed_page_size();
final PageType pageType = pageHeader.type;
if (pageType == PageType.DICTIONARY_PAGE && headerOffset == columnChunk.meta_data.getData_page_offset()
&& columnChunk.meta_data.getDictionary_page_offset() == 0) {
// https://stackoverflow.com/questions/55225108/why-is-dictionary-page-offset-0-for-plain-dictionary-encoding
if (pageType == PageType.DICTIONARY_PAGE) {
// Skip the dictionary page and jump to the data page
return next(holder.get());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,20 @@ public void readSampleParquetFilesFromPublicS3Part2() {
.head(10).select();
}

/**
 * Remote S3 smoke test: reads a sample parquet file from a public bucket using anonymous credentials, then takes
 * {@code head(10)} of the resulting table and calls {@code select()} on it.
 * <p>
 * The test is skipped (via a JUnit assumption) unless {@code ENABLE_REMOTE_S3_TESTING} is true. It makes no
 * explicit assertions; it passes as long as the read completes without throwing.
 */
@Test
public void readSampleParquetFilesFromPublicS3Part3() {
    Assume.assumeTrue("Skipping test because s3 testing disabled.", ENABLE_REMOTE_S3_TESTING);

    final String sampleFileUri = "s3://redshift-downloads/redset/serverless/full.parquet";

    // Anonymous access to the bucket's region, with a 60 second read timeout.
    final S3Instructions anonymousS3Access = S3Instructions.builder()
            .regionName("us-east-1")
            .readTimeout(Duration.ofSeconds(60))
            .credentials(Credentials.anonymous())
            .build();

    // Pass the S3 configuration to the parquet reader as its special instructions.
    final ParquetInstructions.Builder parquetBuilder = new ParquetInstructions.Builder();
    parquetBuilder.setSpecialInstructions(anonymousS3Access);
    final ParquetInstructions parquetReadConfig = parquetBuilder.build();

    readTable(sampleFileUri, parquetReadConfig).head(10).select();
}

@Test
public void readKeyValuePartitionedParquetFromPublicS3() {
Assume.assumeTrue("Skipping test because s3 testing disabled.", ENABLE_REMOTE_S3_TESTING);
Expand Down

0 comments on commit 9ca4332

Please sign in to comment.