-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-3122: Correct V2 page header compression fields for zero-size data pages #3148
base: master
Are you sure you want to change the base?
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,7 +55,11 @@ public byte[] decrypt(byte[] lengthAndCiphertext, byte[] AAD) { | |
public byte[] decrypt(byte[] ciphertext, int cipherTextOffset, int cipherTextLength, byte[] AAD) { | ||
|
||
int plainTextLength = cipherTextLength - NONCE_LENGTH; | ||
if (plainTextLength < 1) { | ||
if (plainTextLength == 0) { | ||
return new byte[0]; | ||
} | ||
|
||
if (plainTextLength < 0) { | ||
throw new ParquetCryptoRuntimeException("Wrong input length " + plainTextLength); | ||
} | ||
|
||
|
@@ -91,7 +95,11 @@ public ByteBuffer decrypt(ByteBuffer ciphertext, byte[] AAD) { | |
int cipherTextLength = ciphertext.limit() - ciphertext.position() - SIZE_LENGTH; | ||
|
||
int plainTextLength = cipherTextLength - NONCE_LENGTH; | ||
if (plainTextLength < 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
if (plainTextLength == 0) { | ||
return ByteBuffer.allocate(0); | ||
} | ||
|
||
if (plainTextLength < 0) { | ||
throw new ParquetCryptoRuntimeException("Wrong input length " + plainTextLength); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,7 +51,8 @@ public byte[] decrypt(byte[] lengthAndCiphertext, byte[] AAD) { | |
public byte[] decrypt(byte[] ciphertext, int cipherTextOffset, int cipherTextLength, byte[] AAD) { | ||
|
||
int plainTextLength = cipherTextLength - GCM_TAG_LENGTH - NONCE_LENGTH; | ||
if (plainTextLength < 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AES GCM decryption is able to handle an empty plaintext. It will also use the key to check the IV/tag integrity. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the suggestions. Updated. |
||
|
||
if (plainTextLength < 0) { | ||
throw new ParquetCryptoRuntimeException("Wrong input length " + plainTextLength); | ||
} | ||
|
||
|
@@ -81,7 +82,8 @@ public ByteBuffer decrypt(ByteBuffer ciphertext, byte[] AAD) { | |
int cipherTextOffset = SIZE_LENGTH; | ||
int cipherTextLength = ciphertext.limit() - ciphertext.position() - SIZE_LENGTH; | ||
int plainTextLength = cipherTextLength - GCM_TAG_LENGTH - NONCE_LENGTH; | ||
if (plainTextLength < 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
|
||
if (plainTextLength < 0) { | ||
throw new ParquetCryptoRuntimeException("Wrong input length " + plainTextLength); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1981,7 +1981,8 @@ public void writeDataPageV2Header( | |
rowCount, | ||
dataEncoding, | ||
rlByteLength, | ||
dlByteLength), | ||
dlByteLength, | ||
true /* compressed by default */), | ||
to); | ||
} | ||
|
||
|
@@ -2059,6 +2060,10 @@ public void writeDataPageV1Header( | |
pageHeaderAAD); | ||
} | ||
|
||
/** | ||
* @deprecated will be removed in 2.0.0. Use {@link ParquetMetadataConverter#writeDataPageV2Header(int, int, int, int, int, org.apache.parquet.column.Encoding, int, int, boolean, OutputStream)} instead | ||
*/ | ||
@Deprecated | ||
public void writeDataPageV2Header( | ||
int uncompressedSize, | ||
int compressedSize, | ||
|
@@ -2079,11 +2084,16 @@ public void writeDataPageV2Header( | |
dataEncoding, | ||
rlByteLength, | ||
dlByteLength, | ||
true, /* compressed by default */ | ||
to, | ||
null, | ||
null); | ||
} | ||
|
||
/** | ||
* @deprecated will be removed in 2.0.0. Use {@link ParquetMetadataConverter#writeDataPageV2Header(int, int, int, int, int, org.apache.parquet.column.Encoding, int, int, boolean, OutputStream, BlockCipher.Encryptor, byte[])} instead | ||
*/ | ||
@Deprecated | ||
public void writeDataPageV2Header( | ||
int uncompressedSize, | ||
int compressedSize, | ||
|
@@ -2097,35 +2107,53 @@ public void writeDataPageV2Header( | |
BlockCipher.Encryptor blockEncryptor, | ||
byte[] pageHeaderAAD) | ||
throws IOException { | ||
writePageHeader( | ||
newDataPageV2Header( | ||
uncompressedSize, | ||
compressedSize, | ||
valueCount, | ||
nullCount, | ||
rowCount, | ||
dataEncoding, | ||
rlByteLength, | ||
dlByteLength), | ||
writeDataPageV2Header( | ||
uncompressedSize, | ||
compressedSize, | ||
valueCount, | ||
nullCount, | ||
rowCount, | ||
dataEncoding, | ||
rlByteLength, | ||
dlByteLength, | ||
true, /* compressed by default */ | ||
to, | ||
blockEncryptor, | ||
pageHeaderAAD); | ||
} | ||
|
||
private PageHeader newDataPageV2Header( | ||
/** | ||
* @deprecated will be removed in 2.0.0. Use {@link ParquetMetadataConverter#writeDataPageV2Header(int, int, int, int, int, org.apache.parquet.column.Encoding, int, int, boolean, int, OutputStream, BlockCipher.Encryptor, byte[])} instead | ||
*/ | ||
@Deprecated | ||
public void writeDataPageV2Header( | ||
int uncompressedSize, | ||
int compressedSize, | ||
int valueCount, | ||
int nullCount, | ||
int rowCount, | ||
org.apache.parquet.column.Encoding dataEncoding, | ||
int rlByteLength, | ||
int dlByteLength) { | ||
DataPageHeaderV2 dataPageHeaderV2 = new DataPageHeaderV2( | ||
valueCount, nullCount, rowCount, getEncoding(dataEncoding), dlByteLength, rlByteLength); | ||
PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize); | ||
pageHeader.setData_page_header_v2(dataPageHeaderV2); | ||
return pageHeader; | ||
int dlByteLength, | ||
int crc, | ||
OutputStream to, | ||
BlockCipher.Encryptor blockEncryptor, | ||
byte[] pageHeaderAAD) | ||
throws IOException { | ||
writeDataPageV2Header( | ||
uncompressedSize, | ||
compressedSize, | ||
valueCount, | ||
nullCount, | ||
rowCount, | ||
dataEncoding, | ||
rlByteLength, | ||
dlByteLength, | ||
true, /* compressed by default */ | ||
crc, | ||
to, | ||
blockEncryptor, | ||
pageHeaderAAD); | ||
} | ||
|
||
public void writeDataPageV2Header( | ||
|
@@ -2137,7 +2165,34 @@ public void writeDataPageV2Header( | |
org.apache.parquet.column.Encoding dataEncoding, | ||
int rlByteLength, | ||
int dlByteLength, | ||
int crc, | ||
boolean compressed, | ||
OutputStream to) | ||
throws IOException { | ||
writeDataPageV2Header( | ||
uncompressedSize, | ||
compressedSize, | ||
valueCount, | ||
nullCount, | ||
rowCount, | ||
dataEncoding, | ||
rlByteLength, | ||
dlByteLength, | ||
compressed, | ||
to, | ||
null, | ||
null); | ||
} | ||
|
||
public void writeDataPageV2Header( | ||
int uncompressedSize, | ||
int compressedSize, | ||
int valueCount, | ||
int nullCount, | ||
int rowCount, | ||
org.apache.parquet.column.Encoding dataEncoding, | ||
int rlByteLength, | ||
int dlByteLength, | ||
boolean compressed, | ||
OutputStream to, | ||
BlockCipher.Encryptor blockEncryptor, | ||
byte[] pageHeaderAAD) | ||
|
@@ -2152,12 +2207,43 @@ public void writeDataPageV2Header( | |
dataEncoding, | ||
rlByteLength, | ||
dlByteLength, | ||
crc), | ||
compressed), | ||
to, | ||
blockEncryptor, | ||
pageHeaderAAD); | ||
} | ||
|
||
public void writeDataPageV2Header( | ||
int uncompressedSize, | ||
int compressedSize, | ||
int valueCount, | ||
int nullCount, | ||
int rowCount, | ||
org.apache.parquet.column.Encoding dataEncoding, | ||
int rlByteLength, | ||
int dlByteLength, | ||
boolean compressed, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
int crc, | ||
OutputStream to, | ||
BlockCipher.Encryptor blockEncryptor, | ||
byte[] pageHeaderAAD) | ||
throws IOException { | ||
PageHeader pageHeader = newDataPageV2Header( | ||
uncompressedSize, | ||
compressedSize, | ||
valueCount, | ||
nullCount, | ||
rowCount, | ||
dataEncoding, | ||
rlByteLength, | ||
dlByteLength, | ||
compressed); | ||
|
||
pageHeader.setCrc(crc); | ||
|
||
writePageHeader(pageHeader, to, blockEncryptor, pageHeaderAAD); | ||
} | ||
|
||
private PageHeader newDataPageV2Header( | ||
int uncompressedSize, | ||
int compressedSize, | ||
|
@@ -2167,12 +2253,13 @@ private PageHeader newDataPageV2Header( | |
org.apache.parquet.column.Encoding dataEncoding, | ||
int rlByteLength, | ||
int dlByteLength, | ||
int crc) { | ||
boolean compressed) { | ||
DataPageHeaderV2 dataPageHeaderV2 = new DataPageHeaderV2( | ||
valueCount, nullCount, rowCount, getEncoding(dataEncoding), dlByteLength, rlByteLength); | ||
dataPageHeaderV2.setIs_compressed(compressed); | ||
|
||
PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize); | ||
pageHeader.setData_page_header_v2(dataPageHeaderV2); | ||
pageHeader.setCrc(crc); | ||
return pageHeader; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
decryptor doesn't support decrypting zero-byte data, fixes here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ggershinsky Does this look ok to you?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep, technically this should work ok - unlike GCM, the CTR mode doesn't guarantee/check the integrity, so we don't need to protect against page replacement attacks (we can't anyway).
In terms of implementation, it might be a bit cleaner if we use cipher decryption of zero-sized arrays; fewer lines of code. But I'll verify first if this is supported in CTR, will get back on that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, CTR can handle this as well. So the fix can simply be a replacement of
if (plainTextLength < 1)
withif (plainTextLength < 0)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated