Skip to content

Commit 1b42a38

Browse files
committed
#740 Make sure unsigned binary fields can fit data types.
1 parent 00e2674 commit 1b42a38

File tree

12 files changed

+174
-108
lines changed

12 files changed

+174
-108
lines changed

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryNumberDecoders.scala

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,14 @@ object BinaryNumberDecoders {
8282
if (v<0) null else v
8383
}
8484

85+
def decodeBinaryUnsignedIntBigEndianAsLong(bytes: Array[Byte]): java.lang.Long = {
86+
if (bytes.length < 4) {
87+
return null
88+
}
89+
val v: Long = ((bytes(0) & 255L) << 24L) | ((bytes(1) & 255L) << 16L) | ((bytes(2) & 255L) << 8L) | (bytes(3) & 255L)
90+
if (v<0) null else v
91+
}
92+
8593
def decodeBinaryUnsignedIntLittleEndian(bytes: Array[Byte]): Integer = {
8694
if (bytes.length < 4) {
8795
return null
@@ -90,6 +98,14 @@ object BinaryNumberDecoders {
9098
if (v<0) null else v
9199
}
92100

101+
def decodeBinaryUnsignedIntLittleEndianAsLong(bytes: Array[Byte]): java.lang.Long = {
102+
if (bytes.length < 4) {
103+
return null
104+
}
105+
val v: Long = ((bytes(3) & 255L) << 24L) | ((bytes(2) & 255L) << 16L) | ((bytes(1) & 255L) << 8L) | (bytes(0) & 255L)
106+
if (v<0) null else v
107+
}
108+
93109
def decodeBinarySignedLongBigEndian(bytes: Array[Byte]): java.lang.Long = {
94110
if (bytes.length < 8) {
95111
return null
@@ -112,6 +128,13 @@ object BinaryNumberDecoders {
112128
if (v < 0L) null else v
113129
}
114130

131+
def decodeBinaryUnsignedLongBigEndianAsDecimal(bytes: Array[Byte]): BigDecimal = {
132+
if (bytes.length < 8) {
133+
return null
134+
}
135+
BigDecimal(BigInt(1, bytes).toString())
136+
}
137+
115138
def decodeBinaryUnsignedLongLittleEndian(bytes: Array[Byte]): java.lang.Long = {
116139
if (bytes.length < 8) {
117140
return null
@@ -120,6 +143,13 @@ object BinaryNumberDecoders {
120143
if (v < 0L) null else v
121144
}
122145

146+
def decodeBinaryUnsignedLongLittleEndianAsDecimal(bytes: Array[Byte]): BigDecimal = {
147+
if (bytes.length < 8) {
148+
return null
149+
}
150+
BigDecimal(BigInt(1, bytes.reverse).toString())
151+
}
152+
123153
def decodeBinaryAribtraryPrecision(bytes: Array[Byte], isBigEndian: Boolean, isSigned: Boolean): BigDecimal = {
124154
if (bytes.length == 0) {
125155
return null

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/DecoderSelector.scala

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package za.co.absa.cobrix.cobol.parser.decoders
1919
import java.nio.charset.{Charset, StandardCharsets}
2020
import za.co.absa.cobrix.cobol.parser.ast.datatype._
2121
import za.co.absa.cobrix.cobol.parser.common.Constants
22+
import za.co.absa.cobrix.cobol.parser.common.Constants.{maxIntegerPrecision, maxLongPrecision}
2223
import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
2324
import za.co.absa.cobrix.cobol.parser.encoding._
2425
import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon}
@@ -255,26 +256,32 @@ object DecoderSelector {
255256
val isSigned = signPosition.nonEmpty
256257

257258
val numOfBytes = BinaryUtils.getBytesCount(compact, precision, isSigned, isExplicitDecimalPt = false, isSignSeparate = false)
259+
val isMaxUnsignedPrecision = precision == maxIntegerPrecision || precision == maxLongPrecision
260+
258261
val decoder = if (strictIntegralPrecision) {
259262
(a: Array[Byte]) => BinaryNumberDecoders.decodeBinaryAribtraryPrecision(a, isBigEndian, isSigned)
260263
} else {
261-
(isSigned, isBigEndian, numOfBytes) match {
262-
case (true, true, 1) => BinaryNumberDecoders.decodeSignedByte _
263-
case (true, true, 2) => BinaryNumberDecoders.decodeBinarySignedShortBigEndian _
264-
case (true, true, 4) => BinaryNumberDecoders.decodeBinarySignedIntBigEndian _
265-
case (true, true, 8) => BinaryNumberDecoders.decodeBinarySignedLongBigEndian _
266-
case (true, false, 1) => BinaryNumberDecoders.decodeSignedByte _
267-
case (true, false, 2) => BinaryNumberDecoders.decodeBinarySignedShortLittleEndian _
268-
case (true, false, 4) => BinaryNumberDecoders.decodeBinarySignedIntLittleEndian _
269-
case (true, false, 8) => BinaryNumberDecoders.decodeBinarySignedLongLittleEndian _
270-
case (false, true, 1) => BinaryNumberDecoders.decodeUnsignedByte _
271-
case (false, true, 2) => BinaryNumberDecoders.decodeBinaryUnsignedShortBigEndian _
272-
case (false, true, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntBigEndian _
273-
case (false, true, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongBigEndian _
274-
case (false, false, 1) => BinaryNumberDecoders.decodeUnsignedByte _
275-
case (false, false, 2) => BinaryNumberDecoders.decodeBinaryUnsignedShortLittleEndian _
276-
case (false, false, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntLittleEndian _
277-
case (false, false, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongLittleEndian _
264+
(isSigned, isBigEndian, isMaxUnsignedPrecision, numOfBytes) match {
265+
case (true, true, _, 1) => BinaryNumberDecoders.decodeSignedByte _
266+
case (true, true, _, 2) => BinaryNumberDecoders.decodeBinarySignedShortBigEndian _
267+
case (true, true, _, 4) => BinaryNumberDecoders.decodeBinarySignedIntBigEndian _
268+
case (true, true, _, 8) => BinaryNumberDecoders.decodeBinarySignedLongBigEndian _
269+
case (true, false, _, 1) => BinaryNumberDecoders.decodeSignedByte _
270+
case (true, false, _, 2) => BinaryNumberDecoders.decodeBinarySignedShortLittleEndian _
271+
case (true, false, _, 4) => BinaryNumberDecoders.decodeBinarySignedIntLittleEndian _
272+
case (true, false, _, 8) => BinaryNumberDecoders.decodeBinarySignedLongLittleEndian _
273+
case (false, true, _, 1) => BinaryNumberDecoders.decodeUnsignedByte _
274+
case (false, true, _, 2) => BinaryNumberDecoders.decodeBinaryUnsignedShortBigEndian _
275+
case (false, true, false, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntBigEndian _
276+
case (false, true, true, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntBigEndianAsLong _
277+
case (false, true, false, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongBigEndian _
278+
case (false, true, true, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongBigEndianAsDecimal _
279+
case (false, false, _, 1) => BinaryNumberDecoders.decodeUnsignedByte _
280+
case (false, false, _, 2) => BinaryNumberDecoders.decodeBinaryUnsignedShortLittleEndian _
281+
case (false, false, false, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntLittleEndian _
282+
case (false, false, true, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntLittleEndianAsLong _
283+
case (false, false, false, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongLittleEndian _
284+
case (false, false, true, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongLittleEndianAsDecimal _
278285
case _ =>
279286
(a: Array[Byte]) => BinaryNumberDecoders.decodeBinaryAribtraryPrecision(a, isBigEndian, isSigned)
280287
}

cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryDecoderSpec.scala

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,10 +460,14 @@ class BinaryDecoderSpec extends AnyFunSuite {
460460
val decoderUnsignedShort = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 3, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
461461
val decoderSignedInt = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 8, compact = Some(COMP4())), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
462462
val decoderUnsignedIntBe = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 8, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
463+
val decoderUnsignedIntBeAsLong = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 9, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
463464
val decoderUnsignedIntLe = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 8, compact = Some(COMP9()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
465+
val decoderUnsignedIntLeAsLong = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 9, compact = Some(COMP9()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
464466
val decoderSignedLong = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 15, compact = Some(COMP4())), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
465467
val decoderUnsignedLongBe = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 15, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
468+
val decoderUnsignedLongBeAsBig = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 18, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
466469
val decoderUnsignedLongLe = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 15, compact = Some(COMP9()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
470+
val decoderUnsignedLongLeAsBig = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 18, compact = Some(COMP9()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false)
467471

468472
val num1 = decoderSignedByte(Array(0x10).map(_.toByte))
469473
assert(num1.isInstanceOf[Integer])
@@ -501,10 +505,18 @@ class BinaryDecoderSpec extends AnyFunSuite {
501505
assert(num9.isInstanceOf[Integer])
502506
assert(num9.asInstanceOf[Integer] == 9437184)
503507

508+
val num9a = decoderUnsignedIntBeAsLong(Array(0x00, 0x90, 0x00, 0x00).map(_.toByte))
509+
assert(num9a.isInstanceOf[java.lang.Long])
510+
assert(num9a.asInstanceOf[java.lang.Long] == 9437184L)
511+
504512
val num10 = decoderUnsignedIntLe(Array(0x00, 0x00, 0x90, 0x00).map(_.toByte))
505513
assert(num10.isInstanceOf[Integer])
506514
assert(num10.asInstanceOf[Integer] == 9437184)
507515

516+
val num10a = decoderUnsignedIntLeAsLong(Array(0x00, 0x00, 0x90, 0x00).map(_.toByte))
517+
assert(num10a.isInstanceOf[java.lang.Long])
518+
assert(num10a.asInstanceOf[java.lang.Long] == 9437184L)
519+
508520
val num11 = decoderSignedLong(Array(0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00).map(_.toByte))
509521
assert(num11.isInstanceOf[Long])
510522
assert(num11.asInstanceOf[Long] == 72057594037927936L)
@@ -517,9 +529,17 @@ class BinaryDecoderSpec extends AnyFunSuite {
517529
assert(num13.isInstanceOf[Long])
518530
assert(num13.asInstanceOf[Long] == 40532396646334464L)
519531

532+
val num13a = decoderUnsignedLongBeAsBig(Array(0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00).map(_.toByte))
533+
assert(num13a.isInstanceOf[BigDecimal])
534+
assert(num13a.asInstanceOf[BigDecimal] == BigDecimal("40532396646334464"))
535+
520536
val num14 = decoderUnsignedLongLe(Array(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00).map(_.toByte))
521537
assert(num14.isInstanceOf[Long])
522538
assert(num14.asInstanceOf[Long] == 40532396646334464L)
539+
540+
val num14a = decoderUnsignedLongLeAsBig(Array(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00).map(_.toByte))
541+
assert(num14a.isInstanceOf[BigDecimal])
542+
assert(num14a.asInstanceOf[BigDecimal] == BigDecimal("40532396646334464"))
523543
}
524544

525545
test("Test Binary strict integral precision numbers") {

data/test17_expected/test17a_schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
}
4141
}, {
4242
"name" : "TAXPAYER",
43-
"type" : "integer",
43+
"type" : "long",
4444
"nullable" : true,
4545
"metadata" : { }
4646
} ]

data/test17_expected/test17b_schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
}
6262
}, {
6363
"name" : "TAXPAYER",
64-
"type" : "integer",
64+
"type" : "long",
6565
"nullable" : true,
6666
"metadata" : { }
6767
} ]

data/test17_expected/test17c_schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
}
4141
}, {
4242
"name" : "TAXPAYER",
43-
"type" : "integer",
43+
"type" : "long",
4444
"nullable" : true,
4545
"metadata" : { }
4646
}, {

data/test18 special_char_expected/test18a_schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
}
4141
}, {
4242
"name" : "TAXPAYER",
43-
"type" : "integer",
43+
"type" : "long",
4444
"nullable" : true,
4545
"metadata" : { }
4646
} ]

data/test24_expected/test24_schema.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,7 @@
712712
}
713713
}, {
714714
"name" : "NUM_BIN_INT07",
715-
"type" : "integer",
715+
"type" : "long",
716716
"nullable" : true,
717717
"metadata" : { }
718718
}, {
@@ -760,7 +760,7 @@
760760
}
761761
}, {
762762
"name" : "NUM_BIN_INT11",
763-
"type" : "long",
763+
"type" : "decimal(20,0)",
764764
"nullable" : true,
765765
"metadata" : { }
766766
}, {

data/test24_expected/test24b_schema.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,7 @@
712712
}
713713
}, {
714714
"name" : "NUM_BIN_INT07",
715-
"type" : "integer",
715+
"type" : "long",
716716
"nullable" : true,
717717
"metadata" : { }
718718
}, {
@@ -760,7 +760,7 @@
760760
}
761761
}, {
762762
"name" : "NUM_BIN_INT11",
763-
"type" : "long",
763+
"type" : "decimal(20,0)",
764764
"nullable" : true,
765765
"metadata" : { }
766766
}, {

data/test6_expected/test6_schema.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@
299299
"metadata" : { }
300300
}, {
301301
"name" : "NUM_BIN_INT07",
302-
"type" : "integer",
302+
"type" : "long",
303303
"nullable" : true,
304304
"metadata" : { }
305305
}, {
@@ -319,7 +319,7 @@
319319
"metadata" : { }
320320
}, {
321321
"name" : "NUM_BIN_INT11",
322-
"type" : "long",
322+
"type" : "decimal(20,0)",
323323
"nullable" : true,
324324
"metadata" : { }
325325
}, {

0 commit comments

Comments
 (0)