Skip to content

Commit 3346e8d

Browse files
committed
#258 Remove dependency on scodec.
1 parent e1ebd95 commit 3346e8d

File tree

7 files changed

+79
-96
lines changed

7 files changed

+79
-96
lines changed

README.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -233,23 +233,22 @@ of the dependencies.
233233

234234
#### Getting all Cobrix dependencies
235235

236-
Cobrix's `spark-cobol` data source depends on the COBOL parser that is a part of Cobrix itself and on `scodec` libraries
237-
to decode various binary formats.
236+
Cobrix's `spark-cobol` data source depends on the COBOL parser that is a part of Cobrix itself.
238237

239238
The jars that you need to get are:
240239

241-
* spark-cobol_2.12-2.7.10.jar
242-
* cobol-parser_2.12-2.7.10.jar
243-
* scodec-core_2.12-1.10.3.jar
244-
* scodec-bits_2.12-1.1.4.jar
240+
* spark-cobol_2.12-2.8.0.jar
241+
* cobol-parser_2.12-2.8.0.jar
242+
243+
> Versions older than 2.8.0 also need `scodec-core_2.12-1.10.3.jar` and `scodec-bits_2.12-1.1.4.jar`.
245244
246245
> Versions older than 2.7.1 also need `antlr4-runtime-4.8.jar`.
247246
248247
After that you can specify these jars in `spark-shell` command line. Here is an example:
249248
```
250249
$ spark-shell --packages za.co.absa.cobrix:spark-cobol_2.12:2.8.0
251250
or
252-
$ spark-shell --master yarn --deploy-mode client --driver-cores 4 --driver-memory 4G --jars spark-cobol_2.12-2.7.10.jar,cobol-parser_2.12-2.7.10.jar,scodec-core_2.12-1.10.3.jar,scodec-bits_2.12-1.1.4.jar
251+
$ spark-shell --master yarn --deploy-mode client --driver-cores 4 --driver-memory 4G --jars spark-cobol_2.12-2.8.0.jar,cobol-parser_2.12-2.8.0.jar
253252
254253
Setting default log level to "WARN".
255254
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).

build.sbt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,6 @@ lazy val assemblySettings = Seq(
151151
assembly / assemblyShadeRules:= Seq(
152152
// Spark may rely on a different version of ANTLR runtime. Renaming the package helps avoid the binary incompatibility
153153
ShadeRule.rename("org.antlr.**" -> "za.co.absa.cobrix.cobol.parser.shaded.org.antlr.@1").inAll,
154-
// Shading all 3rd party libraries used by 'spark-cobol' in order to avoid binary conflicts.
155-
ShadeRule.rename("macrocompat.**" -> "za.co.absa.cobrix.spark.cobol.shaded.macrocompat.@1").inAll,
156-
ShadeRule.rename("scodec.**" -> "za.co.absa.cobrix.spark.cobol.shaded.scodec.@1").inAll,
157-
ShadeRule.rename("shapeless.**" -> "za.co.absa.cobrix.spark.cobol.shaded.shapeless.@1").inAll,
158154
// The SLF4j API and implementation are provided by Spark
159155
ShadeRule.zap("org.slf4j.**").inAll
160156
),

cobol-parser/pom.xml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,6 @@
3030
<packaging>jar</packaging>
3131

3232
<dependencies>
33-
<!-- binary codecs -->
34-
<dependency>
35-
<groupId>org.scodec</groupId>
36-
<artifactId>scodec-core_${scala.compat.version}</artifactId>
37-
</dependency>
3833
<!-- parser -->
3934
<dependency>
4035
<groupId>org.antlr</groupId>
@@ -46,6 +41,12 @@
4641
<groupId>org.slf4j</groupId>
4742
<artifactId>slf4j-api</artifactId>
4843
</dependency>
44+
<!-- binary codecs -->
45+
<dependency>
46+
<groupId>org.scodec</groupId>
47+
<artifactId>scodec-core_${scala.compat.version}</artifactId>
48+
<scope>test</scope>
49+
</dependency>
4950
<dependency>
5051
<groupId>org.slf4j</groupId>
5152
<artifactId>slf4j-simple</artifactId>

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryUtils.scala

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,13 @@
1616

1717
package za.co.absa.cobrix.cobol.parser.decoders
1818

19-
import scodec.Codec
20-
import scodec.bits.BitVector
2119
import za.co.absa.cobrix.cobol.parser.ast.datatype._
2220
import za.co.absa.cobrix.cobol.parser.common.Constants
2321
import za.co.absa.cobrix.cobol.parser.encoding.{EBCDIC, Encoding}
2422

25-
import scala.util.control.NonFatal
26-
2723
/** Utilities for decoding Cobol binary data files **/
2824
//noinspection RedundantBlock
2925
object BinaryUtils {
30-
31-
lazy val floatB: Codec[Float] = scodec.codecs.float
32-
lazy val floatL: Codec[Float] = scodec.codecs.floatL
33-
lazy val doubleB: Codec[Double] = scodec.codecs.double
34-
lazy val doubleL: Codec[Double] = scodec.codecs.doubleL
35-
3626
/**
3727
* This is the EBCDIC to ASCII conversion table. This is an "invariant" subset of EBCDIC code pages.
3828
* For full EBCDIC code pages support please use [[za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage]]
@@ -105,25 +95,6 @@ object BinaryUtils {
10595
/** Convert an ASCII character to EBCDIC */
10696
def asciiToEbcdic(char: Char): Byte = ascii2ebcdic(char.toByte)
10797

108-
/** Get the bit count of a cobol data type
109-
*
110-
* @param codec EBCDIC / ASCII
111-
* @param comp A type of compact storage
112-
* @param precision The precision (the number of digits) of the type
113-
* @return
114-
*/
115-
def getBitCount(codec: Codec[_ <: AnyVal], comp: Option[Int], precision: Int): Int = {
116-
comp match {
117-
case Some(value) =>
118-
value match {
119-
case compact if compact == 3 =>
120-
(precision + 1) * codec.sizeBound.lowerBound.toInt //bcd
121-
case _ => codec.sizeBound.lowerBound.toInt // bin/float/floatL
122-
}
123-
case None => precision * codec.sizeBound.lowerBound.toInt
124-
}
125-
}
126-
12798
def getBytesCount(compression: Option[Usage], precision: Int, isSigned: Boolean, isExplicitDecimalPt: Boolean, isSignSeparate: Boolean): Int = {
12899
import Constants._
129100
val isRealSigned = if (isSignSeparate) false else isSigned
@@ -273,32 +244,4 @@ object BinaryUtils {
273244
}
274245
addDecimalPoint(value.toString, scale, scaleFactor)
275246
}
276-
277-
/**
278-
* A decoder for IEEE-754 big endian floats
279-
*
280-
* @param bytes A byte array that represents the binary data
281-
* @return A boxed float
282-
*/
283-
def decodeFloat(bytes: Array[Byte]): java.lang.Float = {
284-
try {
285-
floatB.decode(BitVector(bytes)).require.value
286-
} catch {
287-
case NonFatal(_) => null
288-
}
289-
}
290-
291-
/**
292-
* A decoder for IEEE-754 big endian doubles
293-
*
294-
* @param bytes A byte array that represents the binary data
295-
* @return A boxed double
296-
*/
297-
def decodeDouble(bytes: Array[Byte]): java.lang.Double = {
298-
try {
299-
doubleB.decode(BitVector(bytes)).require.value
300-
} catch {
301-
case NonFatal(_) => null
302-
}
303-
}
304247
}

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/FloatingPointDecoders.scala

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,72 @@
1616

1717
package za.co.absa.cobrix.cobol.parser.decoders
1818

19-
import scodec.Codec
20-
import scodec.bits.BitVector
21-
19+
import java.nio.{ByteBuffer, ByteOrder}
2220
import scala.util.control.NonFatal
2321

2422
object FloatingPointDecoders {
25-
private val floatB: Codec[Float] = scodec.codecs.float
26-
private val floatL: Codec[Float] = scodec.codecs.floatL
27-
private val doubleB: Codec[Double] = scodec.codecs.double
28-
private val doubleL: Codec[Double] = scodec.codecs.doubleL
29-
3023
private val BIT_COUNT_MAGIC = 0x000055AFL
3124

25+
/**
26+
* A decoder for IEEE-754 32 bit big endian floats
27+
*
28+
* @param bytes A byte array that represents the binary data
29+
* @return The decoded float value
30+
*/
31+
def decodeFloatB(bytes: Array[Byte]): Float = {
32+
require(bytes.length == 4, "Input must be exactly 4 bytes for a 32-bit float")
33+
34+
val byteBuffer = ByteBuffer.wrap(bytes)
35+
byteBuffer.order(ByteOrder.BIG_ENDIAN)
36+
byteBuffer.getFloat
37+
}
38+
39+
/**
40+
* A decoder for IEEE-754 32 bit little endian floats
41+
*
42+
* @param bytes A byte array that represents the binary data
43+
* @return The decoded float value
44+
*/
45+
def decodeFloatL(bytes: Array[Byte]): Float = {
46+
require(bytes.length == 4, "Input must be exactly 4 bytes for a 32-bit float")
47+
48+
val byteBuffer = ByteBuffer.wrap(bytes)
49+
byteBuffer.order(ByteOrder.LITTLE_ENDIAN)
50+
byteBuffer.getFloat
51+
}
52+
53+
/**
54+
* A decoder for IEEE-754 64 bit big endian floats
55+
*
56+
* @param bytes A byte array that represents the binary data
57+
* @return The decoded double value
58+
*/
59+
def decodeDoubleB(bytes: Array[Byte]): Double = {
60+
require(bytes.length == 8, "Input must be exactly 8 bytes for a 64-bit float")
61+
62+
val byteBuffer = ByteBuffer.wrap(bytes)
63+
byteBuffer.order(ByteOrder.BIG_ENDIAN)
64+
byteBuffer.getDouble
65+
}
66+
67+
/**
68+
* A decoder for IEEE-754 64 bit little endian floats
69+
*
70+
* @param bytes A byte array that represents the binary data
71+
* @return The decoded double value
72+
*/
73+
def decodeDoubleL(bytes: Array[Byte]): Double = {
74+
require(bytes.length == 8, "Input must be exactly 8 bytes for a 64-bit float")
75+
76+
val byteBuffer = ByteBuffer.wrap(bytes)
77+
byteBuffer.order(ByteOrder.LITTLE_ENDIAN)
78+
byteBuffer.getDouble
79+
}
80+
3281
/** Decode IEEE754 single precision big endian encoded number. */
3382
def decodeIeee754SingleBigEndian(bytes: Array[Byte]): java.lang.Float = {
3483
try {
35-
floatB.decode(BitVector(bytes)).require.value
84+
decodeFloatB(bytes)
3685
} catch {
3786
case NonFatal(_) => null
3887
}
@@ -41,7 +90,7 @@ object FloatingPointDecoders {
4190
/** Decode IEEE754 double precision big endian encoded number. */
4291
def decodeIeee754DoubleBigEndian(bytes: Array[Byte]): java.lang.Double = {
4392
try {
44-
doubleB.decode(BitVector(bytes)).require.value
93+
decodeDoubleB(bytes)
4594
} catch {
4695
case NonFatal(_) => null
4796
}
@@ -50,7 +99,7 @@ object FloatingPointDecoders {
5099
/** Decode IEEE754 single precision little endian encoded number. */
51100
def decodeIeee754SingleLittleEndian(bytes: Array[Byte]): java.lang.Float = {
52101
try {
53-
floatL.decode(BitVector(bytes)).require.value
102+
decodeFloatL(bytes)
54103
} catch {
55104
case NonFatal(_) => null
56105
}
@@ -59,7 +108,7 @@ object FloatingPointDecoders {
59108
/** Decode IEEE754 double precision little endian encoded number. */
60109
def decodeIeee754DoubleLittleEndian(bytes: Array[Byte]): java.lang.Double = {
61110
try {
62-
doubleL.decode(BitVector(bytes)).require.value
111+
decodeDoubleL(bytes)
63112
} catch {
64113
case NonFatal(_) => null
65114
}

pom.xml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,7 @@
117117
<jackson.version>2.13.1</jackson.version>
118118
<mockito.version>4.11.0</mockito.version>
119119
<scala_logging.version>3.7.2</scala_logging.version>
120-
<scodec_bits.version>1.1.4</scodec_bits.version>
121-
<scodec_core.version>1.10.3</scodec_core.version>
120+
<scodec_core.version>1.11.10</scodec_core.version>
122121
<slf4j.version>1.7.25</slf4j.version>
123122
</properties>
124123

@@ -227,15 +226,11 @@
227226
<artifactId>jul-to-slf4j</artifactId>
228227
<version>${slf4j.version}</version>
229228
</dependency>
230-
<dependency>
231-
<groupId>org.scodec</groupId>
232-
<artifactId>scodec-bits_${scala.compat.version}</artifactId>
233-
<version>${scodec_bits.version}</version>
234-
</dependency>
235229
<dependency>
236230
<groupId>org.scodec</groupId>
237231
<artifactId>scodec-core_${scala.compat.version}</artifactId>
238232
<version>${scodec_core.version}</version>
233+
<scope>test</scope>
239234
</dependency>
240235

241236
<!-- Test scope dependencies -->

project/Dependencies.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,14 @@ object Dependencies {
7272

7373
val CobolParserDependencies: Seq[ModuleID] = Seq(
7474
// compile
75-
"org.scodec" %% "scodec-core" % scodecCoreVersion excludeAll(ExclusionRule(organization = "org.scala-lang")),
7675
"org.antlr" % "antlr4-runtime" % antlrValue,
7776
"org.slf4j" % "slf4j-api" % slf4jVersion,
7877

7978
// test
80-
"org.scalatest" %% "scalatest" % scalatestVersion % Test,
81-
"org.mockito" % "mockito-core" % mockitoVersion % Test,
82-
"org.slf4j" % "slf4j-simple" % slf4jVersion % Test
79+
"org.scalatest" %% "scalatest" % scalatestVersion % Test,
80+
"org.mockito" % "mockito-core" % mockitoVersion % Test,
81+
"org.scodec" %% "scodec-core" % scodecCoreVersion % Test,
82+
"org.slf4j" % "slf4j-simple" % slf4jVersion % Test
8383
)
8484

8585
val CobolParserShadedDependencies: Set[ModuleID] = Set(

0 commit comments

Comments
 (0)