Skip to content

Commit dff5177

Browse files
Merge pull request #70 from AbsaOSS/feature/69-allow-infinity-values-with-different-pattern-than-input-values
InfinitySupport changed from mix-in train to class
2 parents a13a54d + b2099e5 commit dff5177

22 files changed

+606
-115
lines changed

.github/workflows/build.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ jobs:
3232
name: Scala ${{matrix.scala}}
3333
steps:
3434
- name: Checkout code
35-
uses: actions/checkout@v2
36-
- uses: coursier/cache-action@v5
35+
uses: actions/checkout@v4
36+
- uses: coursier/cache-action@v6
3737
- name: Setup Scala
38-
uses: olafurpg/setup-scala@v10
38+
uses: olafurpg/setup-scala@v14
3939
with:
4040
java-version: "adopt@1.8"
4141
- name: Build and run tests

.github/workflows/format_check.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ jobs:
2626
name: Scalafmt Check
2727
steps:
2828
- name: Checkout code
29-
uses: actions/checkout@v2
29+
uses: actions/checkout@v4
3030
with:
3131
fetch-depth: 0
3232
ref: ${{ github.event.pull_request.head.ref }}
33-
33+
- uses: coursier/cache-action@v6
3434
- name: Setup Scala
35-
uses: olafurpg/setup-scala@v10
35+
uses: olafurpg/setup-scala@v14
3636
with:
3737
java-version: "adopt@1.8"
3838

.github/workflows/jacoco_check.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ jobs:
3737
changed: 80.0
3838
steps:
3939
- name: Checkout code
40-
uses: actions/checkout@v2
40+
uses: actions/checkout@v4
41+
- uses: coursier/cache-action@v6
4142
- name: Setup Scala
42-
uses: olafurpg/setup-scala@v10
43+
uses: olafurpg/setup-scala@v14
4344
with:
4445
java-version: "adopt@1.8"
4546
- name: Build and run tests

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
[![License](http://img.shields.io/:license-apache-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0.html)
44
[![Release](https://github.com/AbsaOSS/spark-data-standardization/actions/workflows/release.yml/badge.svg)](https://github.com/AbsaOSS/spark-data-standardization/actions/workflows/release.yml)
5+
![Java 8](https://img.shields.io/badge/Java_1.8-ED8B00?style=flat&logo=openjdk&logoColor=black)
56

67
- Dataframe in
78
- Standardized Dataframe out

build.sbt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ ThisBuild / name := "spark-data-standardization"
2121
ThisBuild / organization := "za.co.absa"
2222

2323
lazy val scala211 = "2.11.12"
24-
lazy val scala212 = "2.12.18"
25-
lazy val scala213 = "2.13.11"
24+
lazy val scala212 = "2.12.20"
25+
lazy val scala213 = "2.13.16"
2626

2727
ThisBuild / crossScalaVersions := Seq(scala211, scala212, scala213)
2828
ThisBuild / scalaVersion := scala211
@@ -39,6 +39,8 @@ ThisBuild / printSparkScalaVersion := {
3939
}
4040

4141
Test / parallelExecution := false
42+
Test / logBuffered := false
43+
Test / fork := true
4244

4345
// Only apply scalafmt to files that differ from master (i.e. files changed in the feature branch or so; n/a on Windows)
4446
lazy val fmtFilterExpression: String = System.getProperty("os.name").toLowerCase match {

src/main/scala/za/co/absa/standardization/SchemaValidator.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package za.co.absa.standardization
1818

1919
import org.apache.spark.sql.SparkSession
2020
import org.apache.spark.sql.types._
21-
import za.co.absa.standardization.ErrorMessage
2221
import za.co.absa.standardization.types.{TypeDefaults, TypedStructField}
2322
import za.co.absa.standardization.validation.field.FieldValidationIssue
2423

@@ -116,7 +115,7 @@ object SchemaValidator {
116115
fields += prefixedField
117116
}
118117
}
119-
fields.toSeq
118+
fields.toSeq //has to be here for Scala 2.13 compatibility
120119
}
121120

122121
def flattenArray(field: StructField, arr: ArrayType, structPath: String): Seq[FlatField] = {
@@ -128,7 +127,7 @@ object SchemaValidator {
128127
val prefixedField = FlatField(structPath, field)
129128
arrayFields += prefixedField
130129
}
131-
arrayFields.toSeq
130+
arrayFields.toSeq //has to be here for Scala 2.13 compatibility
132131
}
133132

134133
flattenStruct(schema, "")
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Context
2+
3+
The current support for infinity values in the Standardization library requires the infinity values to have the same pattern as the input values. For example, if the input values have the pattern yyyy-MM-dd, then the infinity values need to be specified as e.g. 1000-01-01 or 9999-01-01.
4+
5+
This results in a problem when patterns like yyMMdd are used: due to truncation, dates such as 2010/01/01 or 2000/01/01 can't be interpreted because of the leading zeros, leading to a standardization error.
6+
7+
8+
# Solution design
9+
10+
Currently, TypeParser (its inner classes and methods) first replaces the infinity symbol with the infinity value and only then converts to timestamp/date using the provided pattern. The solution will defer the replacement based on what pattern the infinity value follows.
11+
12+
TypeParser is to provide InfinitySupport with the column and the converting function. InfinitySupport then decides, for both positive and negative infinity, whether the infinity value is encoded by the provided pattern or by the ISO pattern. In case no infinity is defined, there is no transformation; else, if input column === infinity symbol, use the infinity_value converted using the ISO pattern and return the converting function result with the original column.
13+
14+
# Outcome
15+
16+
InfinitySupport is replaced with a class providing a unified `replaceInfinitySymbols` method that takes a conversion function. It is extended by InfinitySupportIso, which implements the date/time algorithm: infinity values matching the input pattern are parsed using `defaultInfinityValueInjection`; otherwise `isoCast` is used for ISO-formatted infinity values, with `isOfISODateFormat` and `isOfISOTimestampFormat` used to detect ISO formats.
17+
18+
TypeParser continues orchestrating standardisation and delegating Parsers for source to target types.
19+
20+
21+
![Architecture diagram!](InfinitySupport-ISO-fallback.png)
22+

src/main/scala/za/co/absa/standardization/adr/001-infinity-support-iso-pattern-defaults/InfinitySupport-ISO-fallback.drawio

Lines changed: 93 additions & 0 deletions
Large diffs are not rendered by default.
570 KB
Loading

src/main/scala/za/co/absa/standardization/implicits/StringImplicits.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import java.security.InvalidParameterException
2121
import scala.annotation.tailrec
2222

2323
object StringImplicits {
24-
implicit class StringEnhancements(string: String) {
24+
implicit class StringEnhancements(val string: String) extends AnyVal {
2525

2626
/**
2727
* Replaces all occurrences of the provided characters with their mapped values

0 commit comments

Comments
 (0)