Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
### Fixes

- Use backtick quoting for everything to avoid errors with special characters ([1186](https://github.com/databricks/dbt-databricks/pull/1186))
- Ensure column comparisons always use lower-case names (since Databricks stores column names internally as lower case) ([1190](https://github.com/databricks/dbt-databricks/pull/1190))

### Under the Hood

Expand Down
29 changes: 22 additions & 7 deletions dbt/adapters/databricks/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,12 +772,19 @@ def get_persist_doc_columns(
# Since existing_columns are gathered after writing the table, we don't need to include any
# columns from the model that are not in the existing_columns. If we did, it would lead to
# an error when we tried to alter the table.

# Create a case-insensitive lookup for column names
columns_lower = {k.lower(): k for k in columns.keys()}

for column in existing_columns:
name = column.column
if name in columns:
config_column = columns[name]
# Use case-insensitive comparison for column names
name_lower = name.lower()
if name_lower in columns_lower:
original_column_name = columns_lower[name_lower]
config_column = columns[original_column_name]
if isinstance(config_column, dict):
comment = columns[name].get("description")
comment = columns[original_column_name].get("description")
elif hasattr(config_column, "description"):
comment = config_column.description
else:
Expand Down Expand Up @@ -807,13 +814,21 @@ def parse_columns_and_constraints(
list(model_columns.values()), model_constraints
)

# Create a case-insensitive lookup for model column names
model_columns_lower = {k.lower(): k for k in model_columns.keys()}
# Create a case-insensitive lookup for not_null columns
not_null_set_lower = {name.lower() for name in not_null_set}

for column in existing_columns:
if column.name in model_columns:
column_info = model_columns[column.name]
enriched_column = column.enrich(column_info, column.name in not_null_set)
column_name_lower = column.name.lower()
if column_name_lower in model_columns_lower:
original_model_column_name = model_columns_lower[column_name_lower]
column_info = model_columns[original_model_column_name]
is_not_null = column_name_lower in not_null_set_lower
enriched_column = column.enrich(column_info, is_not_null)
enriched_columns.append(enriched_column)
else:
if column.name in not_null_set:
if column_name_lower in not_null_set_lower:
column.not_null = True
enriched_columns.append(column)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def get_diff(self, other: "ColumnCommentsConfig") -> Optional["ColumnCommentsCon
comments = {}
if self.persist:
for column_name, comment in self.comments.items():
if comment != other.comments.get(column_name.lower()):
if comment != other.comments.get(column_name):
column_name = f"`{column_name}`"
comments[column_name] = comment
logger.debug(f"Comments: {comments}")
Expand Down
20 changes: 14 additions & 6 deletions dbt/adapters/databricks/relation_configs/column_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,24 @@ class ColumnMaskConfig(DatabricksComponentConfig):

def get_diff(self, other: "ColumnMaskConfig") -> Optional["ColumnMaskConfig"]:
# Find column masks that need to be unset
# Use case-insensitive comparison for column names (Databricks queries are case-insensitive)
self_column_masks_lower = {k.lower() for k in self.set_column_masks.keys()}
unset_column_mask = [
col for col in other.set_column_masks if col not in self.set_column_masks
col for col in other.set_column_masks if col.lower() not in self_column_masks_lower
]

# Find column masks that need to be set or updated
set_column_mask = {
col: mask
for col, mask in self.set_column_masks.items()
if col not in other.set_column_masks or other.set_column_masks[col] != mask
}
# Use case-insensitive comparison for column names, but preserve exact mask values
other_column_masks_lower = {}
for k, v in other.set_column_masks.items():
other_column_masks_lower[k.lower()] = v

set_column_mask = {}
for col, mask in self.set_column_masks.items():
# Case-insensitive column name lookup, but exact mask value comparison
other_mask = other_column_masks_lower.get(col.lower())
if other_mask is None or other_mask != mask:
set_column_mask[col] = mask

if set_column_mask or unset_column_mask:
return ColumnMaskConfig(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
{%- set source_columns = adapter.get_columns_in_relation(source_relation) | map(attribute='name') | list -%}
{%- set common_columns = [] -%}
{%- for dest_col in dest_columns -%}
{%- if dest_col in source_columns -%}
{%- if dest_col | lower in source_columns | map('lower') | list -%}
{%- do common_columns.append(dest_col) -%}
{%- else -%}
{%- do common_columns.append('DEFAULT') -%}
Expand Down Expand Up @@ -67,7 +67,7 @@ TABLE {{ temp_relation.render() }}
{% macro insert_into_sql_impl(target_relation, dest_columns, source_relation, source_columns) %}
{%- set common_columns = [] -%}
{%- for dest_col in dest_columns -%}
{%- if dest_col in source_columns -%}
{%- if dest_col | lower in source_columns | map('lower') | list -%}
{%- do common_columns.append(dest_col) -%}
{%- else -%}
{%- do common_columns.append('DEFAULT') -%}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import pytest

from dbt.tests import util


class TestMergeCaseSensitiveColumns:
    """Regression test for column-name case sensitivity in incremental merge.

    The model aliases its columns with mixed case (``Name``, ``AGE``). The
    first run builds the table; the second run goes through the ``merge``
    strategy keyed on ``id`` and is expected to update the existing row for
    id 2 and insert a new row for id 3.
    """

    @pytest.fixture(scope="class")
    def models(self):
        """Return the single incremental model exercised by this test."""
        # The incremental branch deliberately changes Bob's age (30 -> 31) so
        # the final assertion can only pass if the matched-update path of the
        # merge really ran; an identical row would pass even on a no-op.
        return {
            "case_sensitive_merge.sql": """
{{ config(
materialized='incremental',
unique_key='id',
incremental_strategy='merge'
) }}

{% if not is_incremental() %}
select 1 as id, 'Alice' as Name, 25 as AGE
union all
select 2 as id, 'Bob' as Name, 30 as AGE
{% else %}
-- This should update Bob's age and add Charlie
-- But if column comparison is case sensitive, it might fail to match columns
select 2 as id, 'Bob' as Name, 31 as AGE -- existing record, changed age
union all
select 3 as id, 'Charlie' as Name, 35 as AGE -- new record
{% endif %}
""",
        }

    def test_merge_with_capitalized_columns(self, project):
        """Test that merge works when select statement uses capitalized column names."""
        # First run - create initial table
        util.run_dbt(["run"])

        # The initial build should contain exactly the two seed rows
        results = project.run_sql("select count(*) from case_sensitive_merge", fetch="all")
        assert results[0][0] == 2  # Should have 2 rows initially

        # Second run - incremental merge: one update, one insert
        util.run_dbt(["run"])

        # One new row (Charlie) was inserted; Bob was updated in place
        results = project.run_sql("select count(*) from case_sensitive_merge", fetch="all")
        assert results[0][0] == 3  # Should have 3 rows after merge

        # Verify the content: Alice untouched, Bob's age updated, Charlie added
        results = project.run_sql(
            "select id, Name, AGE from case_sensitive_merge order by id", fetch="all"
        )
        expected_data = [(1, "Alice", 25), (2, "Bob", 31), (3, "Charlie", 35)]
        assert results == expected_data, f"Expected {expected_data}, got {results}"


class TestInsertIntoCaseSensitiveColumns:
    """Regression test for column-name case sensitivity in append inserts.

    The model aliases its columns with mixed case (``Name``, ``AGE``) and uses
    the ``append`` incremental strategy, so every run adds the same two rows.
    """

    @pytest.fixture(scope="class")
    def models(self):
        """Return the single incremental (append) model exercised by this test."""
        model_sql = """
{{ config(
materialized='incremental',
unique_key='id',
incremental_strategy='append'
) }}

select 1 as id, 'Alice' as Name, 25 as AGE
union all
select 2 as id, 'Bob' as Name, 30 as AGE
"""
        return {"insert_case_sensitive.sql": model_sql}

    def test_insert_with_capitalized_columns(self, project):
        """Test that insert operations work when select statement uses capitalized column names."""
        count_query = "select count(*) from insert_case_sensitive"

        # Run 1 creates the table (2 rows); run 2 appends the same rows (4 rows).
        for rows_expected in (2, 4):
            util.run_dbt(["run"])
            count_rows = project.run_sql(count_query, fetch="all")
            assert count_rows[0][0] == rows_expected

        # Every source row should now be present exactly twice.
        expected_data = [(1, "Alice", 25), (1, "Alice", 25), (2, "Bob", 30), (2, "Bob", 30)]
        results = project.run_sql(
            "select id, Name, AGE from insert_case_sensitive order by id, Name", fetch="all"
        )
        assert results == expected_data, f"Expected {expected_data}, got {results}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import pytest

from tests.unit.macros.base import MacroTestBase


class TestColumnCaseSensitivity(MacroTestBase):
    """Unit tests for case-insensitive column matching in ``insert_into_sql_impl``."""

    @pytest.fixture(scope="class")
    def template_name(self) -> str:
        """Name of the macro template file under test."""
        return "strategies.sql"

    @pytest.fixture(scope="class")
    def macro_folders_to_load(self) -> list:
        """Macro folders that must be loaded to render the template."""
        return ["macros/materializations/incremental"]

    def test_get_insert_overwrite_sql_case_insensitive_logic(self, template):
        """Smoke test: ``insert_into_sql_impl`` renders with mixed-case columns.

        Only checks that the macro renders and keeps the destination column
        names; end-to-end behavior is covered by the functional tests.
        """
        # No try/except here on purpose: a rendering error or a failed
        # assertion should surface with its own traceback rather than being
        # re-reported as a generic "compilation failed" message.
        sql = self.run_macro_raw(
            template,
            "insert_into_sql_impl",
            "target_relation",
            ["ID", "Name"],  # dest columns with mixed case
            "source_relation",
            ["id", "name"],  # source columns with different case
        )
        assert "insert into table" in sql
        assert "ID, Name" in sql

    def test_insert_into_sql_impl_case_insensitive(self, template):
        """Test that column comparisons in insert into are case insensitive."""
        # Destination and source names differ only by case, so every
        # destination column should be selected rather than replaced by DEFAULT.
        dest_columns = ["ID", "Name", "AGE"]
        source_columns = ["id", "name", "age"]

        sql = self.run_macro_raw(
            template,
            "insert_into_sql_impl",
            "target_relation",
            dest_columns,
            "source_relation",
            source_columns,
        )

        expected = (
            "insert into table target_relation (ID, Name, AGE)\n"
            "select ID, Name, AGE from source_relation"
        )
        self.assert_sql_equal(sql, expected)
Loading