Skip to content

Commit 30532e7

Browse files
refactor(pydantic): complete migration from Pydantic v1 to v2
This commit completes the migration of all Pydantic v1 legacy syntax to v2 across the entire DataHub Python codebase.

**Configuration Migration (18 instances):**
- Migrate `class Config:` → `model_config = ConfigDict(...)`
- Updated ConfigModel, PermissiveConfigModel, ConnectionModel base classes
- Migrated 13 additional model classes across multiple files

**Method Migrations (76 instances):**
- `.parse_obj()` → `.model_validate()` (38 instances)
- `.parse_raw()` → `.model_validate_json()` (2 instances)
- `.dict()` → `.model_dump()` (27 instances)
- `.json()` → `.model_dump_json()` (4 instances)
- `.update_forward_refs()` → `.model_rebuild()` (3 instances)
- `.copy()` and `.schema()` - all false positives (dicts/lists/HTTP responses)

**Scope of Changes:**
- metadata-ingestion/src: 50 instances
- metadata-ingestion/tests: 30 instances
- datahub-actions: 12 instances
- smoke-test: 2 instances

**Total: 94 Pydantic v1 calls migrated**

**Key Files Updated:**
- common.py: Base ConfigModel classes
- Multiple source files: sql_queries.py, datahub_classifier.py, schema_assertion.py, etc.
- Multiple CLI files: structuredproperties_cli.py, forms_cli.py, dataset_cli.py, etc.
- Test files: RDS IAM tests, Unity Catalog tests, assertion tests, etc.
- datahub-actions: propagation_action.py, filter tests, consumer offsets
- smoke-test: stateful ingestion tests

All Pydantic v2 deprecation warnings have been resolved. The codebase is now fully compliant with Pydantic v2 with no remaining v1 syntax.
1 parent 3a360ce commit 30532e7

File tree

38 files changed

+110
-122
lines changed

38 files changed

+110
-122
lines changed

datahub-actions/src/datahub_actions/plugin/action/propagation/docs/propagation_action.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -280,7 +280,7 @@ def process_schema_field_documentation(
280280
if current_documentation_instance.attribution
281281
else {}
282282
)
283-
source_details_parsed: SourceDetails = SourceDetails.parse_obj(
283+
source_details_parsed: SourceDetails = SourceDetails.model_validate(
284284
source_details
285285
)
286286
should_stop_propagation, reason = self.should_stop_propagation(

datahub-actions/src/datahub_actions/plugin/source/acryl/datahub_cloud_events_consumer_offsets_store.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,11 +46,11 @@ def from_serialized_value(cls, value: SerializedValueClass) -> "EventConsumerSta
4646
return cls()
4747

4848
def to_blob(self) -> bytes:
49-
return self.json().encode()
49+
return self.model_dump_json().encode()
5050

5151
@staticmethod
5252
def from_blob(blob: bytes) -> "EventConsumerState":
53-
return EventConsumerState.parse_raw(blob.decode())
53+
return EventConsumerState.model_validate_json(blob.decode())
5454

5555

5656
class DataHubEventsConsumerPlatformResourceOffsetsStore:

datahub-actions/tests/unit/plugin/transform/filter/test_filter_transformer.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def as_json(self) -> str:
5151

5252

5353
def test_returns_none_when_diff_event_type():
54-
filter_transformer_config = FilterTransformerConfig.parse_obj(
54+
filter_transformer_config = FilterTransformerConfig.model_validate(
5555
{"event_type": "EntityChangeEvent_v1", "event": {"field1": "a", "field2": "b"}}
5656
)
5757
filter_transformer = FilterTransformer(filter_transformer_config)
@@ -68,7 +68,7 @@ def test_returns_none_when_diff_event_type():
6868

6969

7070
def test_does_exact_match():
71-
filter_transformer_config = FilterTransformerConfig.parse_obj(
71+
filter_transformer_config = FilterTransformerConfig.model_validate(
7272
{"event_type": "EntityChangeEvent_v1", "event": {"field1": "a", "field2": "b"}}
7373
)
7474
filter_transformer = FilterTransformer(filter_transformer_config)
@@ -83,7 +83,7 @@ def test_does_exact_match():
8383

8484

8585
def test_returns_none_when_no_match():
86-
filter_transformer_config = FilterTransformerConfig.parse_obj(
86+
filter_transformer_config = FilterTransformerConfig.model_validate(
8787
{"event_type": "EntityChangeEvent_v1", "event": {"field1": "a", "field2": "b"}}
8888
)
8989
filter_transformer = FilterTransformer(filter_transformer_config)
@@ -97,7 +97,7 @@ def test_returns_none_when_no_match():
9797

9898

9999
def test_matches_on_nested_event():
100-
filter_transformer_config = FilterTransformerConfig.parse_obj(
100+
filter_transformer_config = FilterTransformerConfig.model_validate(
101101
{
102102
"event_type": "EntityChangeEvent_v1",
103103
"event": {"field1": {"nested_1": {"nested_b": "a"}}},
@@ -112,7 +112,7 @@ def test_matches_on_nested_event():
112112

113113

114114
def test_returns_none_when_no_match_nested_event():
115-
filter_transformer_config = FilterTransformerConfig.parse_obj(
115+
filter_transformer_config = FilterTransformerConfig.model_validate(
116116
{
117117
"event_type": "EntityChangeEvent_v1",
118118
"event": {"field1": {"nested_1": {"nested_b": "a"}}},
@@ -127,7 +127,7 @@ def test_returns_none_when_no_match_nested_event():
127127

128128

129129
def test_returns_none_when_different_data_type():
130-
filter_transformer_config = FilterTransformerConfig.parse_obj(
130+
filter_transformer_config = FilterTransformerConfig.model_validate(
131131
{
132132
"event_type": "EntityChangeEvent_v1",
133133
"event": {"field1": {"nested_1": {"nested_b": "a"}}},
@@ -142,7 +142,7 @@ def test_returns_none_when_different_data_type():
142142

143143

144144
def test_returns_match_when_either_is_present():
145-
filter_transformer_config = FilterTransformerConfig.parse_obj(
145+
filter_transformer_config = FilterTransformerConfig.model_validate(
146146
{
147147
"event_type": "EntityChangeEvent_v1",
148148
"event": {"field1": {"nested_1": ["a", "b"]}},
@@ -157,7 +157,7 @@ def test_returns_match_when_either_is_present():
157157

158158

159159
def test_returns_none_when_neither_is_present():
160-
filter_transformer_config = FilterTransformerConfig.parse_obj(
160+
filter_transformer_config = FilterTransformerConfig.model_validate(
161161
{
162162
"event_type": "EntityChangeEvent_v1",
163163
"event": {"field1": {"nested_1": ["a", "b"]}},
@@ -172,7 +172,7 @@ def test_returns_none_when_neither_is_present():
172172

173173

174174
def test_no_match_when_list_filter_on_dict_obj():
175-
filter_transformer_config = FilterTransformerConfig.parse_obj(
175+
filter_transformer_config = FilterTransformerConfig.model_validate(
176176
{
177177
"event_type": "EntityChangeEvent_v1",
178178
"event": {"field1": {"nested_1": ["a", "b"]}},

metadata-ingestion/src/datahub/api/entities/common/serialized_value.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from typing import Dict, Optional, Type, TypeVar, Union
44

55
from avrogen.dict_wrapper import DictWrapper
6-
from pydantic import BaseModel
6+
from pydantic import BaseModel, ConfigDict
77

88
import datahub.metadata.schema_classes as models
99
from datahub.metadata.schema_classes import __SCHEMA_TYPES as SCHEMA_TYPES
@@ -17,8 +17,7 @@
1717

1818

1919
class SerializedResourceValue(BaseModel):
20-
class Config:
21-
arbitrary_types_allowed = True
20+
model_config = ConfigDict(arbitrary_types_allowed=True)
2221

2322
content_type: str
2423
blob: bytes

metadata-ingestion/src/datahub/api/entities/datacontract/schema_assertion.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import json
44
from typing import List, Union
55

6-
from pydantic import Field, RootModel
6+
from pydantic import ConfigDict, Field, RootModel
77
from typing_extensions import Literal
88

99
from datahub.api.entities.datacontract.assertion import BaseAssertion
@@ -36,8 +36,7 @@ def model_post_init(self, __context: object) -> None:
3636

3737

3838
class FieldListSchemaContract(BaseAssertion):
39-
class Config:
40-
arbitrary_types_allowed = True
39+
model_config = ConfigDict(arbitrary_types_allowed=True)
4140

4241
type: Literal["field-list"]
4342

metadata-ingestion/src/datahub/api/entities/external/external_entities.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -137,19 +137,19 @@ def create(self, platform_resource: PlatformResource) -> None:
137137
self.entity_class
138138
)
139139
)
140-
entity = self.entity_class(**entity_obj.dict())
140+
entity = self.entity_class(**entity_obj.model_dump())
141141

142142
# Create updated entity ID with persisted=True
143143
entity_id = entity.get_id()
144-
if hasattr(entity_id, "dict"):
145-
entity_id_data = entity_id.dict()
144+
if hasattr(entity_id, "model_dump"):
145+
entity_id_data = entity_id.model_dump()
146146
entity_id_data["persisted"] = True
147147

148148
# Create new entity ID with updated flags
149149
updated_entity_id = type(entity_id)(**entity_id_data)
150150

151151
# Update the entity with the new ID (immutable update)
152-
entity_data = entity.dict() # type: ignore[attr-defined]
152+
entity_data = entity.model_dump() # type: ignore[attr-defined]
153153
entity_data["id"] = updated_entity_id
154154
updated_entity = type(entity)(**entity_data)
155155

@@ -359,13 +359,13 @@ def search_entity_by_urn(self, urn: str) -> Optional[TExternalEntityId]:
359359
self.entity_class
360360
)
361361
)
362-
entity = self.entity_class(**entity_obj.dict())
362+
entity = self.entity_class(**entity_obj.model_dump())
363363
# Check if platform instance matches
364364
entity_id = entity.get_id()
365365
if entity_id.platform_instance == self.platform_instance:
366366
# Create a new entity ID with the correct state instead of mutating
367-
# All our entity IDs are Pydantic models, so we can use dict() method
368-
entity_data = entity_id.dict()
367+
# All our entity IDs are Pydantic models, so we can use model_dump() method
368+
entity_data = entity_id.model_dump()
369369
entity_data["persisted"] = (
370370
True # This entity was found in DataHub
371371
)
@@ -433,7 +433,7 @@ def get_entity_from_datahub(
433433
entity_obj = platform_resource.resource_info.value.as_pydantic_object(
434434
self.entity_class
435435
)
436-
result = self.entity_class(**entity_obj.dict())
436+
result = self.entity_class(**entity_obj.model_dump())
437437
elif len(platform_resources) > 1:
438438
# Handle multiple matches - find the one with matching platform instance
439439
target_platform_instance = entity_id.platform_instance
@@ -447,7 +447,7 @@ def get_entity_from_datahub(
447447
self.entity_class
448448
)
449449
)
450-
entity = self.entity_class(**entity_obj.dict())
450+
entity = self.entity_class(**entity_obj.model_dump())
451451
if entity.get_id().platform_instance == target_platform_instance:
452452
result = entity
453453
break

metadata-ingestion/src/datahub/cli/migrate.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,7 @@ def migrate_containers(
356356
if mcp.aspectName == "containerProperties":
357357
assert isinstance(mcp.aspect, ContainerPropertiesClass)
358358
containerProperties: ContainerPropertiesClass = mcp.aspect
359-
containerProperties.customProperties = newKey.dict(
359+
containerProperties.customProperties = newKey.model_dump(
360360
by_alias=True, exclude_none=True
361361
)
362362
mcp.aspect = containerProperties

metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,7 @@ def get(urn: str, to_file: str) -> None:
245245
if graph.exists(urn):
246246
dataproduct: DataProduct = DataProduct.from_datahub(graph=graph, id=urn)
247247
click.secho(
248-
f"{json.dumps(dataproduct.dict(exclude_unset=True, exclude_none=True), indent=2)}"
248+
f"{json.dumps(dataproduct.model_dump(exclude_unset=True, exclude_none=True), indent=2)}"
249249
)
250250
if to_file:
251251
dataproduct.to_yaml(Path(to_file))

metadata-ingestion/src/datahub/cli/specific/dataset_cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def get(urn: str, to_file: str) -> None:
5555
if graph.exists(urn):
5656
dataset: Dataset = Dataset.from_datahub(graph=graph, urn=urn)
5757
click.secho(
58-
f"{json.dumps(dataset.dict(exclude_unset=True, exclude_none=True), indent=2)}"
58+
f"{json.dumps(dataset.model_dump(exclude_unset=True, exclude_none=True), indent=2)}"
5959
)
6060
if to_file:
6161
dataset.to_yaml(Path(to_file))

metadata-ingestion/src/datahub/cli/specific/forms_cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def get(urn: str, to_file: str) -> None:
4141
if graph.exists(urn):
4242
form: Forms = Forms.from_datahub(graph=graph, urn=urn)
4343
click.secho(
44-
f"{json.dumps(form.dict(exclude_unset=True, exclude_none=True), indent=2)}"
44+
f"{json.dumps(form.model_dump(exclude_unset=True, exclude_none=True), indent=2)}"
4545
)
4646
if to_file:
4747
form.to_yaml(Path(to_file))

0 commit comments

Comments (0)