@@ -30,8 +30,8 @@ def replicate_single_table_with_initial_load(
30
30
Returns:
31
31
None
32
32
"""
33
- # create destination pipeline
34
- dest_pl = dlt .pipeline (
33
+ # create replication pipeline
34
+ repl_pl = dlt .pipeline (
35
35
pipeline_name = "pg_replication_pipeline" ,
36
36
destination = "duckdb" ,
37
37
dataset_name = "replicate_with_initial_load" ,
@@ -50,15 +50,15 @@ def replicate_single_table_with_initial_load(
50
50
)
51
51
52
52
# perform initial load to capture all records present in source table prior to replication initialization
53
- load_info = dest_pl .run (snapshot )
53
+ load_info = repl_pl .run (snapshot )
54
54
print (load_info )
55
- print (dest_pl .last_trace .last_normalize_info )
55
+ print (repl_pl .last_trace .last_normalize_info )
56
56
57
57
# assuming there were changes in the source table, propagate change to destination
58
58
changes = replication_resource (slot_name , pub_name )
59
- load_info = dest_pl .run (changes )
59
+ load_info = repl_pl .run (changes )
60
60
print (load_info )
61
- print (dest_pl .last_trace .last_normalize_info )
61
+ print (repl_pl .last_trace .last_normalize_info )
62
62
63
63
64
64
def replicate_single_table_demo () -> None :
@@ -69,9 +69,9 @@ def replicate_single_table_demo() -> None:
69
69
to show how replication works end-to-end. In production, you would have an existing
70
70
PostgreSQL database with real changes instead of simulating them.
71
71
"""
72
- # create source and destination pipelines
73
- src_pl = get_postgres_pipeline ()
74
- dest_pl = dlt .pipeline (
72
+ # create simulation and replication pipelines
73
+ sim_pl = get_postgres_pipeline ()
74
+ repl_pl = dlt .pipeline (
75
75
pipeline_name = "pg_replication_pipeline" ,
76
76
destination = "duckdb" ,
77
77
dataset_name = "replicate_single_table" ,
@@ -80,7 +80,7 @@ def replicate_single_table_demo() -> None:
80
80
81
81
# create table "my_source_table" in source to demonstrate replication
82
82
create_source_table (
83
- src_pl , "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);"
83
+ sim_pl , "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);"
84
84
)
85
85
86
86
# initialize replication for the source table—this creates a replication slot and publication
@@ -89,7 +89,7 @@ def replicate_single_table_demo() -> None:
89
89
init_replication ( # requires the Postgres user to have the REPLICATION attribute assigned
90
90
slot_name = slot_name ,
91
91
pub_name = pub_name ,
92
- schema_name = src_pl .dataset_name ,
92
+ schema_name = sim_pl .dataset_name ,
93
93
table_names = "my_source_table" ,
94
94
reset = True ,
95
95
)
@@ -99,20 +99,20 @@ def replicate_single_table_demo() -> None:
99
99
100
100
# insert two records in source table and propagate changes to destination
101
101
change_source_table (
102
- src_pl , "INSERT INTO {table_name} VALUES (1, true), (2, false);"
102
+ sim_pl , "INSERT INTO {table_name} VALUES (1, true), (2, false);"
103
103
)
104
- dest_pl .run (changes )
105
- show_destination_table (dest_pl )
104
+ repl_pl .run (changes )
105
+ show_destination_table (repl_pl )
106
106
107
107
# update record in source table and propagate change to destination
108
- change_source_table (src_pl , "UPDATE {table_name} SET val = true WHERE id = 2;" )
109
- dest_pl .run (changes )
110
- show_destination_table (dest_pl )
108
+ change_source_table (sim_pl , "UPDATE {table_name} SET val = true WHERE id = 2;" )
109
+ repl_pl .run (changes )
110
+ show_destination_table (repl_pl )
111
111
112
112
# delete record from source table and propagate change to destination
113
- change_source_table (src_pl , "DELETE FROM {table_name} WHERE id = 2;" )
114
- dest_pl .run (changes )
115
- show_destination_table (dest_pl )
113
+ change_source_table (sim_pl , "DELETE FROM {table_name} WHERE id = 2;" )
114
+ repl_pl .run (changes )
115
+ show_destination_table (repl_pl )
116
116
117
117
118
118
def replicate_with_initial_load_demo () -> None :
@@ -122,9 +122,9 @@ def replicate_with_initial_load_demo() -> None:
122
122
This demo creates a source table with existing data, then simulates additional changes to show how
123
123
initial load captures pre-existing records and replication handles subsequent changes.
124
124
"""
125
- # create source and destination pipelines
126
- src_pl = get_postgres_pipeline ()
127
- dest_pl = dlt .pipeline (
125
+ # create simulation and replication pipelines
126
+ sim_pl = get_postgres_pipeline ()
127
+ repl_pl = dlt .pipeline (
128
128
pipeline_name = "pg_replication_pipeline" ,
129
129
destination = "duckdb" ,
130
130
dataset_name = "replicate_with_initial_load" ,
@@ -133,12 +133,12 @@ def replicate_with_initial_load_demo() -> None:
133
133
134
134
# create table "my_source_table" in source to demonstrate replication
135
135
create_source_table (
136
- src_pl , "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);"
136
+ sim_pl , "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);"
137
137
)
138
138
139
139
# insert records before initializing replication
140
140
change_source_table (
141
- src_pl , "INSERT INTO {table_name} VALUES (1, true), (2, false);"
141
+ sim_pl , "INSERT INTO {table_name} VALUES (1, true), (2, false);"
142
142
)
143
143
144
144
# initialize replication for the source table
@@ -147,21 +147,21 @@ def replicate_with_initial_load_demo() -> None:
147
147
snapshot = init_replication ( # requires the Postgres user to have the REPLICATION attribute assigned
148
148
slot_name = slot_name ,
149
149
pub_name = pub_name ,
150
- schema_name = src_pl .dataset_name ,
150
+ schema_name = sim_pl .dataset_name ,
151
151
table_names = "my_source_table" ,
152
152
persist_snapshots = True , # persist snapshot table(s) and let function return resource(s) for initial load
153
153
reset = True ,
154
154
)
155
155
156
156
# perform initial load to capture all records present in source table prior to replication initialization
157
- dest_pl .run (snapshot )
158
- show_destination_table (dest_pl )
157
+ repl_pl .run (snapshot )
158
+ show_destination_table (repl_pl )
159
159
160
160
# insert record in source table and propagate change to destination
161
- change_source_table (src_pl , "INSERT INTO {table_name} VALUES (3, true);" )
161
+ change_source_table (sim_pl , "INSERT INTO {table_name} VALUES (3, true);" )
162
162
changes = replication_resource (slot_name , pub_name )
163
- dest_pl .run (changes )
164
- show_destination_table (dest_pl )
163
+ repl_pl .run (changes )
164
+ show_destination_table (repl_pl )
165
165
166
166
167
167
def replicate_entire_schema_demo () -> None :
@@ -174,9 +174,9 @@ def replicate_entire_schema_demo() -> None:
174
174
Schema replication requires PostgreSQL server version 15 or higher. An exception
175
175
is raised if that's not the case.
176
176
"""
177
- # create source and destination pipelines
178
- src_pl = get_postgres_pipeline ()
179
- dest_pl = dlt .pipeline (
177
+ # create simulation and replication pipelines
178
+ sim_pl = get_postgres_pipeline ()
179
+ repl_pl = dlt .pipeline (
180
180
pipeline_name = "pg_replication_pipeline" ,
181
181
destination = "duckdb" ,
182
182
dataset_name = "replicate_entire_schema" ,
@@ -185,12 +185,12 @@ def replicate_entire_schema_demo() -> None:
185
185
186
186
# create two source tables to demonstrate schema replication
187
187
create_source_table (
188
- src_pl ,
188
+ sim_pl ,
189
189
"CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);" ,
190
190
"tbl_x" ,
191
191
)
192
192
create_source_table (
193
- src_pl ,
193
+ sim_pl ,
194
194
"CREATE TABLE {table_name} (id integer PRIMARY KEY, val varchar);" ,
195
195
"tbl_y" ,
196
196
)
@@ -201,7 +201,7 @@ def replicate_entire_schema_demo() -> None:
201
201
init_replication ( # initializing schema replication requires the Postgres user to be a superuser
202
202
slot_name = slot_name ,
203
203
pub_name = pub_name ,
204
- schema_name = src_pl .dataset_name ,
204
+ schema_name = sim_pl .dataset_name ,
205
205
reset = True ,
206
206
)
207
207
@@ -210,22 +210,22 @@ def replicate_entire_schema_demo() -> None:
210
210
211
211
# insert records in source tables and propagate changes to destination
212
212
change_source_table (
213
- src_pl , "INSERT INTO {table_name} VALUES (1, true), (2, false);" , "tbl_x"
213
+ sim_pl , "INSERT INTO {table_name} VALUES (1, true), (2, false);" , "tbl_x"
214
214
)
215
- change_source_table (src_pl , "INSERT INTO {table_name} VALUES (1, 'foo');" , "tbl_y" )
216
- dest_pl .run (changes )
217
- show_destination_table (dest_pl , "tbl_x" )
218
- show_destination_table (dest_pl , "tbl_y" )
215
+ change_source_table (sim_pl , "INSERT INTO {table_name} VALUES (1, 'foo');" , "tbl_y" )
216
+ repl_pl .run (changes )
217
+ show_destination_table (repl_pl , "tbl_x" )
218
+ show_destination_table (repl_pl , "tbl_y" )
219
219
220
220
# tables added to the schema later are also included in the replication
221
221
create_source_table (
222
- src_pl , "CREATE TABLE {table_name} (id integer PRIMARY KEY, val date);" , "tbl_z"
222
+ sim_pl , "CREATE TABLE {table_name} (id integer PRIMARY KEY, val date);" , "tbl_z"
223
223
)
224
224
change_source_table (
225
- src_pl , "INSERT INTO {table_name} VALUES (1, '2023-03-18');" , "tbl_z"
225
+ sim_pl , "INSERT INTO {table_name} VALUES (1, '2023-03-18');" , "tbl_z"
226
226
)
227
- dest_pl .run (changes )
228
- show_destination_table (dest_pl , "tbl_z" )
227
+ repl_pl .run (changes )
228
+ show_destination_table (repl_pl , "tbl_z" )
229
229
230
230
231
231
def replicate_with_column_selection_demo () -> None :
@@ -235,9 +235,9 @@ def replicate_with_column_selection_demo() -> None:
235
235
This demo creates source tables and simulates changes to show how column selection works,
236
236
where some tables have filtered columns while others include all columns by default.
237
237
"""
238
- # create source and destination pipelines
239
- src_pl = get_postgres_pipeline ()
240
- dest_pl = dlt .pipeline (
238
+ # create simulation and replication pipelines
239
+ sim_pl = get_postgres_pipeline ()
240
+ repl_pl = dlt .pipeline (
241
241
pipeline_name = "pg_replication_pipeline" ,
242
242
destination = "duckdb" ,
243
243
dataset_name = "replicate_with_column_selection" ,
@@ -246,12 +246,12 @@ def replicate_with_column_selection_demo() -> None:
246
246
247
247
# create two source tables to demonstrate column selection
248
248
create_source_table (
249
- src_pl ,
249
+ sim_pl ,
250
250
"CREATE TABLE {table_name} (c1 integer PRIMARY KEY, c2 bool, c3 varchar);" ,
251
251
"tbl_x" ,
252
252
)
253
253
create_source_table (
254
- src_pl ,
254
+ sim_pl ,
255
255
"CREATE TABLE {table_name} (c1 integer PRIMARY KEY, c2 bool, c3 varchar);" ,
256
256
"tbl_y" ,
257
257
)
@@ -262,7 +262,7 @@ def replicate_with_column_selection_demo() -> None:
262
262
init_replication ( # requires the Postgres user to have the REPLICATION attribute assigned
263
263
slot_name = slot_name ,
264
264
pub_name = pub_name ,
265
- schema_name = src_pl .dataset_name ,
265
+ schema_name = sim_pl .dataset_name ,
266
266
table_names = ("tbl_x" , "tbl_y" ),
267
267
reset = True ,
268
268
)
@@ -278,18 +278,18 @@ def replicate_with_column_selection_demo() -> None:
278
278
279
279
# insert records in source tables and propagate changes to destination
280
280
change_source_table (
281
- src_pl , "INSERT INTO {table_name} VALUES (1, true, 'foo');" , "tbl_x"
281
+ sim_pl , "INSERT INTO {table_name} VALUES (1, true, 'foo');" , "tbl_x"
282
282
)
283
283
change_source_table (
284
- src_pl , "INSERT INTO {table_name} VALUES (1, false, 'bar');" , "tbl_y"
284
+ sim_pl , "INSERT INTO {table_name} VALUES (1, false, 'bar');" , "tbl_y"
285
285
)
286
- dest_pl .run (changes )
286
+ repl_pl .run (changes )
287
287
288
288
# show columns in schema for both tables
289
289
# column c3 is not in the schema for tbl_x because we did not include it
290
290
# tbl_y does have column c3 because we didn't specify include columns for this table and by default all columns are included
291
- print ("tbl_x" , ":" , list (dest_pl .default_schema .get_table_columns ("tbl_x" ).keys ()))
292
- print ("tbl_y" , ":" , list (dest_pl .default_schema .get_table_columns ("tbl_y" ).keys ()))
291
+ print ("tbl_x" , ":" , list (repl_pl .default_schema .get_table_columns ("tbl_x" ).keys ()))
292
+ print ("tbl_y" , ":" , list (repl_pl .default_schema .get_table_columns ("tbl_y" ).keys ()))
293
293
294
294
295
295
# define some helper methods to make examples more readable
0 commit comments