@@ -1692,15 +1692,26 @@ def extract_lineages(self) -> List[KafkaConnectLineage]:
16921692 source_platform = parser .source_platform
16931693 server_name = parser .server_name
16941694 database_name = parser .database_name
1695+
1696+ if not self .connector_manifest .topic_names :
1697+ return lineages
1698+
1699+ # Check for EventRouter transform - requires special handling
1700+ if self ._has_event_router_transform ():
1701+ logger .debug (
1702+ f"Connector { self .connector_manifest .name } uses EventRouter transform - using table-based lineage extraction"
1703+ )
1704+ return self ._extract_lineages_for_event_router (
1705+ source_platform , database_name
1706+ )
1707+
1708+ # Standard Debezium topic processing
16951709 # Escape server_name to handle cases where topic.prefix contains dots
16961710 # Some users configure topic.prefix like "my.server" which breaks the regex
16971711 server_name = server_name or ""
16981712 # Regex pattern (\w+\.\w+(?:\.\w+)?) supports BOTH 2-part and 3-part table names
16991713 topic_naming_pattern = rf"({ re .escape (server_name )} )\.(\w+\.\w+(?:\.\w+)?)"
17001714
1701- if not self .connector_manifest .topic_names :
1702- return lineages
1703-
17041715 # Handle connectors with 2-level container (database + schema) in topic pattern
17051716 connector_class = self .connector_manifest .config .get (CONNECTOR_CLASS , "" )
17061717 maybe_duplicated_database_name = (
@@ -1749,6 +1760,137 @@ def extract_lineages(self) -> List[KafkaConnectLineage]:
17491760
17501761 return []
17511762
def _has_event_router_transform(self) -> bool:
    """Report whether any configured transform is the Debezium EventRouter.

    Reads the comma-separated ``transforms`` entry of the connector config
    and, for every alias listed there, looks up ``transforms.<alias>.type``.

    Returns:
        True when at least one alias has the type
        ``io.debezium.transforms.outbox.EventRouter``; False otherwise,
        including when the ``transforms`` entry is missing or empty.
    """
    config = self.connector_manifest.config
    raw_transforms = config.get("transforms", "")
    if not raw_transforms:
        return False

    # Exact string comparison against the fully-qualified transform class.
    return any(
        config.get(f"transforms.{alias}.type", "")
        == "io.debezium.transforms.outbox.EventRouter"
        for alias in parse_comma_separated_list(raw_transforms)
    )
1778+
def _extract_lineages_for_event_router(
    self, source_platform: str, database_name: Optional[str]
) -> List[KafkaConnectLineage]:
    """
    Build table-to-topic lineages for a connector that uses EventRouter.

    EventRouter picks the output topic from fields of each row, so the
    resulting topics cannot be derived from configuration alone. Instead,
    the source tables are taken from ``table.include.list`` (falling back to
    ``table.whitelist``) and every one of them is linked to every topic
    returned by ``_filter_topics_for_event_router``.

    Reference: https://debezium.io/documentation/reference/transformations/outbox-event-router.html

    Args:
        source_platform: Platform name recorded on each lineage's source side.
        database_name: When truthy, combined with each table name through
            ``get_dataset_name``; otherwise the table name is used as-is.

    Returns:
        One lineage per (table, topic) pair, or an empty list when no table
        list is configured.
    """
    config = self.connector_manifest.config
    table_config = config.get("table.include.list") or config.get("table.whitelist")

    if not table_config:
        logger.warning(
            f"EventRouter connector {self.connector_manifest.name} has no table.include.list config"
        )
        return []

    table_names = parse_comma_separated_list(table_config)

    # Candidate output topics, narrowed by a RegexRouter replacement if one exists.
    filtered_topics = self._filter_topics_for_event_router()

    lineages: List[KafkaConnectLineage] = []
    for raw_table in table_names:
        # Drop surrounding double quotes from quoted identifiers.
        unquoted = raw_table.strip('"')
        source_dataset = (
            get_dataset_name(database_name, unquoted) if database_name else unquoted
        )

        # Fan this table out to every candidate topic.
        lineages.extend(
            KafkaConnectLineage(
                source_dataset=source_dataset,
                source_platform=source_platform,
                target_dataset=topic,
                target_platform=KAFKA,
            )
            for topic in filtered_topics
        )

    logger.info(
        f"Created {len(lineages)} EventRouter lineages from {len(table_names)} source tables "
        f"to {len(filtered_topics)} topics for connector {self.connector_manifest.name}"
    )

    return lineages
1837+
def _filter_topics_for_event_router(self) -> List[str]:
    """
    Narrow the connector's topics using a RegexRouter replacement prefix.

    EventRouter is often paired with a RegexRouter that renames output
    topics. The literal text of the router's ``replacement`` setting up to
    the first ``$`` (e.g. ``"dev.ern.cashout.$1"`` -> ``"dev.ern.cashout."``)
    is used as a prefix to select the topics that belong to this connector.

    Returns:
        The topics starting with that prefix when a usable one is found.
        Otherwise a copy of all of the connector's topic names: either no
        transforms are configured, or no RegexRouter yields a non-empty prefix.
    """
    router_types = (
        "org.apache.kafka.connect.transforms.RegexRouter",
        "io.confluent.connect.cloud.transforms.TopicRegexRouter",
    )

    config = self.connector_manifest.config
    raw_transforms = config.get("transforms", "")
    if not raw_transforms:
        return list(self.connector_manifest.topic_names)

    # The first RegexRouter-style transform with a non-empty replacement wins.
    topic_prefix = None
    for alias in parse_comma_separated_list(raw_transforms):
        if config.get(f"transforms.{alias}.type", "") not in router_types:
            continue

        replacement = config.get(f"transforms.{alias}.replacement", "")
        if not replacement:
            continue

        # Everything before the first "$"; the whole string when there is none.
        topic_prefix = replacement.partition("$")[0]
        break

    if not topic_prefix:
        # No usable prefix - fall back to all topics (risky but best effort).
        logger.warning(
            f"EventRouter connector {self.connector_manifest.name} has no RegexRouter - cannot filter topics accurately"
        )
        return list(self.connector_manifest.topic_names)

    matching_topics = [
        candidate
        for candidate in self.connector_manifest.topic_names
        if candidate.startswith(topic_prefix)
    ]
    logger.debug(
        f"Filtered EventRouter topics to {len(matching_topics)} topics matching prefix '{topic_prefix}'"
    )
    return matching_topics
1893+
17521894
17531895@dataclass
17541896class ConfigDrivenSourceConnector (BaseConnector ):
0 commit comments