From 5b440cd20c9c4e08ab7a8691304f1b51be9fa587 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 3 Nov 2025 14:21:06 -0700 Subject: [PATCH 01/23] copy nicos branch --- apps/framework-cli/src/cli/local_webserver.rs | 1 + .../framework-cli/src/cli/routines/migrate.rs | 8 +++++- .../framework/core/infra_reality_checker.rs | 1 + .../framework/core/infrastructure/table.rs | 6 +++++ .../src/framework/core/infrastructure_map.rs | 3 +++ .../core/partial_infrastructure_map.rs | 4 +++ apps/framework-cli/src/framework/core/plan.rs | 1 + .../src/framework/data_model/model.rs | 1 + .../src/framework/python/generate.rs | 11 ++++++++ .../src/framework/typescript/generate.rs | 11 ++++++++ .../olap/clickhouse/diff_strategy.rs | 2 ++ .../infrastructure/olap/clickhouse/mapper.rs | 1 + .../src/infrastructure/olap/clickhouse/mod.rs | 21 +++++++++++++--- .../infrastructure/olap/clickhouse/model.rs | 2 ++ .../infrastructure/olap/clickhouse/queries.rs | 24 +++++++++++++++--- .../src/infrastructure/olap/ddl_ordering.rs | 25 +++++++++++++++++++ packages/protobuf/infrastructure_map.proto | 3 +++ .../ts-moose-lib/src/dmv2/sdk/olapTable.ts | 7 ++++++ 18 files changed, 125 insertions(+), 7 deletions(-) diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index 36ab73027a..87e3a8ed90 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -3556,6 +3556,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 5f5bb4da96..6d3e98e393 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -218,7 +218,11 @@ fn validate_table_databases( SerializableOlapOperation::CreateTable { table } => { validate_db(&table.database, &table.name); } - 
SerializableOlapOperation::DropTable { table, database } => { + SerializableOlapOperation::DropTable { + table, + database, + cluster_name: _, + } => { validate_db(database, table); } SerializableOlapOperation::AddTableColumn { @@ -659,6 +663,7 @@ mod tests { engine_params_hash: None, table_settings: None, table_ttl_setting: None, + cluster_name: None, } } @@ -999,6 +1004,7 @@ mod tests { SerializableOlapOperation::DropTable { table: "test".to_string(), database: Some("bad_db".to_string()), + cluster_name: None, }, SerializableOlapOperation::AddTableColumn { table: "test".to_string(), diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index bf665073fb..80b1f6458b 100644 --- a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -420,6 +420,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index f27f6bbcdf..03e358cbdb 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -299,6 +299,9 @@ pub struct Table { /// Table-level TTL expression (without leading 'TTL') #[serde(skip_serializing_if = "Option::is_none", default)] pub table_ttl_setting: Option, + /// Optional cluster name for ON CLUSTER support in ClickHouse + #[serde(skip_serializing_if = "Option::is_none", default)] + pub cluster_name: Option, } impl Table { @@ -472,6 +475,7 @@ impl Table { .or_else(|| self.compute_non_alterable_params_hash()), table_settings: self.table_settings.clone().unwrap_or_default(), table_ttl_setting: self.table_ttl_setting.clone(), + cluster_name: self.cluster_name.clone(), metadata: 
MessageField::from_option(self.metadata.as_ref().map(|m| { infrastructure_map::Metadata { description: m.description.clone().unwrap_or_default(), @@ -578,6 +582,7 @@ impl Table { .collect(), database: proto.database, table_ttl_setting: proto.table_ttl_setting, + cluster_name: proto.cluster_name, } } } @@ -1643,6 +1648,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; assert_eq!(table1.id(DEFAULT_DATABASE_NAME), "local_users"); diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index d5be6a00a4..0d6bb33d40 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -3017,6 +3017,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let after = Table { @@ -3072,6 +3073,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let diff = compute_table_columns_diff(&before, &after); @@ -3247,6 +3249,7 @@ mod diff_tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } diff --git a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs index 2ec987f84c..0359633d6b 100644 --- a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs @@ -256,6 +256,9 @@ struct PartialTable { /// Optional database name for multi-database support #[serde(default)] pub database: Option, + /// Optional cluster name for ON CLUSTER support + #[serde(default)] + pub cluster: Option, } /// Represents a topic definition from user code before it's converted into a complete [`Topic`]. 
@@ -699,6 +702,7 @@ impl PartialInfrastructureMap { indexes: partial_table.indexes.clone(), table_ttl_setting, database: partial_table.database.clone(), + cluster_name: partial_table.cluster.clone(), }; Ok((table.id(default_database), table)) }) diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index ee87431c67..8957ed0563 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -456,6 +456,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } diff --git a/apps/framework-cli/src/framework/data_model/model.rs b/apps/framework-cli/src/framework/data_model/model.rs index f8918853d1..c4b2ac1435 100644 --- a/apps/framework-cli/src/framework/data_model/model.rs +++ b/apps/framework-cli/src/framework/data_model/model.rs @@ -70,6 +70,7 @@ impl DataModel { indexes: vec![], database: None, // Database defaults to global config table_ttl_setting: None, + cluster_name: None, }; // Compute hash that includes both engine params and database diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index b97842eac8..4cdc3f653f 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -1065,6 +1065,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1158,6 +1159,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1276,6 +1278,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1354,6 +1357,7 @@ user_table = 
OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1411,6 +1415,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1479,6 +1484,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1550,6 +1556,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1632,6 +1639,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: Some("timestamp + INTERVAL 90 DAY DELETE".to_string()), + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1696,6 +1704,7 @@ user_table = OlapTable[User]("User", OlapConfig( ], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1760,6 +1769,7 @@ user_table = OlapTable[User]("User", OlapConfig( table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1813,6 +1823,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: Some("analytics_db".to_string()), table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index 7e9e556260..3a89d3645f 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -1002,6 +1002,7 @@ mod tests { indexes: vec![], database: None, 
table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1083,6 +1084,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1133,6 +1135,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1202,6 +1205,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1246,6 +1250,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1322,6 +1327,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1384,6 +1390,7 @@ export const UserTable = new OlapTable("User", { ], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1454,6 +1461,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1531,6 +1539,7 @@ export const TaskTable = new OlapTable("Task", { indexes: vec![], database: None, table_ttl_setting: Some("timestamp + INTERVAL 90 DAY DELETE".to_string()), + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1597,6 +1606,7 @@ export const TaskTable = new OlapTable("Task", { table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1644,6 
+1654,7 @@ export const TaskTable = new OlapTable("Task", { indexes: vec![], database: Some("analytics_db".to_string()), table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index da040b6700..51cbfa62c1 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -687,6 +687,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } @@ -1493,6 +1494,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; assert!(ClickHouseTableDiffStrategy::is_s3queue_table(&s3_table)); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs index 9cf81a5c1a..a22012beb3 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs @@ -354,6 +354,7 @@ pub fn std_table_to_clickhouse_table(table: &Table) -> Result, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Add a column to a table AddTableColumn { @@ -436,8 +438,19 @@ pub async fn execute_atomic_operation( SerializableOlapOperation::CreateTable { table } => { execute_create_table(db_name, table, client, is_dev).await?; } - SerializableOlapOperation::DropTable { table, database } => { - execute_drop_table(db_name, table, database.as_deref(), client).await?; + SerializableOlapOperation::DropTable { + table, + database, + cluster_name, + } => { + execute_drop_table( + db_name, + table, + database.as_deref(), + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::AddTableColumn { table, @@ -662,12 +675,13 @@ async fn 
execute_drop_table( db_name: &str, table_name: &str, table_database: Option<&str>, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!("Executing DropTable: {:?}", table_name); // Use table's database if specified, otherwise use global database let target_database = table_database.unwrap_or(db_name); - let drop_query = drop_table_query(target_database, table_name)?; + let drop_query = drop_table_query(target_database, table_name, cluster_name)?; run_query(&drop_query, client) .await .map_err(|e| ClickhouseChangesError::ClickhouseClient { @@ -1765,6 +1779,7 @@ impl OlapOperations for ConfiguredDBClient { indexes, database: Some(database), table_ttl_setting, + cluster_name: None, // Cluster info not extracted from introspection }; debug!("Created table object: {:?}", table); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs index a51e6c656f..fbc134ea65 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs @@ -657,6 +657,8 @@ pub struct ClickHouseTable { pub indexes: Vec, /// Optional TTL expression at table level (without leading 'TTL') pub table_ttl_setting: Option, + /// Optional cluster name for ON CLUSTER support + pub cluster_name: Option, } impl ClickHouseTable { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 7bd972a70f..486409916b 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -121,7 +121,8 @@ pub fn create_alias_for_table( } static CREATE_TABLE_TEMPLATE: &str = r#" -CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}` +CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} +ON CLUSTER 
{{cluster_name}}{{/if}} ( {{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#unless @last}}, {{/unless}}{{/each}}{{#if has_indexes}}, {{#each indexes}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}} @@ -2409,6 +2410,7 @@ pub fn create_table_query( let template_context = json!({ "db_name": db_name, "table_name": table.name, + "cluster_name": table.cluster_name.as_deref(), "fields": builds_field_context(&table.columns)?, "has_fields": !table.columns.is_empty(), "has_indexes": has_indexes, @@ -2439,16 +2441,21 @@ pub fn create_table_query( } pub static DROP_TABLE_TEMPLATE: &str = r#" -DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`; +DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}}; "#; -pub fn drop_table_query(db_name: &str, table_name: &str) -> Result { +pub fn drop_table_query( + db_name: &str, + table_name: &str, + cluster_name: Option<&str>, +) -> Result { let mut reg = Handlebars::new(); reg.register_escape_fn(no_escape); let context = json!({ "db_name": db_name, "table_name": table_name, + "cluster_name": cluster_name, }); Ok(reg.render_template(DROP_TABLE_TEMPLATE, &context)?) 
@@ -2917,6 +2924,7 @@ mod tests { table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -2954,6 +2962,7 @@ PRIMARY KEY (`id`) table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -2990,6 +2999,7 @@ ENGINE = MergeTree table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3089,6 +3099,7 @@ ENGINE = MergeTree table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3128,6 +3139,7 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let result = create_table_query("test_db", table, false); @@ -3174,6 +3186,7 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3236,6 +3249,7 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3277,6 +3291,7 @@ ORDER BY (`id`) "#; table_settings: None, table_ttl_setting: None, indexes: vec![], + cluster_name: None, }; let result = create_table_query("test_db", table, false); @@ -3432,6 +3447,7 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3497,6 +3513,7 @@ ORDER BY (`id`) "#; table_settings: Some(settings), indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3969,6 +3986,7 @@ SETTINGS keeper_path = 
'/clickhouse/s3queue/test_table', mode = 'unordered', s3q table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index 493ec82196..77349acd58 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -181,6 +181,7 @@ impl AtomicOlapOperation { } => SerializableOlapOperation::DropTable { table: table.name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::AddTableColumn { table, @@ -1311,6 +1312,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create some atomic operations @@ -1385,6 +1387,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create table B - depends on table A @@ -1407,6 +1410,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create view C - depends on table B @@ -1501,6 +1505,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create table B - target for materialized view @@ -1523,6 +1528,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create view C - depends on table B @@ -1637,6 +1643,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let view = View { @@ -1793,6 +1800,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_b = Table { @@ -1814,6 +1822,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_c = Table { @@ -1835,6 +1844,7 @@ mod tests { indexes: 
vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Test operations @@ -1925,6 +1935,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_b = Table { @@ -1946,6 +1957,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_c = Table { @@ -1967,6 +1979,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_d = Table { @@ -1988,6 +2001,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_e = Table { @@ -2009,6 +2023,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let op_create_a = AtomicOlapOperation::CreateTable { @@ -2162,6 +2177,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create table B - target for materialized view @@ -2184,6 +2200,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create SQL resource for a materialized view @@ -2305,6 +2322,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create table B - target for materialized view @@ -2327,6 +2345,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create SQL resource for a materialized view @@ -2453,6 +2472,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_b = Table { @@ -2474,6 +2494,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create SQL resource for materialized view @@ -2680,6 +2701,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create a column @@ -2789,6 +2811,7 @@ mod tests { indexes: vec![], database: None, 
table_ttl_setting: None, + cluster_name: None, }; // Create operations with signatures that work with the current implementation @@ -2908,6 +2931,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let after_table = Table { @@ -2952,6 +2976,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create column changes (remove old_column, add new_column) diff --git a/packages/protobuf/infrastructure_map.proto b/packages/protobuf/infrastructure_map.proto index 3fea0ba852..36cdaa2920 100644 --- a/packages/protobuf/infrastructure_map.proto +++ b/packages/protobuf/infrastructure_map.proto @@ -151,6 +151,9 @@ message Table { // Optional database name for multi-database support // When not specified, uses the global ClickHouse config database optional string database = 17; + + // Optional cluster name for ON CLUSTER support in ClickHouse + optional string cluster_name = 18; } // Structured representation of ORDER BY to support either explicit fields diff --git a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts index 513dda5584..54411ec219 100644 --- a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts +++ b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts @@ -226,6 +226,13 @@ export type BaseOlapConfig = ( * When not specified, uses the global ClickHouse config database. */ database?: string; + /** + * Optional cluster name for ON CLUSTER support. + * Use this to enable replicated tables across ClickHouse clusters. + * The cluster must be defined in config.toml (dev environment only). 
+ * Example: cluster: "prod_cluster" + */ + cluster?: string; }; /** From 191f4f55d0a0b4c5295b4ef96cf94b4fcf5f5b63 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 3 Nov 2025 16:33:27 -0700 Subject: [PATCH 02/23] hook up cluster ts api with ch --- apps/framework-cli/src/framework/core/plan.rs | 6 ++ .../src/framework/core/plan_validator.rs | 61 +++++++++++++++++ .../olap/clickhouse/diff_strategy.rs | 65 +++++++++++++++++++ .../src/utilities/prod-docker-compose.yml.hbs | 3 + packages/ts-moose-lib/src/dmv2/internal.ts | 3 + 5 files changed, 138 insertions(+) diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index 8957ed0563..50257985e5 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -172,6 +172,12 @@ pub async fn reconcile_with_reality( // that might have authentication parameters. table.engine_params_hash = infra_map_table.engine_params_hash.clone(); + // Keep the cluster_name from the infra map because it cannot be reliably detected + // from ClickHouse's system tables. The ON CLUSTER clause is only used during + // DDL execution and is not stored in the table schema. While it appears in + // system.distributed_ddl_queue, those entries are ephemeral and get cleaned up. 
+ table.cluster_name = infra_map_table.cluster_name.clone(); + reconciled_map .tables .insert(reality_table.id(&reconciled_map.default_database), table); diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index 65f4862187..970126a1e4 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -10,11 +10,72 @@ pub enum ValidationError { #[error("Table validation failed: {0}")] TableValidation(String), + + #[error("Cluster validation failed: {0}")] + ClusterValidation(String), +} + +/// Validates that all tables with cluster_name reference clusters defined in the config +fn validate_cluster_references(project: &Project, plan: &InfraPlan) -> Result<(), ValidationError> { + let defined_clusters = project.clickhouse_config.clusters.as_ref(); + + // Get all cluster names from the defined clusters + let cluster_names: Option> = + defined_clusters.map(|clusters| clusters.iter().map(|c| c.name.clone()).collect()); + + // Check all tables in the target infrastructure map + for table in plan.target_infra_map.tables.values() { + if let Some(cluster_name) = &table.cluster_name { + // If table has a cluster_name, verify it's defined in the config + match &cluster_names { + None => { + // No clusters defined in config but table references one + return Err(ValidationError::ClusterValidation(format!( + "Table '{}' references cluster '{}', but no clusters are defined in moose.config.toml.\n\ + \n\ + To fix this, add the cluster definition to your config:\n\ + \n\ + [[clickhouse_config.clusters]]\n\ + name = \"{}\"\n", + table.name, cluster_name, cluster_name + ))); + } + Some(names) if !names.contains(cluster_name) => { + // Table references a cluster that's not defined + return Err(ValidationError::ClusterValidation(format!( + "Table '{}' references cluster '{}', which is not defined in moose.config.toml.\n\ + \n\ + Available 
clusters: {}\n\ + \n\ + To fix this, either:\n\ + 1. Add the cluster to your config:\n\ + [[clickhouse_config.clusters]]\n\ + name = \"{}\"\n\ + \n\ + 2. Or change the table to use an existing cluster: {}\n", + table.name, + cluster_name, + names.join(", "), + cluster_name, + names.join(", ") + ))); + } + _ => { + // Cluster is defined, continue validation + } + } + } + } + + Ok(()) } pub fn validate(project: &Project, plan: &InfraPlan) -> Result<(), ValidationError> { stream::validate_changes(project, &plan.changes.streaming_engine_changes)?; + // Validate cluster references + validate_cluster_references(project, plan)?; + // Check for validation errors in OLAP changes for change in &plan.changes.olap_changes { if let OlapChange::Table(TableChange::ValidationError { message, .. }) = change { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index 51cbfa62c1..240b04bd51 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -50,6 +50,49 @@ fn format_database_change_error(table_name: &str, before_db: &str, after_db: &st ) } +/// Generates a formatted error message for cluster field changes. +/// +/// This function creates a user-friendly error message explaining that cluster field +/// changes require manual intervention to prevent data loss. Cluster changes are even +/// more critical than database changes because they affect replication topology. 
+/// +/// # Arguments +/// * `table_name` - The name of the table being changed +/// * `before_cluster` - The original cluster name (or "" if None) +/// * `after_cluster` - The new cluster name (or "" if None) +/// +/// # Returns +/// A formatted string with migration instructions +fn format_cluster_change_error( + table_name: &str, + before_cluster: &str, + after_cluster: &str, +) -> String { + format!( + "\n\n\ + ERROR: Cluster field change detected for table '{}'\n\ + \n\ + The cluster field changed from '{}' to '{}'\n\ + \n\ + Changing the cluster field is a destructive operation that requires\n\ + manual intervention to ensure data safety and proper replication.\n\ + \n\ + The cluster name is embedded in the table's replication path and cannot\n\ + be changed without recreating the table. This would cause data loss.\n\ + \n\ + To migrate this table to a different cluster:\n\ + \n\ + 1. Export your existing data\n\ + 2. Delete the old table definition from your code\n\ + 3. Create a new table definition with the target cluster\n\ + 4. 
Re-import your data if needed\n\ + \n\ + This ensures you maintain control over data migration and prevents\n\ + accidental data loss.\n", + table_name, before_cluster, after_cluster + ) +} + /// ClickHouse-specific table diff strategy /// /// ClickHouse has several limitations that require drop+create operations instead of ALTER: @@ -472,6 +515,28 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { })]; } + // Check if cluster has changed + // Cluster name cannot be changed after table creation for replicated tables + // as it's embedded in the replication path + let cluster_changed = before.cluster_name != after.cluster_name; + + if cluster_changed { + let before_cluster = before.cluster_name.as_deref().unwrap_or(""); + let after_cluster = after.cluster_name.as_deref().unwrap_or(""); + + let error_message = + format_cluster_change_error(&before.name, before_cluster, after_cluster); + + log::error!("{}", error_message); + + return vec![OlapChange::Table(TableChange::ValidationError { + table_name: before.name.clone(), + message: error_message, + before: Box::new(before.clone()), + after: Box::new(after.clone()), + })]; + } + // Check if PARTITION BY has changed let partition_by_changed = partition_by_change.before != partition_by_change.after; if partition_by_changed { diff --git a/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs b/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs index 1dcaf89134..823862a003 100644 --- a/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs +++ b/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs @@ -151,6 +151,9 @@ services: {{/if}} - clickhouse-0-logs:/var/log/clickhouse-server/ - clickhouse-0-users:/etc/clickhouse-server/users.d +{{#if clickhouse_clusters_file}} + - "{{clickhouse_clusters_file}}:/etc/clickhouse-server/config.d/clusters.xml:ro" +{{/if}} environment: - CLICKHOUSE_DB=${DB_NAME:-local} - CLICKHOUSE_USER=${CLICKHOUSE_USER:-panda} diff --git 
a/packages/ts-moose-lib/src/dmv2/internal.ts b/packages/ts-moose-lib/src/dmv2/internal.ts index f4852cc39f..060adc12fc 100644 --- a/packages/ts-moose-lib/src/dmv2/internal.ts +++ b/packages/ts-moose-lib/src/dmv2/internal.ts @@ -210,6 +210,8 @@ interface TableJson { ttl?: string; /** Optional database name for multi-database support. */ database?: string; + /** Optional cluster name for ON CLUSTER support. */ + cluster?: string; } /** * Represents a target destination for data flow, typically a stream. @@ -726,6 +728,7 @@ export const toInfraMap = (registry: typeof moose_internal) => { })) || [], ttl: table.config.ttl, database: table.config.database, + cluster: table.config.cluster, }; }); From e2757b07c413d5a2ef58fff7037636585b34208c Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 3 Nov 2025 17:18:19 -0700 Subject: [PATCH 03/23] add unit tests --- apps/framework-cli/src/framework/core/plan.rs | 104 ++++++++ .../src/framework/core/plan_validator.rs | 222 ++++++++++++++++++ .../olap/clickhouse/diff_strategy.rs | 167 +++++++++++++ .../infrastructure/olap/clickhouse/queries.rs | 111 +++++++++ 4 files changed, 604 insertions(+) diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index 50257985e5..76b8b3e7c8 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -924,4 +924,108 @@ mod tests { // but they don't directly use clickhouse_config.db_name. // The bug in ENG-1160 is specifically about default_database being hardcoded to "local". 
} + + #[tokio::test] + async fn test_reconcile_preserves_cluster_name() { + // Create a test table with a cluster name + let mut table = create_test_table("clustered_table"); + table.cluster_name = Some("test_cluster".to_string()); + + // Create mock OLAP client with the table (but cluster_name will be lost in reality) + let mut table_from_reality = table.clone(); + table_from_reality.cluster_name = None; // ClickHouse system.tables doesn't preserve this + + let mock_client = MockOlapClient { + tables: vec![table_from_reality], + }; + + // Create infrastructure map with the table including cluster_name + let mut infra_map = InfrastructureMap::default(); + infra_map + .tables + .insert(table.id(DEFAULT_DATABASE_NAME), table.clone()); + + // Create test project + let project = create_test_project(); + + let target_table_names = HashSet::new(); + // Reconcile the infrastructure map + let reconciled = + reconcile_with_reality(&project, &infra_map, &target_table_names, mock_client) + .await + .unwrap(); + + // The reconciled map should preserve cluster_name from the infra map + assert_eq!(reconciled.tables.len(), 1); + let reconciled_table = reconciled.tables.values().next().unwrap(); + assert_eq!( + reconciled_table.cluster_name, + Some("test_cluster".to_string()), + "cluster_name should be preserved from infra map" + ); + } + + #[tokio::test] + async fn test_reconcile_with_reality_mismatched_table_preserves_cluster() { + // Create a table that exists in both places but with different schemas + let mut infra_table = create_test_table("mismatched_table"); + infra_table.cluster_name = Some("production_cluster".to_string()); + + let mut reality_table = create_test_table("mismatched_table"); + // Reality table has no cluster_name (as ClickHouse doesn't preserve it) + reality_table.cluster_name = None; + // Add a column difference to make them mismatched + reality_table + .columns + .push(crate::framework::core::infrastructure::table::Column { + name: 
"extra_col".to_string(), + data_type: crate::framework::core::infrastructure::table::ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }); + + // Create mock OLAP client with the reality table + let mock_client = MockOlapClient { + tables: vec![reality_table.clone()], + }; + + // Create infrastructure map with the infra table + let mut infra_map = InfrastructureMap::default(); + infra_map + .tables + .insert(infra_table.id(DEFAULT_DATABASE_NAME), infra_table.clone()); + + // Create test project + let project = create_test_project(); + + let target_table_names = HashSet::new(); + // Reconcile the infrastructure map + let reconciled = + reconcile_with_reality(&project, &infra_map, &target_table_names, mock_client) + .await + .unwrap(); + + // The reconciled map should still have the table + assert_eq!(reconciled.tables.len(), 1); + let reconciled_table = reconciled.tables.values().next().unwrap(); + + // The cluster_name should be preserved from the infra map + assert_eq!( + reconciled_table.cluster_name, + Some("production_cluster".to_string()), + "cluster_name should be preserved from infra map even when schema differs" + ); + + // But the columns should be updated from reality + assert_eq!( + reconciled_table.columns.len(), + reality_table.columns.len(), + "columns should be updated from reality" + ); + } } diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index 970126a1e4..c65cd28744 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -85,3 +85,225 @@ pub fn validate(project: &Project, plan: &InfraPlan) -> Result<(), ValidationErr Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::framework::core::infrastructure::table::{Column, ColumnType, OrderBy, Table}; + use 
crate::framework::core::infrastructure_map::{ + InfrastructureMap, PrimitiveSignature, PrimitiveTypes, + }; + use crate::framework::core::partial_infrastructure_map::LifeCycle; + use crate::framework::core::plan::InfraPlan; + use crate::framework::versions::Version; + use crate::infrastructure::olap::clickhouse::config::{ClickHouseConfig, ClusterConfig}; + use crate::project::{Project, ProjectFeatures}; + use std::collections::HashMap; + use std::path::PathBuf; + + fn create_test_project(clusters: Option>) -> Project { + Project { + language: crate::framework::languages::SupportedLanguages::Typescript, + redpanda_config: crate::infrastructure::stream::kafka::models::KafkaConfig::default(), + clickhouse_config: ClickHouseConfig { + db_name: "local".to_string(), + user: "default".to_string(), + password: "".to_string(), + use_ssl: false, + host: "localhost".to_string(), + host_port: 18123, + native_port: 9000, + host_data_path: None, + additional_databases: vec![], + clusters, + }, + http_server_config: crate::cli::local_webserver::LocalWebserverConfig::default(), + redis_config: crate::infrastructure::redis::redis_client::RedisConfig::default(), + git_config: crate::utilities::git::GitConfig::default(), + temporal_config: + crate::infrastructure::orchestration::temporal::TemporalConfig::default(), + state_config: crate::project::StateConfig::default(), + migration_config: crate::project::MigrationConfig::default(), + language_project_config: crate::project::LanguageProjectConfig::default(), + project_location: PathBuf::from("/test"), + is_production: false, + supported_old_versions: HashMap::new(), + jwt: None, + authentication: crate::project::AuthenticationConfig::default(), + features: ProjectFeatures::default(), + load_infra: None, + typescript_config: crate::project::TypescriptConfig::default(), + source_dir: crate::project::default_source_dir(), + } + } + + fn create_test_table(name: &str, cluster_name: Option) -> Table { + Table { + name: name.to_string(), + 
columns: vec![Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine: None, + version: Some(Version::from_string("1.0.0".to_string())), + source_primitive: PrimitiveSignature { + name: name.to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name, + } + } + + fn create_test_plan(tables: Vec) -> InfraPlan { + let mut table_map = HashMap::new(); + for table in tables { + table_map.insert(format!("local_{}", table.name), table); + } + + InfraPlan { + target_infra_map: InfrastructureMap { + default_database: "local".to_string(), + tables: table_map, + topics: HashMap::new(), + api_endpoints: HashMap::new(), + views: HashMap::new(), + topic_to_table_sync_processes: HashMap::new(), + topic_to_topic_sync_processes: HashMap::new(), + function_processes: HashMap::new(), + block_db_processes: crate::framework::core::infrastructure::olap_process::OlapProcess {}, + consumption_api_web_server: crate::framework::core::infrastructure::consumption_webserver::ConsumptionApiWebServer {}, + orchestration_workers: HashMap::new(), + sql_resources: HashMap::new(), + workflows: HashMap::new(), + web_apps: HashMap::new(), + }, + changes: Default::default(), + } + } + + #[test] + fn test_validate_no_clusters_defined_but_table_references_one() { + let project = create_test_project(None); + let table = create_test_table("test_table", Some("test_cluster".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::ClusterValidation(msg)) => { + 
assert!(msg.contains("test_table")); + assert!(msg.contains("test_cluster")); + assert!(msg.contains("no clusters are defined")); + } + _ => panic!("Expected ClusterValidation error"), + } + } + + #[test] + fn test_validate_table_references_undefined_cluster() { + let project = create_test_project(Some(vec![ + ClusterConfig { + name: "cluster_a".to_string(), + }, + ClusterConfig { + name: "cluster_b".to_string(), + }, + ])); + let table = create_test_table("test_table", Some("cluster_c".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::ClusterValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("cluster_c")); + assert!(msg.contains("cluster_a")); + assert!(msg.contains("cluster_b")); + } + _ => panic!("Expected ClusterValidation error"), + } + } + + #[test] + fn test_validate_table_references_valid_cluster() { + let project = create_test_project(Some(vec![ClusterConfig { + name: "test_cluster".to_string(), + }])); + let table = create_test_table("test_table", Some("test_cluster".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_validate_table_with_no_cluster_is_allowed() { + let project = create_test_project(Some(vec![ClusterConfig { + name: "test_cluster".to_string(), + }])); + let table = create_test_table("test_table", None); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_validate_multiple_tables_different_clusters() { + let project = create_test_project(Some(vec![ + ClusterConfig { + name: "cluster_a".to_string(), + }, + ClusterConfig { + name: "cluster_b".to_string(), + }, + ])); + let table1 = create_test_table("table1", Some("cluster_a".to_string())); + let table2 = create_test_table("table2", 
Some("cluster_b".to_string())); + let plan = create_test_plan(vec![table1, table2]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_validate_empty_clusters_list() { + let project = create_test_project(Some(vec![])); + let table = create_test_table("test_table", Some("test_cluster".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::ClusterValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("test_cluster")); + } + _ => panic!("Expected ClusterValidation error"), + } + } +} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index 240b04bd51..98de0a007f 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -1688,4 +1688,171 @@ mod tests { error_msg.contains("INSERT INTO target_db.my_table SELECT * FROM source_db.my_table") ); } + + #[test] + fn test_cluster_change_from_none_to_some() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change cluster from None to Some + before.cluster_name = None; + after.cluster_name = Some("test_cluster".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + + // Should return exactly one ValidationError + assert_eq!(changes.len(), 1); + assert!(matches!( + changes[0], + OlapChange::Table(TableChange::ValidationError { .. 
}) + )); + + // Check the error message contains expected information + if let OlapChange::Table(TableChange::ValidationError { + table_name, + message, + .. + }) = &changes[0] + { + assert_eq!(table_name, "test"); + assert!(message.contains("")); + assert!(message.contains("test_cluster")); + assert!(message.contains("manual intervention")); + } else { + panic!("Expected ValidationError variant"); + } + } + + #[test] + fn test_cluster_change_from_some_to_none() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change cluster from Some to None + before.cluster_name = Some("test_cluster".to_string()); + after.cluster_name = None; + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + + // Should return exactly one ValidationError + assert_eq!(changes.len(), 1); + assert!(matches!( + changes[0], + OlapChange::Table(TableChange::ValidationError { .. }) + )); + + // Check the error message contains expected information + if let OlapChange::Table(TableChange::ValidationError { + table_name, + message, + .. 
+ }) = &changes[0] + { + assert_eq!(table_name, "test"); + assert!(message.contains("test_cluster")); + assert!(message.contains("")); + assert!(message.contains("manual intervention")); + } else { + panic!("Expected ValidationError variant"); + } + } + + #[test] + fn test_cluster_change_between_different_clusters() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change cluster from one to another + before.cluster_name = Some("cluster_a".to_string()); + after.cluster_name = Some("cluster_b".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + + // Should return exactly one ValidationError + assert_eq!(changes.len(), 1); + assert!(matches!( + changes[0], + OlapChange::Table(TableChange::ValidationError { .. }) + )); + + // Check the error message contains expected information + if let OlapChange::Table(TableChange::ValidationError { + table_name, + message, + .. 
+ }) = &changes[0] + { + assert_eq!(table_name, "test"); + assert!(message.contains("cluster_a")); + assert!(message.contains("cluster_b")); + assert!(message.contains("replication path")); + } else { + panic!("Expected ValidationError variant"); + } + } + + #[test] + fn test_no_cluster_change_both_none() { + let strategy = ClickHouseTableDiffStrategy; + + let before = create_test_table("test", vec!["id".to_string()], false); + let after = create_test_table("test", vec!["id".to_string()], false); + + // Both None - no cluster change + assert_eq!(before.cluster_name, None); + assert_eq!(after.cluster_name, None); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + + // Should not trigger a validation error - no changes at all + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_no_cluster_change_both_same() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Both have the same cluster + before.cluster_name = Some("test_cluster".to_string()); + after.cluster_name = Some("test_cluster".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + + // Should not trigger a validation error - no changes at all + assert_eq!(changes.len(), 0); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 486409916b..d27ea2c7f9 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -4503,4 +4503,115 @@ 
ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; _ => panic!("Expected ReplacingMergeTree"), } } + + #[test] + fn test_create_table_with_cluster_includes_on_cluster() { + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ClickHouseColumn { + name: "id".to_string(), + column_type: ClickHouseColumnType::ClickhouseInt(ClickHouseInt::Int32), + required: true, + primary_key: true, + unique: false, + default: None, + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec![]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::ReplicatedMergeTree { + keeper_path: None, + replica_name: None, + }, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: Some("test_cluster".to_string()), + }; + + let query = create_table_query("test_db", table, false).unwrap(); + + // Should include ON CLUSTER clause + assert!( + query.contains("ON CLUSTER test_cluster"), + "Query should contain ON CLUSTER clause" + ); + + // ON CLUSTER should come after CREATE TABLE but before column definitions + let create_idx = query.find("CREATE TABLE").unwrap(); + let on_cluster_idx = query.find("ON CLUSTER").unwrap(); + let engine_idx = query.find("ENGINE").unwrap(); + + assert!( + create_idx < on_cluster_idx && on_cluster_idx < engine_idx, + "ON CLUSTER should be between CREATE TABLE and ENGINE" + ); + } + + #[test] + fn test_create_table_without_cluster_no_on_cluster() { + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ClickHouseColumn { + name: "id".to_string(), + column_type: ClickHouseColumnType::ClickhouseInt(ClickHouseInt::Int32), + required: true, + primary_key: true, + unique: false, + default: None, + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec![]), + partition_by: None, + sample_by: None, + engine: 
ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + }; + + let query = create_table_query("test_db", table, false).unwrap(); + + // Should NOT include ON CLUSTER clause + assert!( + !query.contains("ON CLUSTER"), + "Query should not contain ON CLUSTER clause when cluster_name is None" + ); + } + + #[test] + fn test_drop_table_with_cluster() { + let cluster_name = Some("test_cluster"); + let query = drop_table_query("test_db", "test_table", cluster_name).unwrap(); + + // Should include ON CLUSTER clause + assert!( + query.contains("ON CLUSTER test_cluster"), + "DROP query should contain ON CLUSTER clause" + ); + + // ON CLUSTER should come after DROP TABLE but before the table name or right after table name + assert!(query.contains("DROP TABLE")); + } + + #[test] + fn test_drop_table_without_cluster() { + let cluster_name = None; + let query = drop_table_query("test_db", "test_table", cluster_name).unwrap(); + + // Should NOT include ON CLUSTER clause + assert!( + !query.contains("ON CLUSTER"), + "DROP query should not contain ON CLUSTER clause when cluster_name is None" + ); + + // Should still have DROP TABLE + assert!(query.contains("DROP TABLE")); + } } From e3bfd634926a1d542e2834cf390b2779a4784a6c Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 4 Nov 2025 09:23:01 -0700 Subject: [PATCH 04/23] add validation for cluster & keeper configs --- .../src/framework/core/plan_validator.rs | 220 ++++++++++++++++++ .../py-moose-lib/moose_lib/dmv2/olap_table.py | 6 + packages/py-moose-lib/moose_lib/internal.py | 3 + 3 files changed, 229 insertions(+) diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index c65cd28744..619424719e 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -70,12 +70,59 @@ fn validate_cluster_references(project: 
&Project, plan: &InfraPlan) -> Result<() Ok(()) } +/// Validates that replicated engines either have keeper path/replica name OR a cluster defined +fn validate_replicated_engine_args(plan: &InfraPlan) -> Result<(), ValidationError> { + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + + for table in plan.target_infra_map.tables.values() { + let needs_args = match &table.engine { + Some(ClickhouseEngine::ReplicatedMergeTree { + keeper_path, + replica_name, + }) => keeper_path.is_none() && replica_name.is_none(), + Some(ClickhouseEngine::ReplicatedReplacingMergeTree { + keeper_path, + replica_name, + .. + }) => keeper_path.is_none() && replica_name.is_none(), + Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { + keeper_path, + replica_name, + }) => keeper_path.is_none() && replica_name.is_none(), + Some(ClickhouseEngine::ReplicatedSummingMergeTree { + keeper_path, + replica_name, + .. + }) => keeper_path.is_none() && replica_name.is_none(), + _ => false, + }; + + // If engine args are missing AND no cluster is defined, that's an error + if needs_args && table.cluster_name.is_none() { + return Err(ValidationError::TableValidation(format!( + "Table '{}' uses a replicated engine but neither cluster nor keeper path/replica name are specified.\n\ + \n\ + You must either:\n\ + 1. Specify a cluster in the table config: cluster = \"prod_cluster\"\n\ + (and define it in moose.config.toml)\n\ + 2. 
Or provide explicit keeper path and replica name in the engine config\n", + table.name + ))); + } + } + + Ok(()) +} + pub fn validate(project: &Project, plan: &InfraPlan) -> Result<(), ValidationError> { stream::validate_changes(project, &plan.changes.streaming_engine_changes)?; // Validate cluster references validate_cluster_references(project, plan)?; + // Validate replicated engine args + validate_replicated_engine_args(plan)?; + // Check for validation errors in OLAP changes for change in &plan.changes.olap_changes { if let OlapChange::Table(TableChange::ValidationError { message, .. }) = change { @@ -306,4 +353,177 @@ mod tests { _ => panic!("Expected ClusterValidation error"), } } + + // Helper to create a table with a specific engine + fn create_table_with_engine( + name: &str, + cluster_name: Option, + engine: Option, + ) -> Table { + Table { + name: name.to_string(), + columns: vec![Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine, + version: Some(Version::from_string("1.0.0".to_string())), + source_primitive: PrimitiveSignature { + name: name.to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name, + } + } + + #[test] + fn test_replicated_engine_without_args_or_cluster_fails() { + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + + let project = create_test_project(None); + let table = create_table_with_engine( + "test_table", + None, + Some(ClickhouseEngine::ReplicatedMergeTree { + keeper_path: None, + replica_name: None, + }), + ); + let plan = create_test_plan(vec![table]); + + let result 
= validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::TableValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("replicated engine")); + assert!(msg.contains("cluster")); + } + _ => panic!("Expected TableValidation error"), + } + } + + #[test] + fn test_replicated_engine_with_cluster_but_no_args_succeeds() { + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + + let project = create_test_project(Some(vec![ClusterConfig { + name: "prod_cluster".to_string(), + }])); + let table = create_table_with_engine( + "test_table", + Some("prod_cluster".to_string()), + Some(ClickhouseEngine::ReplicatedMergeTree { + keeper_path: None, + replica_name: None, + }), + ); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_replicated_engine_with_args_but_no_cluster_succeeds() { + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + + let project = create_test_project(None); + let table = create_table_with_engine( + "test_table", + None, + Some(ClickhouseEngine::ReplicatedMergeTree { + keeper_path: Some("/clickhouse/tables/{database}/{table}".to_string()), + replica_name: Some("{replica}".to_string()), + }), + ); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_replicated_engine_with_both_args_and_cluster_succeeds() { + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + + let project = create_test_project(Some(vec![ClusterConfig { + name: "prod_cluster".to_string(), + }])); + let table = create_table_with_engine( + "test_table", + Some("prod_cluster".to_string()), + Some(ClickhouseEngine::ReplicatedMergeTree { + keeper_path: Some("/clickhouse/tables/{database}/{table}".to_string()), + replica_name: Some("{replica}".to_string()), + }), + ); + let plan = 
create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_replicated_replacing_merge_tree_without_args_or_cluster_fails() { + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + + let project = create_test_project(None); + let table = create_table_with_engine( + "test_table", + None, + Some(ClickhouseEngine::ReplicatedReplacingMergeTree { + keeper_path: None, + replica_name: None, + ver: Some("version".to_string()), + is_deleted: None, + }), + ); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::TableValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("replicated engine")); + } + _ => panic!("Expected TableValidation error"), + } + } + + #[test] + fn test_non_replicated_engine_without_cluster_succeeds() { + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + + let project = create_test_project(None); + let table = create_table_with_engine("test_table", None, Some(ClickhouseEngine::MergeTree)); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } } diff --git a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py index d5abfdf5f4..1c52b7da4e 100644 --- a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py +++ b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py @@ -121,6 +121,10 @@ class OlapConfig(BaseModel): life_cycle: Determines how changes in code will propagate to the resources. settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING. These are alterable settings that can be changed without recreating the table. + cluster: Optional cluster name for ON CLUSTER support in ClickHouse. + Use this to enable replicated tables across ClickHouse clusters. 
+ The cluster must be defined in moose.config.toml (dev environment only). + Example: cluster="prod_cluster" """ order_by_fields: list[str] = [] order_by_expression: Optional[str] = None @@ -133,6 +137,8 @@ class OlapConfig(BaseModel): settings: Optional[dict[str, str]] = None # Optional table-level TTL expression (without leading 'TTL') ttl: Optional[str] = None + # Optional cluster name for ON CLUSTER support in ClickHouse + cluster: Optional[str] = None # Optional secondary/data-skipping indexes class TableIndex(BaseModel): diff --git a/packages/py-moose-lib/moose_lib/internal.py b/packages/py-moose-lib/moose_lib/internal.py index ab4e806fa9..1bd6ab7fba 100644 --- a/packages/py-moose-lib/moose_lib/internal.py +++ b/packages/py-moose-lib/moose_lib/internal.py @@ -198,6 +198,7 @@ class TableConfig(BaseModel): metadata: Optional metadata for the table. life_cycle: Lifecycle management setting for the table. table_settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING. + cluster: Optional cluster name for ON CLUSTER support in ClickHouse. 
""" model_config = model_config @@ -214,6 +215,7 @@ class TableConfig(BaseModel): indexes: list[OlapConfig.TableIndex] = [] ttl: Optional[str] = None database: Optional[str] = None + cluster: Optional[str] = None class TopicConfig(BaseModel): @@ -696,6 +698,7 @@ def to_infra_map() -> dict: indexes=table.config.indexes, ttl=table.config.ttl, database=table.config.database, + cluster=table.config.cluster, ) for name, stream in get_streams().items(): From 7cae3c0dbcc1965cbe8b7566b9a831c33bbfc1aa Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 4 Nov 2025 12:08:04 -0700 Subject: [PATCH 05/23] e2e tests --- .github/workflows/test-framework-cli.yaml | 73 ++++ apps/framework-cli-e2e/test/cluster.test.ts | 390 ++++++++++++++++++ apps/framework-cli-e2e/test/constants.ts | 4 + templates/python-cluster/README.md | 10 + templates/python-cluster/app/__init__.py | 0 .../python-cluster/app/ingest/__init__.py | 0 templates/python-cluster/app/ingest/models.py | 60 +++ templates/python-cluster/app/main.py | 1 + templates/python-cluster/moose.config.toml | 62 +++ templates/python-cluster/requirements.txt | 7 + templates/python-cluster/setup.py | 13 + templates/python-cluster/template.config.toml | 22 + templates/typescript-cluster/README.md | 10 + .../typescript-cluster/moose.config.toml | 65 +++ templates/typescript-cluster/package.json | 26 ++ templates/typescript-cluster/src/index.ts | 1 + .../typescript-cluster/src/ingest/models.ts | 48 +++ .../typescript-cluster/template.config.toml | 22 + templates/typescript-cluster/tsconfig.json | 16 + 19 files changed, 830 insertions(+) create mode 100644 apps/framework-cli-e2e/test/cluster.test.ts create mode 100644 templates/python-cluster/README.md create mode 100644 templates/python-cluster/app/__init__.py create mode 100644 templates/python-cluster/app/ingest/__init__.py create mode 100644 templates/python-cluster/app/ingest/models.py create mode 100644 templates/python-cluster/app/main.py create mode 100644 
templates/python-cluster/moose.config.toml create mode 100644 templates/python-cluster/requirements.txt create mode 100644 templates/python-cluster/setup.py create mode 100644 templates/python-cluster/template.config.toml create mode 100644 templates/typescript-cluster/README.md create mode 100644 templates/typescript-cluster/moose.config.toml create mode 100644 templates/typescript-cluster/package.json create mode 100644 templates/typescript-cluster/src/index.ts create mode 100644 templates/typescript-cluster/src/ingest/models.ts create mode 100644 templates/typescript-cluster/template.config.toml create mode 100644 templates/typescript-cluster/tsconfig.json diff --git a/.github/workflows/test-framework-cli.yaml b/.github/workflows/test-framework-cli.yaml index b318935553..c6fafb16de 100644 --- a/.github/workflows/test-framework-cli.yaml +++ b/.github/workflows/test-framework-cli.yaml @@ -716,6 +716,76 @@ jobs: run: | cat ~/.moose/*-cli.log + test-e2e-cluster: + needs: + [detect-changes, check, test-cli, test-ts-moose-lib, test-py-moose-lib] + if: needs.detect-changes.outputs.should_run == 'true' + name: Test E2E Cluster Support (Node 20, Python 3.13) + runs-on: ubuntu-latest + permissions: + contents: read + env: + RUST_BACKTRACE: full + steps: + - name: Install Protoc (Needed for Temporal) + uses: arduino/setup-protoc@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + version: "23.x" + + - name: Checkout + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.sha }} + + # Login to Docker hub to get higher rate limits when moose pulls images + - name: Login to Docker Hub + uses: ./.github/actions/docker-login + with: + op-service-account-token: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }} + + - uses: pnpm/action-setup@v4 + + - name: Install node + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: "pnpm" + + - name: Get system info + id: system + run: | + echo 
"version=$(lsb_release -rs)" >> $GITHUB_OUTPUT + echo "distro=$(lsb_release -is)" >> $GITHUB_OUTPUT + + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + cache: true + cache-shared-key: ${{ runner.os }}-${{ steps.system.outputs.distro }}-${{ steps.system.outputs.version }}-${{ runner.arch }}-rust + cache-on-failure: true + cache-all-crates: true + cache-workspace-crates: true + + - name: Setup Python 3.13 + uses: actions/setup-python@v4 + with: + python-version: "3.13" + + - name: Upgrade Python build tools + run: pip install --upgrade pip setuptools wheel + + - name: Run Cluster E2E Tests + run: pnpm install --frozen-lockfile && pnpm --filter=framework-cli-e2e run test -- --grep "Cluster Support" + env: + MOOSE_TELEMETRY_ENABLED: false + + - name: Inspect Logs + if: always() + run: | + cat ~/.moose/*-cli.log + lints: needs: detect-changes if: needs.detect-changes.outputs.should_run == 'true' @@ -778,6 +848,7 @@ jobs: test-e2e-python-tests, test-e2e-backward-compatibility-typescript, test-e2e-backward-compatibility-python, + test-e2e-cluster, lints, ] if: always() @@ -807,6 +878,7 @@ jobs: [[ "${{ needs.test-e2e-python-tests.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-backward-compatibility-python.result }}" == "failure" ]] || \ + [[ "${{ needs.test-e2e-cluster.result }}" == "failure" ]] || \ [[ "${{ needs.lints.result }}" == "failure" ]]; then echo "One or more required jobs failed" exit 1 @@ -822,6 +894,7 @@ jobs: [[ "${{ needs.test-e2e-python-tests.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-backward-compatibility-python.result }}" == "success" ]] && \ + [[ "${{ needs.test-e2e-cluster.result }}" == "success" ]] && \ [[ "${{ needs.lints.result }}" == "success" ]]; then echo "All required jobs succeeded" exit 0 diff --git 
a/apps/framework-cli-e2e/test/cluster.test.ts b/apps/framework-cli-e2e/test/cluster.test.ts new file mode 100644 index 0000000000..871cf10aec --- /dev/null +++ b/apps/framework-cli-e2e/test/cluster.test.ts @@ -0,0 +1,390 @@ +/// +/// +/// +/** + * Cluster Support E2E Tests + * + * Tests the ON CLUSTER functionality for ClickHouse tables in MooseStack. + * + * The tests verify: + * 1. Tables are created with ON CLUSTER clause when cluster is specified + * 2. ClickHouse clusters are properly configured from moose.config.toml + * 3. cluster_name appears correctly in the infrastructure map + * 4. Mixed environments (some tables with cluster, some without) work correctly + * 5. Both TypeScript and Python SDKs support cluster configuration + */ + +import { spawn, ChildProcess } from "child_process"; +import { expect } from "chai"; +import * as fs from "fs"; +import * as path from "path"; +import { promisify } from "util"; +import { createClient } from "@clickhouse/client"; + +// Import constants and utilities +import { + TIMEOUTS, + SERVER_CONFIG, + TEMPLATE_NAMES, + APP_NAMES, + CLICKHOUSE_CONFIG, +} from "./constants"; + +import { + stopDevProcess, + waitForServerStart, + cleanupDocker, + removeTestProject, + createTempTestDirectory, + setupTypeScriptProject, + setupPythonProject, +} from "./utils"; + +const execAsync = promisify(require("child_process").exec); +const setTimeoutAsync = (ms: number) => + new Promise((resolve) => global.setTimeout(resolve, ms)); + +const CLI_PATH = path.resolve(__dirname, "../../../target/debug/moose-cli"); +const MOOSE_LIB_PATH = path.resolve( + __dirname, + "../../../packages/ts-moose-lib", +); +const MOOSE_PY_LIB_PATH = path.resolve( + __dirname, + "../../../packages/py-moose-lib", +); + +// Admin API key hash for authentication +const TEST_ADMIN_HASH = + "deadbeefdeadbeefdeadbeefdeadbeef.0123456789abcdef0123456789abcdef"; + +/** + * Query ClickHouse to verify cluster configuration + */ +async function verifyClustersInClickHouse( + 
expectedClusters: string[], +): Promise { + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + }); + + try { + const result = await client.query({ + query: "SELECT DISTINCT cluster FROM system.clusters ORDER BY cluster", + format: "JSONEachRow", + }); + + const clusters = await result.json<{ cluster: string }>(); + const clusterNames = clusters.map((row) => row.cluster); + + console.log("Clusters found in ClickHouse:", clusterNames); + + for (const expected of expectedClusters) { + expect( + clusterNames, + `Cluster '${expected}' should be configured in ClickHouse`, + ).to.include(expected); + } + } finally { + await client.close(); + } +} + +/** + * Query inframap to verify cluster_name is set correctly + */ +async function verifyInfraMapClusters( + expectedTables: { name: string; cluster: string | null }[], +): Promise { + const response = await fetch(`${SERVER_CONFIG.url}/admin/inframap`, { + headers: { + Authorization: `Bearer ${TEST_ADMIN_HASH}`, + }, + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error( + `inframap endpoint returned ${response.status}: ${errorText}`, + ); + } + + const response_data = await response.json(); + console.log("InfraMap response:", JSON.stringify(response_data, null, 2)); + + // Handle both direct format and wrapped format + const infraMap = response_data.infra_map || response_data; + + expect(infraMap.tables, "InfraMap should have tables field").to.exist; + + console.log("InfraMap tables:", Object.keys(infraMap.tables)); + + for (const expectedTable of expectedTables) { + const tableKey = `local_${expectedTable.name}`; + const table = infraMap.tables[tableKey]; + + expect(table, `Table ${expectedTable.name} should exist in inframap`).to + .exist; + + // Normalize undefined to null for comparison (undefined means field not present) + const actualCluster = + table.cluster_name === undefined ? 
null : table.cluster_name; + expect( + actualCluster, + `Table ${expectedTable.name} should have correct cluster_name`, + ).to.equal(expectedTable.cluster); + } +} + +/** + * Verify that the clickhouse_clusters.xml file was generated + */ +function verifyClusterXmlGenerated(projectDir: string): void { + const clusterXmlPath = path.join( + projectDir, + ".moose/clickhouse_clusters.xml", + ); + + expect( + fs.existsSync(clusterXmlPath), + "clickhouse_clusters.xml should be generated in .moose directory", + ).to.be.true; + + const xmlContent = fs.readFileSync(clusterXmlPath, "utf-8"); + console.log("Generated cluster XML:", xmlContent); + + // Verify XML contains expected cluster definitions + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); +} + +/** + * Verify table exists in ClickHouse + * + * Note: ON CLUSTER is a DDL execution directive and is NOT stored in the table schema. + * SHOW CREATE TABLE will never display ON CLUSTER, even if it was used during creation. + * To verify cluster support, we rely on: + * 1. The inframap showing cluster_name (preserved in our state) + * 2. 
The table being successfully created (which would fail if cluster was misconfigured) + */ +async function verifyTableExists(tableName: string): Promise { + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: `SELECT name, engine FROM system.tables WHERE database = '${CLICKHOUSE_CONFIG.database}' AND name = '${tableName}'`, + format: "JSONEachRow", + }); + + const rows = await result.json<{ name: string; engine: string }>(); + expect( + rows.length, + `Table ${tableName} should exist in ClickHouse`, + ).to.equal(1); + console.log(`Table ${tableName} exists with engine: ${rows[0].engine}`); + } finally { + await client.close(); + } +} + +/** + * Configuration for cluster template tests + */ +interface ClusterTestConfig { + language: "typescript" | "python"; + templateName: string; + appName: string; + projectDirSuffix: string; + displayName: string; +} + +const CLUSTER_CONFIGS: ClusterTestConfig[] = [ + { + language: "typescript", + templateName: TEMPLATE_NAMES.TYPESCRIPT_CLUSTER, + appName: APP_NAMES.TYPESCRIPT_CLUSTER, + projectDirSuffix: "ts-cluster", + displayName: "TypeScript Cluster Template", + }, + { + language: "python", + templateName: TEMPLATE_NAMES.PYTHON_CLUSTER, + appName: APP_NAMES.PYTHON_CLUSTER, + projectDirSuffix: "py-cluster", + displayName: "Python Cluster Template", + }, +]; + +/** + * Creates a test suite for a specific cluster template configuration + */ +const createClusterTestSuite = (config: ClusterTestConfig) => { + describe(config.displayName, function () { + let devProcess: ChildProcess | null = null; + let TEST_PROJECT_DIR: string; + + before(async function () { + this.timeout(TIMEOUTS.TEST_SETUP_MS); + + // Verify CLI exists + try { + await fs.promises.access(CLI_PATH, fs.constants.F_OK); + } catch (err) { + console.error( + `CLI not found at 
${CLI_PATH}. It should be built in the pretest step.`, + ); + throw err; + } + + // Create temporary directory for this test + TEST_PROJECT_DIR = createTempTestDirectory(config.projectDirSuffix); + + // Setup project based on language + if (config.language === "typescript") { + await setupTypeScriptProject( + TEST_PROJECT_DIR, + config.templateName, + CLI_PATH, + MOOSE_LIB_PATH, + config.appName, + "npm", + ); + } else { + await setupPythonProject( + TEST_PROJECT_DIR, + config.templateName, + CLI_PATH, + MOOSE_PY_LIB_PATH, + config.appName, + ); + } + + // Start dev server + console.log("Starting dev server..."); + const devEnv = + config.language === "python" ? + { + ...process.env, + MOOSE_TELEMETRY_ENABLED: "false", + PYTHONDONTWRITEBYTECODE: "1", + } + : { ...process.env, MOOSE_TELEMETRY_ENABLED: "false" }; + + devProcess = spawn(CLI_PATH, ["dev"], { + cwd: TEST_PROJECT_DIR, + stdio: "pipe", + env: devEnv, + }); + + let serverOutput = ""; + devProcess.stdout?.on("data", (data) => { + serverOutput += data.toString(); + process.stdout.write(data); + }); + + devProcess.stderr?.on("data", (data) => { + serverOutput += data.toString(); + process.stderr.write(data); + }); + + // Wait for server to start + await waitForServerStart( + devProcess, + TIMEOUTS.SERVER_STARTUP_MS, + SERVER_CONFIG.startupMessage, + SERVER_CONFIG.url, + ); + await setTimeoutAsync(3000); // Additional wait for infrastructure setup + }); + + after(async function () { + this.timeout(TIMEOUTS.CLEANUP_MS); + + if (devProcess) { + await stopDevProcess(devProcess); + devProcess = null; + } + + await cleanupDocker(TEST_PROJECT_DIR, config.appName); + + // Clean up test directory + if (TEST_PROJECT_DIR && fs.existsSync(TEST_PROJECT_DIR)) { + removeTestProject(TEST_PROJECT_DIR); + } + }); + + it("should create tables with ON CLUSTER clauses", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify TableA and TableB were created in ClickHouse + const client = createClient({ + 
url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: + "SELECT name FROM system.tables WHERE database = 'local' AND name IN ('TableA', 'TableB', 'TableC') ORDER BY name", + format: "JSONEachRow", + }); + + const tables = await result.json<{ name: string }>(); + const tableNames = tables.map((t) => t.name); + + expect(tableNames).to.include("TableA"); + expect(tableNames).to.include("TableB"); + expect(tableNames).to.include("TableC"); + } finally { + await client.close(); + } + }); + + it("should configure ClickHouse clusters from moose.config.toml", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + await verifyClustersInClickHouse(["cluster_a", "cluster_b"]); + }); + + it("should generate clickhouse_clusters.xml file", async function () { + verifyClusterXmlGenerated(TEST_PROJECT_DIR); + }); + + it("should show correct cluster_name in inframap", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + await verifyInfraMapClusters([ + { name: "TableA", cluster: "cluster_a" }, + { name: "TableB", cluster: "cluster_b" }, + { name: "TableC", cluster: null }, + ]); + }); + + it("should create tables successfully with cluster configuration", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify tables were created successfully + // (If cluster was misconfigured, table creation would have failed) + await verifyTableExists("TableA"); + await verifyTableExists("TableB"); + await verifyTableExists("TableC"); + }); + }); +}; + +// Test suite for Cluster Support +describe("Cluster Support E2E Tests", function () { + // Generate test suites for each cluster configuration + CLUSTER_CONFIGS.forEach(createClusterTestSuite); +}); diff --git a/apps/framework-cli-e2e/test/constants.ts b/apps/framework-cli-e2e/test/constants.ts index f6eba08eac..1b1641fd2e 100644 --- 
a/apps/framework-cli-e2e/test/constants.ts +++ b/apps/framework-cli-e2e/test/constants.ts @@ -96,6 +96,8 @@ export const TEMPLATE_NAMES = { TYPESCRIPT_TESTS: "typescript-tests", PYTHON_DEFAULT: "python", PYTHON_TESTS: "python-tests", + TYPESCRIPT_CLUSTER: "typescript-cluster", + PYTHON_CLUSTER: "python-cluster", } as const; export const APP_NAMES = { @@ -103,4 +105,6 @@ export const APP_NAMES = { TYPESCRIPT_TESTS: "moose-ts-tests-app", PYTHON_DEFAULT: "moose-py-default-app", PYTHON_TESTS: "moose-py-tests-app", + TYPESCRIPT_CLUSTER: "moose-ts-cluster-app", + PYTHON_CLUSTER: "moose-py-cluster-app", } as const; diff --git a/templates/python-cluster/README.md b/templates/python-cluster/README.md new file mode 100644 index 0000000000..22a9df3b1c --- /dev/null +++ b/templates/python-cluster/README.md @@ -0,0 +1,10 @@ +# Python Cluster Test Template + +This is a test-only template for E2E testing of ClickHouse cluster support in MooseStack. + +## Features + +- Three OLAP tables demonstrating different cluster configurations +- `table_a`: Uses `cluster_a` +- `table_b`: Uses `cluster_b` +- `table_c`: No cluster (for mixed environment testing) diff --git a/templates/python-cluster/app/__init__.py b/templates/python-cluster/app/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/python-cluster/app/ingest/__init__.py b/templates/python-cluster/app/ingest/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/python-cluster/app/ingest/models.py b/templates/python-cluster/app/ingest/models.py new file mode 100644 index 0000000000..a6198feb4e --- /dev/null +++ b/templates/python-cluster/app/ingest/models.py @@ -0,0 +1,60 @@ +""" +Test models for ClickHouse cluster support +""" + +from moose_lib import Key, OlapTable, OlapConfig, ReplicatedMergeTreeEngine, MergeTreeEngine +from pydantic import BaseModel + + +# Table using cluster_a +class TableA(BaseModel): + id: Key[str] + value: str + timestamp: float + + +# Table 
using cluster_b +class TableB(BaseModel): + id: Key[str] + count: int + timestamp: float + + +# Table without cluster (for mixed testing) +class TableC(BaseModel): + id: Key[str] + data: str + timestamp: float + + +# OLAP Tables + +# table_a: Uses cluster_a with ReplicatedMergeTree +table_a = OlapTable[TableA]( + "TableA", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine(), + cluster="cluster_a", + ), +) + +# table_b: Uses cluster_b with ReplicatedMergeTree +table_b = OlapTable[TableB]( + "TableB", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine(), + cluster="cluster_b", + ), +) + +# TableC: No cluster, uses plain MergeTree (not replicated) +table_c = OlapTable[TableC]( + "TableC", + OlapConfig( + order_by_fields=["id"], + engine=MergeTreeEngine(), + ), +) + diff --git a/templates/python-cluster/app/main.py b/templates/python-cluster/app/main.py new file mode 100644 index 0000000000..43c27928fc --- /dev/null +++ b/templates/python-cluster/app/main.py @@ -0,0 +1 @@ +from app.ingest import models diff --git a/templates/python-cluster/moose.config.toml b/templates/python-cluster/moose.config.toml new file mode 100644 index 0000000000..bb3d1ecb55 --- /dev/null +++ b/templates/python-cluster/moose.config.toml @@ -0,0 +1,62 @@ +language = "Python" +source_dir = "app" + +[redpanda_config] +broker = "localhost:19092" +message_timeout_ms = 1000 +retention_ms = 30000 +replication_factor = 1 + +[clickhouse_config] +db_name = "local" +user = "panda" +password = "pandapass" +use_ssl = false +host = "localhost" +host_port = 18123 +native_port = 9000 + +# Define two clusters for testing ON CLUSTER support +[[clickhouse_config.clusters]] +name = "cluster_a" + +[[clickhouse_config.clusters]] +name = "cluster_b" + +[http_server_config] +host = "localhost" +port = 4000 +management_port = 5001 + +[redis_config] +url = "redis://127.0.0.1:6379" +key_prefix = "MS" + +[git_config] +main_branch_name = "main" + +[temporal_config] +db_user 
= "temporal" +db_password = "temporal" +db_port = 5432 +temporal_port = 7233 +temporal_version = "1.22.3" +admin_tools_version = "1.22.3" +ui_version = "2.21.3" +ui_port = 8080 +ui_cors_origins = "http://localhost:3000" +config_path = "config/dynamicconfig/development-sql.yaml" +postgresql_version = "13" + +[supported_old_versions] + +[authentication] +admin_api_key = "445fd4696cfc5c49e28995c4aba05de44303a112" + +[features] +olap = true +streaming_engine = false +workflows = false +data_model_v2 = true +apis = false + diff --git a/templates/python-cluster/requirements.txt b/templates/python-cluster/requirements.txt new file mode 100644 index 0000000000..df5e13942d --- /dev/null +++ b/templates/python-cluster/requirements.txt @@ -0,0 +1,7 @@ +kafka-python-ng==2.2.2 +clickhouse-connect==0.7.16 +requests==2.32.4 +moose-cli +moose-lib +faker +sqlglot[rs]>=27.16.3 \ No newline at end of file diff --git a/templates/python-cluster/setup.py b/templates/python-cluster/setup.py new file mode 100644 index 0000000000..36c813a049 --- /dev/null +++ b/templates/python-cluster/setup.py @@ -0,0 +1,13 @@ + +from setuptools import setup +import os + +requirements_path = os.path.join(os.path.dirname(__file__), "requirements.txt") +with open(requirements_path, "r") as f: + requirements = f.read().splitlines() + +setup( + name='py', + version='0.0', + install_requires=requirements, +) diff --git a/templates/python-cluster/template.config.toml b/templates/python-cluster/template.config.toml new file mode 100644 index 0000000000..a0e8e09bb4 --- /dev/null +++ b/templates/python-cluster/template.config.toml @@ -0,0 +1,22 @@ +language = "python" +description = "Test-only template: Python project for testing ClickHouse cluster support" +post_install_print = """ +Test Template: ClickHouse Cluster Support + +This template is designed for E2E testing of ON CLUSTER functionality. 
+ +--------------------------------------------------------- + +📂 Go to your project directory: + $ cd {project_dir} + +📦 Install Dependencies: + $ pip install -e . + +🛠️ Start dev server: + $ moose dev +""" +default_sloan_telemetry="standard" + +visible=false + diff --git a/templates/typescript-cluster/README.md b/templates/typescript-cluster/README.md new file mode 100644 index 0000000000..2ff8400255 --- /dev/null +++ b/templates/typescript-cluster/README.md @@ -0,0 +1,10 @@ +# TypeScript Cluster Test Template + +This is a test-only template for E2E testing of ClickHouse cluster support in MooseStack. + +## Features + +- Three OLAP tables demonstrating different cluster configurations +- `TableA`: Uses `cluster_a` +- `TableB`: Uses `cluster_b` +- `TableC`: No cluster (for mixed environment testing) diff --git a/templates/typescript-cluster/moose.config.toml b/templates/typescript-cluster/moose.config.toml new file mode 100644 index 0000000000..1f121c2045 --- /dev/null +++ b/templates/typescript-cluster/moose.config.toml @@ -0,0 +1,65 @@ +language = "Typescript" +source_dir = "src" + +[typescript_config] +package_manager = "npm" + +[redpanda_config] +broker = "localhost:19092" +message_timeout_ms = 1000 +retention_ms = 30000 +replication_factor = 1 + +[clickhouse_config] +db_name = "local" +user = "panda" +password = "pandapass" +use_ssl = false +host = "localhost" +host_port = 18123 +native_port = 9000 + +# Define two clusters for testing ON CLUSTER support +[[clickhouse_config.clusters]] +name = "cluster_a" + +[[clickhouse_config.clusters]] +name = "cluster_b" + +[http_server_config] +host = "localhost" +port = 4000 +management_port = 5001 + +[redis_config] +url = "redis://127.0.0.1:6379" +key_prefix = "MS" + +[git_config] +main_branch_name = "main" + +[temporal_config] +db_user = "temporal" +db_password = "temporal" +db_port = 5432 +temporal_port = 7233 +temporal_version = "1.22.3" +admin_tools_version = "1.22.3" +ui_version = "2.21.3" +ui_port = 8080 
+ui_cors_origins = "http://localhost:3000" +config_path = "config/dynamicconfig/development-sql.yaml" +postgresql_version = "13" + +[supported_old_versions] + +[authentication] +admin_api_key = "445fd4696cfc5c49e28995c4aba05de44303a112" + +[features] +olap = true +streaming_engine = false +workflows = false +data_model_v2 = true +apis = false + diff --git a/templates/typescript-cluster/package.json b/templates/typescript-cluster/package.json new file mode 100644 index 0000000000..2612f6cd2c --- /dev/null +++ b/templates/typescript-cluster/package.json @@ -0,0 +1,26 @@ +{ + "name": "moose-cluster-test-app", + "version": "0.0", + "description": "Test template for ClickHouse cluster support", + "scripts": { + "moose": "moose-cli", + "build": "moose-cli build --docker", + "dev": "moose-cli dev" + }, + "dependencies": { + "@514labs/moose-lib": "latest", + "ts-patch": "^3.3.0", + "tsconfig-paths": "^4.2.0", + "typia": "^9.6.1" + }, + "devDependencies": { + "@types/node": "^22.10.0", + "tsx": "^4.19.2", + "typescript": "^5.7.2" + }, + "pnpm": { + "onlyBuiltDependencies": [ + "@confluentinc/kafka-javascript" + ] + } +} diff --git a/templates/typescript-cluster/src/index.ts b/templates/typescript-cluster/src/index.ts new file mode 100644 index 0000000000..f263549a1c --- /dev/null +++ b/templates/typescript-cluster/src/index.ts @@ -0,0 +1 @@ +import "./ingest/models"; diff --git a/templates/typescript-cluster/src/ingest/models.ts b/templates/typescript-cluster/src/ingest/models.ts new file mode 100644 index 0000000000..e6c21ded39 --- /dev/null +++ b/templates/typescript-cluster/src/ingest/models.ts @@ -0,0 +1,48 @@ +import { OlapTable, Key, ClickHouseEngines } from "@514labs/moose-lib"; + +/** + * Test models for ClickHouse cluster support + */ + +/** Table using cluster_a */ +export interface TableA { + id: Key; + value: string; + timestamp: number; +} + +/** Table using cluster_b */ +export interface TableB { + id: Key; + count: number; + timestamp: number; +} + +/** Table 
without cluster (for mixed testing) */ +export interface TableC { + id: Key; + data: string; + timestamp: number; +} + +/** OLAP Tables */ + +// TableA: Uses cluster_a with ReplicatedMergeTree +export const tableA = new OlapTable("TableA", { + orderByFields: ["id"], + engine: ClickHouseEngines.ReplicatedMergeTree, + cluster: "cluster_a", +}); + +// TableB: Uses cluster_b with ReplicatedMergeTree +export const tableB = new OlapTable("TableB", { + orderByFields: ["id"], + engine: ClickHouseEngines.ReplicatedMergeTree, + cluster: "cluster_b", +}); + +// TableC: No cluster, uses plain MergeTree (not replicated) +export const tableC = new OlapTable("TableC", { + orderByFields: ["id"], + engine: ClickHouseEngines.MergeTree, +}); diff --git a/templates/typescript-cluster/template.config.toml b/templates/typescript-cluster/template.config.toml new file mode 100644 index 0000000000..10ff95831a --- /dev/null +++ b/templates/typescript-cluster/template.config.toml @@ -0,0 +1,22 @@ +language = "typescript" +description = "Test-only template: TypeScript project for testing ClickHouse cluster support" +post_install_print = """ +Test Template: ClickHouse Cluster Support + +This template is designed for E2E testing of ON CLUSTER functionality. 
+ +--------------------------------------------------------- + +📂 Go to your project directory: + $ cd {project_dir} + +📦 Install Dependencies: + $ npm install + +🛠️ Start dev server: + $ moose dev +""" +default_sloan_telemetry="standard" + +visible=false + diff --git a/templates/typescript-cluster/tsconfig.json b/templates/typescript-cluster/tsconfig.json new file mode 100644 index 0000000000..36598821c7 --- /dev/null +++ b/templates/typescript-cluster/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "outDir": "dist", + "esModuleInterop": true, + "plugins": [ + { + "transform": "./node_modules/@514labs/moose-lib/dist/compilerPlugin.js", + "transformProgram": true + }, + { + "transform": "typia/lib/transform" + } + ], + "strictNullChecks": true + } +} From 38fad2c2761401dac106c2da335d56196b980cfa Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 4 Nov 2025 12:25:27 -0700 Subject: [PATCH 06/23] refactor e2e test --- apps/framework-cli-e2e/test/cluster.test.ts | 65 ++++++++++----------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/apps/framework-cli-e2e/test/cluster.test.ts b/apps/framework-cli-e2e/test/cluster.test.ts index 871cf10aec..927cd78b21 100644 --- a/apps/framework-cli-e2e/test/cluster.test.ts +++ b/apps/framework-cli-e2e/test/cluster.test.ts @@ -31,18 +31,17 @@ import { } from "./constants"; import { - stopDevProcess, waitForServerStart, - cleanupDocker, - removeTestProject, createTempTestDirectory, setupTypeScriptProject, setupPythonProject, + cleanupTestSuite, + performGlobalCleanup, + cleanupClickhouseData, + waitForInfrastructureReady, } from "./utils"; const execAsync = promisify(require("child_process").exec); -const setTimeoutAsync = (ms: number) => - new Promise((resolve) => global.setTimeout(resolve, ms)); const CLI_PATH = path.resolve(__dirname, "../../../target/debug/moose-cli"); const MOOSE_LIB_PATH = path.resolve( @@ -275,52 +274,35 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { 
config.language === "python" ? { ...process.env, - MOOSE_TELEMETRY_ENABLED: "false", - PYTHONDONTWRITEBYTECODE: "1", + VIRTUAL_ENV: path.join(TEST_PROJECT_DIR, ".venv"), + PATH: `${path.join(TEST_PROJECT_DIR, ".venv", "bin")}:${process.env.PATH}`, } - : { ...process.env, MOOSE_TELEMETRY_ENABLED: "false" }; + : { ...process.env }; devProcess = spawn(CLI_PATH, ["dev"], { - cwd: TEST_PROJECT_DIR, stdio: "pipe", + cwd: TEST_PROJECT_DIR, env: devEnv, }); - let serverOutput = ""; - devProcess.stdout?.on("data", (data) => { - serverOutput += data.toString(); - process.stdout.write(data); - }); - - devProcess.stderr?.on("data", (data) => { - serverOutput += data.toString(); - process.stderr.write(data); - }); - - // Wait for server to start await waitForServerStart( devProcess, TIMEOUTS.SERVER_STARTUP_MS, SERVER_CONFIG.startupMessage, SERVER_CONFIG.url, ); - await setTimeoutAsync(3000); // Additional wait for infrastructure setup + console.log("Server started, cleaning up old data..."); + await cleanupClickhouseData(); + console.log("Waiting for infrastructure to be ready..."); + await waitForInfrastructureReady(); + console.log("All components ready, starting tests..."); }); after(async function () { this.timeout(TIMEOUTS.CLEANUP_MS); - - if (devProcess) { - await stopDevProcess(devProcess); - devProcess = null; - } - - await cleanupDocker(TEST_PROJECT_DIR, config.appName); - - // Clean up test directory - if (TEST_PROJECT_DIR && fs.existsSync(TEST_PROJECT_DIR)) { - removeTestProject(TEST_PROJECT_DIR); - } + await cleanupTestSuite(devProcess, TEST_PROJECT_DIR, config.appName, { + logPrefix: config.displayName, + }); }); it("should create tables with ON CLUSTER clauses", async function () { @@ -383,6 +365,21 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { }); }; +// Global setup to clean Docker state from previous runs (useful for local dev) +// Github hosted runners start with a clean slate. 
+before(async function () { + this.timeout(TIMEOUTS.GLOBAL_CLEANUP_MS); + await performGlobalCleanup( + "Running global setup for cluster tests - cleaning Docker state from previous runs...", + ); +}); + +// Global cleanup to ensure no hanging processes +after(async function () { + this.timeout(TIMEOUTS.GLOBAL_CLEANUP_MS); + await performGlobalCleanup(); +}); + // Test suite for Cluster Support describe("Cluster Support E2E Tests", function () { // Generate test suites for each cluster configuration From 45e75b572b732d797b87e6c1ff1fc7e4a1ba1011 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 4 Nov 2025 12:36:15 -0700 Subject: [PATCH 07/23] add test --- apps/framework-cli-e2e/test/cluster.test.ts | 43 ++++++++++++++++++- templates/python-cluster/app/ingest/models.py | 19 ++++++++ .../typescript-cluster/src/ingest/models.ts | 15 +++++++ 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/apps/framework-cli-e2e/test/cluster.test.ts b/apps/framework-cli-e2e/test/cluster.test.ts index 927cd78b21..6dbb6d9527 100644 --- a/apps/framework-cli-e2e/test/cluster.test.ts +++ b/apps/framework-cli-e2e/test/cluster.test.ts @@ -12,6 +12,7 @@ * 3. cluster_name appears correctly in the infrastructure map * 4. Mixed environments (some tables with cluster, some without) work correctly * 5. Both TypeScript and Python SDKs support cluster configuration + * 6. 
ReplicatedMergeTree with explicit keeper_path/replica_name (no cluster) works correctly */ import { spawn, ChildProcess } from "child_process"; @@ -308,7 +309,7 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { it("should create tables with ON CLUSTER clauses", async function () { this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); - // Verify TableA and TableB were created in ClickHouse + // Verify all tables were created in ClickHouse const client = createClient({ url: CLICKHOUSE_CONFIG.url, username: CLICKHOUSE_CONFIG.username, @@ -319,7 +320,7 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { try { const result = await client.query({ query: - "SELECT name FROM system.tables WHERE database = 'local' AND name IN ('TableA', 'TableB', 'TableC') ORDER BY name", + "SELECT name FROM system.tables WHERE database = 'local' AND name IN ('TableA', 'TableB', 'TableC', 'TableD') ORDER BY name", format: "JSONEachRow", }); @@ -329,6 +330,7 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { expect(tableNames).to.include("TableA"); expect(tableNames).to.include("TableB"); expect(tableNames).to.include("TableC"); + expect(tableNames).to.include("TableD"); } finally { await client.close(); } @@ -350,6 +352,7 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { { name: "TableA", cluster: "cluster_a" }, { name: "TableB", cluster: "cluster_b" }, { name: "TableC", cluster: null }, + { name: "TableD", cluster: null }, ]); }); @@ -361,6 +364,42 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { await verifyTableExists("TableA"); await verifyTableExists("TableB"); await verifyTableExists("TableC"); + await verifyTableExists("TableD"); + }); + + it("should create TableD with explicit keeper args and no cluster", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify TableD was created with explicit keeper_path and replica_name + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + 
username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: "SHOW CREATE TABLE local.TableD", + format: "JSONEachRow", + }); + + const data = await result.json<{ statement: string }>(); + const createStatement = data[0].statement; + + // Verify it's ReplicatedMergeTree + expect(createStatement).to.include("ReplicatedMergeTree"); + // Verify it has explicit keeper path + expect(createStatement).to.include( + "/clickhouse/tables/{database}/{table}", + ); + // Verify it has explicit replica name + expect(createStatement).to.include("{replica}"); + // Verify it does NOT have ON CLUSTER (since no cluster is specified) + expect(createStatement).to.not.include("ON CLUSTER"); + } finally { + await client.close(); + } }); }); }; diff --git a/templates/python-cluster/app/ingest/models.py b/templates/python-cluster/app/ingest/models.py index a6198feb4e..1bcd83f816 100644 --- a/templates/python-cluster/app/ingest/models.py +++ b/templates/python-cluster/app/ingest/models.py @@ -27,6 +27,13 @@ class TableC(BaseModel): timestamp: float +# Table with explicit keeper args but no cluster +class TableD(BaseModel): + id: Key[str] + metric: int + timestamp: float + + # OLAP Tables # table_a: Uses cluster_a with ReplicatedMergeTree @@ -58,3 +65,15 @@ class TableC(BaseModel): ), ) +# TableD: ReplicatedMergeTree with explicit keeper args, no cluster +table_d = OlapTable[TableD]( + "TableD", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{table}", + replica_name="{replica}", + ), + ), +) + diff --git a/templates/typescript-cluster/src/ingest/models.ts b/templates/typescript-cluster/src/ingest/models.ts index e6c21ded39..05849b5fe8 100644 --- a/templates/typescript-cluster/src/ingest/models.ts +++ b/templates/typescript-cluster/src/ingest/models.ts @@ -25,6 +25,13 @@ export interface TableC { 
timestamp: number; } +/** Table with explicit keeper args but no cluster */ +export interface TableD { + id: Key; + metric: number; + timestamp: number; +} + /** OLAP Tables */ // TableA: Uses cluster_a with ReplicatedMergeTree @@ -46,3 +53,11 @@ export const tableC = new OlapTable("TableC", { orderByFields: ["id"], engine: ClickHouseEngines.MergeTree, }); + +// TableD: ReplicatedMergeTree with explicit keeper args, no cluster +export const tableD = new OlapTable("TableD", { + orderByFields: ["id"], + engine: ClickHouseEngines.ReplicatedMergeTree, + keeperPath: "/clickhouse/tables/{database}/{table}", + replicaName: "{replica}", +}); From 2d3ddca4c66fbaef09ac9d3860add317726c962d Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 4 Nov 2025 14:05:29 -0700 Subject: [PATCH 08/23] fix --- .../src/framework/core/plan_validator.rs | 168 ------------------ .../infrastructure/olap/clickhouse/queries.rs | 164 ++++++++++++++++- 2 files changed, 159 insertions(+), 173 deletions(-) diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index 619424719e..ddbb638338 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -70,59 +70,12 @@ fn validate_cluster_references(project: &Project, plan: &InfraPlan) -> Result<() Ok(()) } -/// Validates that replicated engines either have keeper path/replica name OR a cluster defined -fn validate_replicated_engine_args(plan: &InfraPlan) -> Result<(), ValidationError> { - use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; - - for table in plan.target_infra_map.tables.values() { - let needs_args = match &table.engine { - Some(ClickhouseEngine::ReplicatedMergeTree { - keeper_path, - replica_name, - }) => keeper_path.is_none() && replica_name.is_none(), - Some(ClickhouseEngine::ReplicatedReplacingMergeTree { - keeper_path, - replica_name, - .. 
- }) => keeper_path.is_none() && replica_name.is_none(), - Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { - keeper_path, - replica_name, - }) => keeper_path.is_none() && replica_name.is_none(), - Some(ClickhouseEngine::ReplicatedSummingMergeTree { - keeper_path, - replica_name, - .. - }) => keeper_path.is_none() && replica_name.is_none(), - _ => false, - }; - - // If engine args are missing AND no cluster is defined, that's an error - if needs_args && table.cluster_name.is_none() { - return Err(ValidationError::TableValidation(format!( - "Table '{}' uses a replicated engine but neither cluster nor keeper path/replica name are specified.\n\ - \n\ - You must either:\n\ - 1. Specify a cluster in the table config: cluster = \"prod_cluster\"\n\ - (and define it in moose.config.toml)\n\ - 2. Or provide explicit keeper path and replica name in the engine config\n", - table.name - ))); - } - } - - Ok(()) -} - pub fn validate(project: &Project, plan: &InfraPlan) -> Result<(), ValidationError> { stream::validate_changes(project, &plan.changes.streaming_engine_changes)?; // Validate cluster references validate_cluster_references(project, plan)?; - // Validate replicated engine args - validate_replicated_engine_args(plan)?; - // Check for validation errors in OLAP changes for change in &plan.changes.olap_changes { if let OlapChange::Table(TableChange::ValidationError { message, .. 
}) = change { @@ -393,127 +346,6 @@ mod tests { } } - #[test] - fn test_replicated_engine_without_args_or_cluster_fails() { - use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; - - let project = create_test_project(None); - let table = create_table_with_engine( - "test_table", - None, - Some(ClickhouseEngine::ReplicatedMergeTree { - keeper_path: None, - replica_name: None, - }), - ); - let plan = create_test_plan(vec![table]); - - let result = validate(&project, &plan); - - assert!(result.is_err()); - match result { - Err(ValidationError::TableValidation(msg)) => { - assert!(msg.contains("test_table")); - assert!(msg.contains("replicated engine")); - assert!(msg.contains("cluster")); - } - _ => panic!("Expected TableValidation error"), - } - } - - #[test] - fn test_replicated_engine_with_cluster_but_no_args_succeeds() { - use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; - - let project = create_test_project(Some(vec![ClusterConfig { - name: "prod_cluster".to_string(), - }])); - let table = create_table_with_engine( - "test_table", - Some("prod_cluster".to_string()), - Some(ClickhouseEngine::ReplicatedMergeTree { - keeper_path: None, - replica_name: None, - }), - ); - let plan = create_test_plan(vec![table]); - - let result = validate(&project, &plan); - - assert!(result.is_ok()); - } - - #[test] - fn test_replicated_engine_with_args_but_no_cluster_succeeds() { - use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; - - let project = create_test_project(None); - let table = create_table_with_engine( - "test_table", - None, - Some(ClickhouseEngine::ReplicatedMergeTree { - keeper_path: Some("/clickhouse/tables/{database}/{table}".to_string()), - replica_name: Some("{replica}".to_string()), - }), - ); - let plan = create_test_plan(vec![table]); - - let result = validate(&project, &plan); - - assert!(result.is_ok()); - } - - #[test] - fn test_replicated_engine_with_both_args_and_cluster_succeeds() { - use 
crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; - - let project = create_test_project(Some(vec![ClusterConfig { - name: "prod_cluster".to_string(), - }])); - let table = create_table_with_engine( - "test_table", - Some("prod_cluster".to_string()), - Some(ClickhouseEngine::ReplicatedMergeTree { - keeper_path: Some("/clickhouse/tables/{database}/{table}".to_string()), - replica_name: Some("{replica}".to_string()), - }), - ); - let plan = create_test_plan(vec![table]); - - let result = validate(&project, &plan); - - assert!(result.is_ok()); - } - - #[test] - fn test_replicated_replacing_merge_tree_without_args_or_cluster_fails() { - use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; - - let project = create_test_project(None); - let table = create_table_with_engine( - "test_table", - None, - Some(ClickhouseEngine::ReplicatedReplacingMergeTree { - keeper_path: None, - replica_name: None, - ver: Some("version".to_string()), - is_deleted: None, - }), - ); - let plan = create_test_plan(vec![table]); - - let result = validate(&project, &plan); - - assert!(result.is_err()); - match result { - Err(ValidationError::TableValidation(msg)) => { - assert!(msg.contains("test_table")); - assert!(msg.contains("replicated engine")); - } - _ => panic!("Expected TableValidation error"), - } - } - #[test] fn test_non_replicated_engine_without_cluster_succeeds() { use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index d27ea2c7f9..7fa16b1dd0 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -2018,6 +2018,7 @@ fn build_summing_merge_tree_ddl(columns: &Option>) -> String { fn build_replication_params( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, engine_name: 
&str, table_name: &str, is_dev: bool, @@ -2027,17 +2028,19 @@ fn build_replication_params( Ok(vec![format!("'{}'", path), format!("'{}'", name)]) } (None, None) => { - if is_dev { - // In dev mode, inject default parameters for local ClickHouse + // Auto-inject parameters if cluster is defined, otherwise use defaults + // This supports both dev (local ClickHouse) and prod (with clusters) + if cluster_name.is_some() || is_dev { + // In dev mode OR when cluster is defined, inject default parameters // Use table name to ensure unique paths per table, avoiding conflicts - // {shard}, {replica}, and {database} macros are configured in docker-compose + // {shard}, {replica}, and {database} macros are configured in docker-compose or cluster // Note: {uuid} macro only works with ON CLUSTER queries, so we use table name instead Ok(vec![ format!("'/clickhouse/tables/{{database}}/{{shard}}/{}'", table_name), "'{replica}'".to_string(), ]) } else { - // In production, return empty parameters - let ClickHouse handle defaults + // In production without cluster, return empty parameters // This works for ClickHouse Cloud and properly configured servers Ok(vec![]) } @@ -2055,12 +2058,14 @@ fn build_replication_params( fn build_replicated_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, table_name: &str, is_dev: bool, ) -> Result { let params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedMergeTree", table_name, is_dev, @@ -2069,9 +2074,11 @@ fn build_replicated_merge_tree_ddl( } /// Generate DDL for ReplicatedReplacingMergeTree engine +#[allow(clippy::too_many_arguments)] fn build_replicated_replacing_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, ver: &Option, is_deleted: &Option, order_by_empty: bool, @@ -2094,6 +2101,7 @@ fn build_replicated_replacing_merge_tree_ddl( let mut params = build_replication_params( keeper_path, replica_name, + cluster_name, 
"ReplicatedReplacingMergeTree", table_name, is_dev, @@ -2116,12 +2124,14 @@ fn build_replicated_replacing_merge_tree_ddl( fn build_replicated_aggregating_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, table_name: &str, is_dev: bool, ) -> Result { let params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedAggregatingMergeTree", table_name, is_dev, @@ -2136,6 +2146,7 @@ fn build_replicated_aggregating_merge_tree_ddl( fn build_replicated_summing_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, columns: &Option>, table_name: &str, is_dev: bool, @@ -2143,6 +2154,7 @@ fn build_replicated_summing_merge_tree_ddl( let mut params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedSummingMergeTree", table_name, is_dev, @@ -2182,7 +2194,13 @@ pub fn create_table_query( ClickhouseEngine::ReplicatedMergeTree { keeper_path, replica_name, - } => build_replicated_merge_tree_ddl(keeper_path, replica_name, &table.name, is_dev)?, + } => build_replicated_merge_tree_ddl( + keeper_path, + replica_name, + &table.cluster_name, + &table.name, + is_dev, + )?, ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path, replica_name, @@ -2191,6 +2209,7 @@ pub fn create_table_query( } => build_replicated_replacing_merge_tree_ddl( keeper_path, replica_name, + &table.cluster_name, ver, is_deleted, table.order_by.is_empty(), @@ -2203,6 +2222,7 @@ pub fn create_table_query( } => build_replicated_aggregating_merge_tree_ddl( keeper_path, replica_name, + &table.cluster_name, &table.name, is_dev, )?, @@ -2213,6 +2233,7 @@ pub fn create_table_query( } => build_replicated_summing_merge_tree_ddl( keeper_path, replica_name, + &table.cluster_name, columns, &table.name, is_dev, @@ -4614,4 +4635,137 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; // Should still have DROP TABLE assert!(query.contains("DROP TABLE")); } + + #[test] + fn 
test_replication_params_dev_no_cluster_no_keeper_args_auto_injects() { + let result = build_replication_params( + &None, + &None, + &None, + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should auto-inject params in dev mode + assert_eq!(params.len(), 2); + assert!(params[0].contains("/clickhouse/tables/")); + assert!(params[1].contains("{replica}")); + } + + #[test] + fn test_replication_params_dev_with_cluster_no_keeper_args_succeeds() { + let result = build_replication_params( + &None, + &None, + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should auto-inject params + assert_eq!(params.len(), 2); + assert!(params[0].contains("/clickhouse/tables/")); + assert!(params[1].contains("{replica}")); + } + + #[test] + fn test_replication_params_dev_no_cluster_with_keeper_args_succeeds() { + let result = build_replication_params( + &Some("/clickhouse/tables/{database}/{table}".to_string()), + &Some("{replica}".to_string()), + &None, + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + assert_eq!(params.len(), 2); + assert_eq!(params[0], "'/clickhouse/tables/{database}/{table}'"); + assert_eq!(params[1], "'{replica}'"); + } + + #[test] + fn test_replication_params_prod_no_cluster_no_keeper_args_succeeds() { + let result = build_replication_params( + &None, + &None, + &None, + "ReplicatedMergeTree", + "test_table", + false, // is_dev = false (production) + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should return empty params for ClickHouse Cloud + assert_eq!(params.len(), 0); + } + + #[test] + fn test_replication_params_dev_with_cluster_and_keeper_args_succeeds() { + let result = build_replication_params( + &Some("/clickhouse/tables/{database}/{table}".to_string()), + 
&Some("{replica}".to_string()), + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should use explicit params, not auto-inject + assert_eq!(params.len(), 2); + assert_eq!(params[0], "'/clickhouse/tables/{database}/{table}'"); + assert_eq!(params[1], "'{replica}'"); + } + + #[test] + fn test_replication_params_prod_with_cluster_no_keeper_args_auto_injects() { + let result = build_replication_params( + &None, + &None, + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + false, // is_dev = false (production) + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should auto-inject params when cluster is defined, even in prod + assert_eq!(params.len(), 2); + assert!(params[0].contains("/clickhouse/tables/")); + assert!(params[1].contains("{replica}")); + } + + #[test] + fn test_replication_params_mismatched_keeper_args_fails() { + // Only keeper_path, no replica_name + let result = build_replication_params( + &Some("/clickhouse/tables/{database}/{table}".to_string()), + &None, + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + true, + ); + + assert!(result.is_err()); + let err = result.unwrap_err(); + match err { + ClickhouseError::InvalidParameters { message } => { + assert!(message.contains("requires both keeper_path and replica_name")); + } + _ => panic!("Expected InvalidParameters error"), + } + } } From 095a5fc24fa52358cdebc4bcd629f09b8b6b0370 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 4 Nov 2025 21:52:22 -0700 Subject: [PATCH 09/23] fixes --- apps/framework-cli-e2e/test/cluster.test.ts | 39 +++- .../src/cli/display/infrastructure.rs | 5 + .../framework/core/infrastructure/table.rs | 4 + .../src/framework/core/infrastructure_map.rs | 4 + .../olap/clickhouse/diff_strategy.rs | 146 +++------------ .../src/infrastructure/olap/clickhouse/mod.rs | 6 + 
.../infrastructure/olap/clickhouse/queries.rs | 175 +++++++++++++++++- .../src/utilities/migration_plan_schema.json | 9 +- .../py-moose-lib/moose_lib/dmv2/olap_table.py | 23 +++ .../tests/test_cluster_validation.py | 86 +++++++++ .../ts-moose-lib/src/dmv2/sdk/olapTable.ts | 14 ++ .../tests/cluster-validation.test.ts | 121 ++++++++++++ templates/python-cluster/app/ingest/models.py | 17 ++ .../typescript-cluster/src/ingest/models.ts | 14 ++ 14 files changed, 542 insertions(+), 121 deletions(-) create mode 100644 packages/py-moose-lib/tests/test_cluster_validation.py create mode 100644 packages/ts-moose-lib/tests/cluster-validation.test.ts diff --git a/apps/framework-cli-e2e/test/cluster.test.ts b/apps/framework-cli-e2e/test/cluster.test.ts index 6dbb6d9527..8397a75e59 100644 --- a/apps/framework-cli-e2e/test/cluster.test.ts +++ b/apps/framework-cli-e2e/test/cluster.test.ts @@ -13,6 +13,7 @@ * 4. Mixed environments (some tables with cluster, some without) work correctly * 5. Both TypeScript and Python SDKs support cluster configuration * 6. ReplicatedMergeTree with explicit keeper_path/replica_name (no cluster) works correctly + * 7. 
ReplicatedMergeTree with auto-injected params (ClickHouse Cloud mode) works correctly */ import { spawn, ChildProcess } from "child_process"; @@ -320,7 +321,7 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { try { const result = await client.query({ query: - "SELECT name FROM system.tables WHERE database = 'local' AND name IN ('TableA', 'TableB', 'TableC', 'TableD') ORDER BY name", + "SELECT name FROM system.tables WHERE database = 'local' AND name IN ('TableA', 'TableB', 'TableC', 'TableD', 'TableE') ORDER BY name", format: "JSONEachRow", }); @@ -331,6 +332,7 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { expect(tableNames).to.include("TableB"); expect(tableNames).to.include("TableC"); expect(tableNames).to.include("TableD"); + expect(tableNames).to.include("TableE"); } finally { await client.close(); } @@ -353,6 +355,7 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { { name: "TableB", cluster: "cluster_b" }, { name: "TableC", cluster: null }, { name: "TableD", cluster: null }, + { name: "TableE", cluster: null }, ]); }); @@ -365,6 +368,7 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { await verifyTableExists("TableB"); await verifyTableExists("TableC"); await verifyTableExists("TableD"); + await verifyTableExists("TableE"); }); it("should create TableD with explicit keeper args and no cluster", async function () { @@ -401,6 +405,39 @@ const createClusterTestSuite = (config: ClusterTestConfig) => { await client.close(); } }); + + it("should create TableE with auto-injected params (ClickHouse Cloud mode)", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify TableE was created with ReplicatedMergeTree and auto-injected params + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + 
query: "SHOW CREATE TABLE local.TableE", + format: "JSONEachRow", + }); + + const data = await result.json<{ statement: string }>(); + const createStatement = data[0].statement; + + console.log(`TableE CREATE statement: ${createStatement}`); + + // Verify it's ReplicatedMergeTree + expect(createStatement).to.include("ReplicatedMergeTree"); + // Verify it has auto-injected params (Moose injects these in dev mode) + expect(createStatement).to.match(/ReplicatedMergeTree\(/); + // Verify it does NOT have ON CLUSTER (no cluster specified) + expect(createStatement).to.not.include("ON CLUSTER"); + } finally { + await client.close(); + } + }); }); }; diff --git a/apps/framework-cli/src/cli/display/infrastructure.rs b/apps/framework-cli/src/cli/display/infrastructure.rs index 14859fc927..64769ea83f 100644 --- a/apps/framework-cli/src/cli/display/infrastructure.rs +++ b/apps/framework-cli/src/cli/display/infrastructure.rs @@ -236,6 +236,11 @@ fn format_table_display( details.push(format!("Order by: {}", table.order_by)); } + // Cluster section (if present) + if let Some(ref cluster) = table.cluster_name { + details.push(format!("Cluster: {}", cluster)); + } + // Engine section (if present) if let Some(ref engine) = table.engine { details.push(format!("Engine: {}", Into::::into(engine.clone()))); diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index 03e358cbdb..72c4fdff41 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -515,6 +515,10 @@ impl Table { }) }); + // Normalize auto-injected replication params so they match user code + // This prevents spurious diffs when comparing tables from inframap with code + let engine = engine.map(|e| e.normalize_auto_injected_params(&proto.name)); + // Engine settings are now handled via table_settings field let fallback = || -> OrderBy { diff --git 
a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 0d6bb33d40..9ee4d97a52 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -1854,6 +1854,9 @@ impl InfrastructureMap { // Detect engine change (e.g., MergeTree -> ReplacingMergeTree) let engine_changed = table.engine != target_table.engine; + // Detect cluster name change + let cluster_changed = table.cluster_name != target_table.cluster_name; + let order_by_change = if order_by_changed { OrderByChange { before: table.order_by.clone(), @@ -1899,6 +1902,7 @@ impl InfrastructureMap { || partition_by_changed || engine_changed || indexes_changed + || cluster_changed { // Use the strategy to determine the appropriate changes let strategy_changes = strategy.diff_table_update( diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index 98de0a007f..175bc447c3 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -50,49 +50,6 @@ fn format_database_change_error(table_name: &str, before_db: &str, after_db: &st ) } -/// Generates a formatted error message for cluster field changes. -/// -/// This function creates a user-friendly error message explaining that cluster field -/// changes require manual intervention to prevent data loss. Cluster changes are even -/// more critical than database changes because they affect replication topology. 
-/// -/// # Arguments -/// * `table_name` - The name of the table being changed -/// * `before_cluster` - The original cluster name (or "" if None) -/// * `after_cluster` - The new cluster name (or "" if None) -/// -/// # Returns -/// A formatted string with migration instructions -fn format_cluster_change_error( - table_name: &str, - before_cluster: &str, - after_cluster: &str, -) -> String { - format!( - "\n\n\ - ERROR: Cluster field change detected for table '{}'\n\ - \n\ - The cluster field changed from '{}' to '{}'\n\ - \n\ - Changing the cluster field is a destructive operation that requires\n\ - manual intervention to ensure data safety and proper replication.\n\ - \n\ - The cluster name is embedded in the table's replication path and cannot\n\ - be changed without recreating the table. This would cause data loss.\n\ - \n\ - To migrate this table to a different cluster:\n\ - \n\ - 1. Export your existing data\n\ - 2. Delete the old table definition from your code\n\ - 3. Create a new table definition with the target cluster\n\ - 4. 
Re-import your data if needed\n\ - \n\ - This ensures you maintain control over data migration and prevents\n\ - accidental data loss.\n", - table_name, before_cluster, after_cluster - ) -} - /// ClickHouse-specific table diff strategy /// /// ClickHouse has several limitations that require drop+create operations instead of ALTER: @@ -516,25 +473,15 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { } // Check if cluster has changed - // Cluster name cannot be changed after table creation for replicated tables - // as it's embedded in the replication path - let cluster_changed = before.cluster_name != after.cluster_name; - - if cluster_changed { - let before_cluster = before.cluster_name.as_deref().unwrap_or(""); - let after_cluster = after.cluster_name.as_deref().unwrap_or(""); - - let error_message = - format_cluster_change_error(&before.name, before_cluster, after_cluster); - - log::error!("{}", error_message); - - return vec![OlapChange::Table(TableChange::ValidationError { - table_name: before.name.clone(), - message: error_message, - before: Box::new(before.clone()), - after: Box::new(after.clone()), - })]; + if before.cluster_name != after.cluster_name { + log::warn!( + "ClickHouse: cluster changed for table '{}' (from {:?} to {:?}), requiring drop+create", + before.name, before.cluster_name, after.cluster_name + ); + return vec![ + OlapChange::Table(TableChange::Removed(before.clone())), + OlapChange::Table(TableChange::Added(after.clone())), + ]; } // Check if PARTITION BY has changed @@ -1707,27 +1654,16 @@ mod tests { let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); - // Should return exactly one ValidationError - assert_eq!(changes.len(), 1); + // Should return DROP + CREATE (like ORDER BY changes) + assert_eq!(changes.len(), 2); assert!(matches!( changes[0], - OlapChange::Table(TableChange::ValidationError { .. 
}) + OlapChange::Table(TableChange::Removed(_)) + )); + assert!(matches!( + changes[1], + OlapChange::Table(TableChange::Added(_)) )); - - // Check the error message contains expected information - if let OlapChange::Table(TableChange::ValidationError { - table_name, - message, - .. - }) = &changes[0] - { - assert_eq!(table_name, "test"); - assert!(message.contains("")); - assert!(message.contains("test_cluster")); - assert!(message.contains("manual intervention")); - } else { - panic!("Expected ValidationError variant"); - } } #[test] @@ -1748,27 +1684,16 @@ mod tests { let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); - // Should return exactly one ValidationError - assert_eq!(changes.len(), 1); + // Should return DROP + CREATE (like ORDER BY changes) + assert_eq!(changes.len(), 2); assert!(matches!( changes[0], - OlapChange::Table(TableChange::ValidationError { .. }) + OlapChange::Table(TableChange::Removed(_)) + )); + assert!(matches!( + changes[1], + OlapChange::Table(TableChange::Added(_)) )); - - // Check the error message contains expected information - if let OlapChange::Table(TableChange::ValidationError { - table_name, - message, - .. - }) = &changes[0] - { - assert_eq!(table_name, "test"); - assert!(message.contains("test_cluster")); - assert!(message.contains("")); - assert!(message.contains("manual intervention")); - } else { - panic!("Expected ValidationError variant"); - } } #[test] @@ -1789,27 +1714,16 @@ mod tests { let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); - // Should return exactly one ValidationError - assert_eq!(changes.len(), 1); + // Should return DROP + CREATE (like ORDER BY changes) + assert_eq!(changes.len(), 2); assert!(matches!( changes[0], - OlapChange::Table(TableChange::ValidationError { .. 
}) + OlapChange::Table(TableChange::Removed(_)) + )); + assert!(matches!( + changes[1], + OlapChange::Table(TableChange::Added(_)) )); - - // Check the error message contains expected information - if let OlapChange::Table(TableChange::ValidationError { - table_name, - message, - .. - }) = &changes[0] - { - assert_eq!(table_name, "test"); - assert!(message.contains("cluster_a")); - assert!(message.contains("cluster_b")); - assert!(message.contains("replication path")); - } else { - panic!("Expected ValidationError variant"); - } } #[test] diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 112e3335bb..ecb34f4a08 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -1733,6 +1733,12 @@ impl OlapOperations for ConfiguredDBClient { debug!("Could not extract engine from CREATE TABLE query, falling back to system.tables engine column"); engine.as_str().try_into().ok() }; + + // Normalize auto-injected replication params so they match user code + // This prevents spurious diffs when comparing tables from the DB with code + let engine_parsed = engine_parsed + .map(|e: ClickhouseEngine| e.normalize_auto_injected_params(&table_name)); + let engine_params_hash = engine_parsed .as_ref() .map(|e: &ClickhouseEngine| e.non_alterable_params_hash()); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 7fa16b1dd0..3ab8814a5f 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -885,6 +885,93 @@ impl ClickhouseEngine { self.is_merge_tree_family() || matches!(self, ClickhouseEngine::S3 { .. 
}) } + /// Normalize auto-injected replication parameters back to None + /// + /// When Moose auto-injects replication params in dev mode or with clusters, + /// it uses a standard pattern. When reading tables back from ClickHouse, + /// we normalize these back to None so they match the user's code (which + /// doesn't specify params) and don't show spurious diffs. + /// + /// Auto-injected pattern: + /// - keeper_path: `/clickhouse/tables/{database}/{shard}/{table_name}` + /// - replica_name: `{replica}` + pub fn normalize_auto_injected_params(&self, table_name: &str) -> Self { + let expected_keeper_path = + format!("/clickhouse/tables/{{database}}/{{shard}}/{}", table_name); + let expected_replica_name = "{replica}"; + + match self { + ClickhouseEngine::ReplicatedMergeTree { + keeper_path, + replica_name, + } => { + if keeper_path.as_deref() == Some(&expected_keeper_path) + && replica_name.as_deref() == Some(expected_replica_name) + { + ClickhouseEngine::ReplicatedMergeTree { + keeper_path: None, + replica_name: None, + } + } else { + self.clone() + } + } + ClickhouseEngine::ReplicatedReplacingMergeTree { + keeper_path, + replica_name, + ver, + is_deleted, + } => { + if keeper_path.as_deref() == Some(&expected_keeper_path) + && replica_name.as_deref() == Some(expected_replica_name) + { + ClickhouseEngine::ReplicatedReplacingMergeTree { + keeper_path: None, + replica_name: None, + ver: ver.clone(), + is_deleted: is_deleted.clone(), + } + } else { + self.clone() + } + } + ClickhouseEngine::ReplicatedAggregatingMergeTree { + keeper_path, + replica_name, + } => { + if keeper_path.as_deref() == Some(&expected_keeper_path) + && replica_name.as_deref() == Some(expected_replica_name) + { + ClickhouseEngine::ReplicatedAggregatingMergeTree { + keeper_path: None, + replica_name: None, + } + } else { + self.clone() + } + } + ClickhouseEngine::ReplicatedSummingMergeTree { + keeper_path, + replica_name, + columns, + } => { + if keeper_path.as_deref() == 
Some(&expected_keeper_path) + && replica_name.as_deref() == Some(expected_replica_name) + { + ClickhouseEngine::ReplicatedSummingMergeTree { + keeper_path: None, + replica_name: None, + columns: columns.clone(), + } + } else { + self.clone() + } + } + // Non-replicated engines don't need normalization + _ => self.clone(), + } + } + /// Convert engine to string for proto storage (no sensitive data) pub fn to_proto_string(&self) -> String { match self { @@ -2462,7 +2549,7 @@ pub fn create_table_query( } pub static DROP_TABLE_TEMPLATE: &str = r#" -DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}}; +DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} SYNC; "#; pub fn drop_table_query( @@ -4617,7 +4704,10 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; "DROP query should contain ON CLUSTER clause" ); - // ON CLUSTER should come after DROP TABLE but before the table name or right after table name + // Should have SYNC (always present) + assert!(query.contains("SYNC"), "DROP query should contain SYNC"); + + // Should have DROP TABLE assert!(query.contains("DROP TABLE")); } @@ -4632,6 +4722,9 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; "DROP query should not contain ON CLUSTER clause when cluster_name is None" ); + // Should still have SYNC (for replicated tables) + assert!(query.contains("SYNC"), "DROP query should contain SYNC"); + // Should still have DROP TABLE assert!(query.contains("DROP TABLE")); } @@ -4768,4 +4861,82 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; _ => panic!("Expected InvalidParameters error"), } } + + #[test] + fn test_normalize_auto_injected_params_replicated_merge_tree() { + // Test auto-injected params are normalized to None + let engine = ClickhouseEngine::ReplicatedMergeTree { + keeper_path: Some("/clickhouse/tables/{database}/{shard}/MyTable".to_string()), + 
replica_name: Some("{replica}".to_string()), + }; + + let normalized = engine.normalize_auto_injected_params("MyTable"); + + assert!(matches!( + normalized, + ClickhouseEngine::ReplicatedMergeTree { + keeper_path: None, + replica_name: None, + } + )); + } + + #[test] + fn test_normalize_auto_injected_params_custom_params_unchanged() { + // Test custom params are NOT normalized + let engine = ClickhouseEngine::ReplicatedMergeTree { + keeper_path: Some("/custom/path/MyTable".to_string()), + replica_name: Some("custom_replica".to_string()), + }; + + let normalized = engine.normalize_auto_injected_params("MyTable"); + + // Should remain unchanged + match normalized { + ClickhouseEngine::ReplicatedMergeTree { + keeper_path, + replica_name, + } => { + assert_eq!(keeper_path, Some("/custom/path/MyTable".to_string())); + assert_eq!(replica_name, Some("custom_replica".to_string())); + } + _ => panic!("Expected ReplicatedMergeTree"), + } + } + + #[test] + fn test_normalize_auto_injected_params_replicated_replacing_merge_tree() { + // Test ReplicatedReplacingMergeTree with auto-injected params + let engine = ClickhouseEngine::ReplicatedReplacingMergeTree { + keeper_path: Some("/clickhouse/tables/{database}/{shard}/MyTable".to_string()), + replica_name: Some("{replica}".to_string()), + ver: Some("version_col".to_string()), + is_deleted: None, + }; + + let normalized = engine.normalize_auto_injected_params("MyTable"); + + match normalized { + ClickhouseEngine::ReplicatedReplacingMergeTree { + keeper_path, + replica_name, + ver, + is_deleted, + } => { + assert_eq!(keeper_path, None); + assert_eq!(replica_name, None); + assert_eq!(ver, Some("version_col".to_string())); + assert_eq!(is_deleted, None); + } + _ => panic!("Expected ReplicatedReplacingMergeTree"), + } + } + + #[test] + fn test_normalize_auto_injected_params_non_replicated_unchanged() { + // Test non-replicated engines are unchanged + let engine = ClickhouseEngine::MergeTree; + let normalized = 
engine.normalize_auto_injected_params("MyTable"); + assert!(matches!(normalized, ClickhouseEngine::MergeTree)); + } } diff --git a/apps/framework-cli/src/utilities/migration_plan_schema.json b/apps/framework-cli/src/utilities/migration_plan_schema.json index 85ba7da9f4..a9794af48c 100644 --- a/apps/framework-cli/src/utilities/migration_plan_schema.json +++ b/apps/framework-cli/src/utilities/migration_plan_schema.json @@ -201,8 +201,13 @@ } }, "engine": { - "type": ["string", "null"], - "default": null + "anyOf": [ + { "type": "string" }, + { "type": "object" }, + { "type": "null" } + ], + "default": null, + "description": "Table engine configuration. Can be a simple string (e.g., 'MergeTree') or an object for complex engines (e.g., ReplicatedMergeTree with parameters)" }, "version": { "anyOf": [ diff --git a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py index 1c52b7da4e..16d4097c7a 100644 --- a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py +++ b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py @@ -234,6 +234,29 @@ def __init__(self, name: str, config: OlapConfig = OlapConfig(), **kwargs): ) _tables[registry_key] = self + # Validate cluster and explicit replication params are not both specified + if config.cluster: + from moose_lib.blocks import ( + ReplicatedMergeTreeEngine, + ReplicatedReplacingMergeTreeEngine, + ReplicatedAggregatingMergeTreeEngine, + ReplicatedSummingMergeTreeEngine, + ) + + if isinstance(config.engine, ( + ReplicatedMergeTreeEngine, + ReplicatedReplacingMergeTreeEngine, + ReplicatedAggregatingMergeTreeEngine, + ReplicatedSummingMergeTreeEngine, + )): + if config.engine.keeper_path is not None or config.engine.replica_name is not None: + raise ValueError( + f"OlapTable {name}: Cannot specify both 'cluster' and explicit replication params " + f"('keeper_path' or 'replica_name'). 
" + f"Use 'cluster' for auto-injected params, or use explicit 'keeper_path' and " + f"'replica_name' without 'cluster'." + ) + # Check if using legacy enum-based engine configuration if config.engine and isinstance(config.engine, ClickHouseEngines): logger = Logger(action="OlapTable") diff --git a/packages/py-moose-lib/tests/test_cluster_validation.py b/packages/py-moose-lib/tests/test_cluster_validation.py new file mode 100644 index 0000000000..0073c804ba --- /dev/null +++ b/packages/py-moose-lib/tests/test_cluster_validation.py @@ -0,0 +1,86 @@ +"""Tests for OlapTable cluster validation.""" + +import pytest +from moose_lib import OlapTable, OlapConfig, MergeTreeEngine, ReplicatedMergeTreeEngine +from pydantic import BaseModel + + +class SampleModel(BaseModel): + """Test model for cluster validation tests.""" + + id: str + value: int + + +def test_cluster_only_is_allowed(): + """Test that specifying only cluster works.""" + table = OlapTable[SampleModel]( + "TestClusterOnly", + OlapConfig( + engine=MergeTreeEngine(), + order_by_fields=["id"], + cluster="test_cluster", + ), + ) + assert table is not None + + +def test_explicit_params_only_is_allowed(): + """Test that specifying explicit keeper_path and replica_name without cluster works.""" + table = OlapTable[SampleModel]( + "TestExplicitOnly", + OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{table}", + replica_name="{replica}", + ), + order_by_fields=["id"], + ), + ) + assert table is not None + + +def test_cluster_and_explicit_params_raises_error(): + """Test that specifying both cluster and explicit keeper_path/replica_name raises an error.""" + with pytest.raises( + ValueError, + match=r"Cannot specify both 'cluster' and explicit replication params", + ): + OlapTable[SampleModel]( + "TestBothClusterAndExplicit", + OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{table}", + replica_name="{replica}", + ), + 
order_by_fields=["id"], + cluster="test_cluster", + ), + ) + + +def test_non_replicated_engine_with_cluster_is_allowed(): + """Test that non-replicated engines can have a cluster specified.""" + table = OlapTable[SampleModel]( + "TestMergeTreeWithCluster", + OlapConfig( + engine=MergeTreeEngine(), + order_by_fields=["id"], + cluster="test_cluster", + ), + ) + assert table is not None + + +def test_replicated_engine_without_cluster_or_explicit_params_is_allowed(): + """Test that ReplicatedMergeTree without cluster or explicit params works (ClickHouse Cloud mode).""" + table = OlapTable[SampleModel]( + "TestCloudMode", + OlapConfig( + engine=ReplicatedMergeTreeEngine(), + order_by_fields=["id"], + # No cluster, no keeper_path, no replica_name + ), + ) + assert table is not None + diff --git a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts index 54411ec219..2ed3871038 100644 --- a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts +++ b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts @@ -534,6 +534,20 @@ export class OlapTable extends TypedBase> { ); } + // Validate cluster and explicit replication params are not both specified + const hasCluster = typeof (resolvedConfig as any).cluster === "string"; + const hasKeeperPath = + typeof (resolvedConfig as any).keeperPath === "string"; + const hasReplicaName = + typeof (resolvedConfig as any).replicaName === "string"; + + if (hasCluster && (hasKeeperPath || hasReplicaName)) { + throw new Error( + `OlapTable ${name}: Cannot specify both 'cluster' and explicit replication params ('keeperPath' or 'replicaName'). 
` + + `Use 'cluster' for auto-injected params, or use explicit 'keeperPath' and 'replicaName' without 'cluster'.`, + ); + } + super(name, resolvedConfig, schema, columns, validators); this.name = name; diff --git a/packages/ts-moose-lib/tests/cluster-validation.test.ts b/packages/ts-moose-lib/tests/cluster-validation.test.ts new file mode 100644 index 0000000000..6f3feea84c --- /dev/null +++ b/packages/ts-moose-lib/tests/cluster-validation.test.ts @@ -0,0 +1,121 @@ +import { expect } from "chai"; +import { OlapTable, ClickHouseEngines } from "../src/index"; +import { IJsonSchemaCollection } from "typia/src/schemas/json/IJsonSchemaCollection"; +import { Column } from "../src/dataModels/dataModelTypes"; + +interface TestModel { + id: string; + value: number; +} + +// Mock schema and columns for testing +const createMockSchema = (): IJsonSchemaCollection.IV3_1 => ({ + version: "3.1", + components: { schemas: {} }, + schemas: [{ type: "object", properties: {} }], +}); + +const createMockColumns = (fields: string[]): Column[] => + fields.map((field) => ({ + name: field as any, + data_type: "String" as any, + required: true, + unique: false, + primary_key: false, + default: null, + ttl: null, + annotations: [], + })); + +// Helper function to create OlapTable with mock schema for testing +const createTestOlapTable = (name: string, config: any) => { + return new OlapTable( + name, + config, + createMockSchema(), + createMockColumns(["id", "value"]), + ); +}; + +describe("OlapTable Cluster Validation", () => { + it("should allow cluster without explicit replication params", () => { + expect(() => { + createTestOlapTable("TestClusterOnly", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "test_cluster", + }); + }).to.not.throw(); + }); + + it("should allow explicit keeperPath and replicaName without cluster", () => { + expect(() => { + createTestOlapTable("TestExplicitOnly", { + engine: ClickHouseEngines.ReplicatedMergeTree, + 
orderByFields: ["id"], + keeperPath: "/clickhouse/tables/{database}/{table}", + replicaName: "{replica}", + }); + }).to.not.throw(); + }); + + it("should throw error when both cluster and keeperPath are specified", () => { + expect(() => { + createTestOlapTable("TestBothClusterAndKeeper", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "test_cluster", + keeperPath: "/clickhouse/tables/{database}/{table}", + }); + }).to.throw( + /Cannot specify both 'cluster' and explicit replication params/, + ); + }); + + it("should throw error when both cluster and replicaName are specified", () => { + expect(() => { + createTestOlapTable("TestBothClusterAndReplica", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "test_cluster", + replicaName: "{replica}", + }); + }).to.throw( + /Cannot specify both 'cluster' and explicit replication params/, + ); + }); + + it("should throw error when cluster, keeperPath, and replicaName are all specified", () => { + expect(() => { + createTestOlapTable("TestAll", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "test_cluster", + keeperPath: "/clickhouse/tables/{database}/{table}", + replicaName: "{replica}", + }); + }).to.throw( + /Cannot specify both 'cluster' and explicit replication params/, + ); + }); + + it("should allow non-replicated engines with cluster", () => { + expect(() => { + createTestOlapTable("TestMergeTreeWithCluster", { + engine: ClickHouseEngines.MergeTree, + orderByFields: ["id"], + cluster: "test_cluster", + }); + }).to.not.throw(); + }); + + it("should allow ReplicatedMergeTree without cluster or explicit params (ClickHouse Cloud mode)", () => { + expect(() => { + createTestOlapTable("TestCloudMode", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + // No cluster, no keeperPath, no replicaName + }); + }).to.not.throw(); + }); +}); diff --git 
a/templates/python-cluster/app/ingest/models.py b/templates/python-cluster/app/ingest/models.py index 1bcd83f816..40f013c4ad 100644 --- a/templates/python-cluster/app/ingest/models.py +++ b/templates/python-cluster/app/ingest/models.py @@ -34,6 +34,13 @@ class TableD(BaseModel): timestamp: float +# Table with ReplicatedMergeTree but no cluster or explicit params (ClickHouse Cloud mode) +class TableE(BaseModel): + id: Key[str] + status: str + timestamp: float + + # OLAP Tables # table_a: Uses cluster_a with ReplicatedMergeTree @@ -77,3 +84,13 @@ class TableD(BaseModel): ), ) +# TableE: ReplicatedMergeTree with auto-injected params (ClickHouse Cloud mode) +table_e = OlapTable[TableE]( + "TableE", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine(), + # No cluster, no keeper_path, no replica_name - Moose will auto-inject in dev + ), +) + diff --git a/templates/typescript-cluster/src/ingest/models.ts b/templates/typescript-cluster/src/ingest/models.ts index 05849b5fe8..a1dc4744fd 100644 --- a/templates/typescript-cluster/src/ingest/models.ts +++ b/templates/typescript-cluster/src/ingest/models.ts @@ -32,6 +32,13 @@ export interface TableD { timestamp: number; } +/** Table with ReplicatedMergeTree but no cluster or explicit params (ClickHouse Cloud mode) */ +export interface TableE { + id: Key; + status: string; + timestamp: number; +} + /** OLAP Tables */ // TableA: Uses cluster_a with ReplicatedMergeTree @@ -61,3 +68,10 @@ export const tableD = new OlapTable("TableD", { keeperPath: "/clickhouse/tables/{database}/{table}", replicaName: "{replica}", }); + +// TableE: ReplicatedMergeTree with auto-injected params (ClickHouse Cloud mode) +export const tableE = new OlapTable("TableE", { + orderByFields: ["id"], + engine: ClickHouseEngines.ReplicatedMergeTree, + // No cluster, no keeperPath, no replicaName - Moose will auto-inject in dev +}); From 98b93e617663f29788919eea5e722457d9ef8155 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 4 
Nov 2025 22:00:47 -0700 Subject: [PATCH 10/23] fix yml --- .github/workflows/test-framework-cli.yaml | 79 +++++++++++++++++++++-- 1 file changed, 72 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-framework-cli.yaml b/.github/workflows/test-framework-cli.yaml index c6fafb16de..e0cd705480 100644 --- a/.github/workflows/test-framework-cli.yaml +++ b/.github/workflows/test-framework-cli.yaml @@ -716,11 +716,73 @@ jobs: run: | cat ~/.moose/*-cli.log - test-e2e-cluster: + test-e2e-cluster-typescript: needs: [detect-changes, check, test-cli, test-ts-moose-lib, test-py-moose-lib] if: needs.detect-changes.outputs.should_run == 'true' - name: Test E2E Cluster Support (Node 20, Python 3.13) + name: Test E2E Cluster Support - TypeScript (Node 20) + runs-on: ubuntu-latest + permissions: + contents: read + env: + RUST_BACKTRACE: full + steps: + - name: Install Protoc (Needed for Temporal) + uses: arduino/setup-protoc@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + version: "23.x" + + - name: Checkout + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.sha }} + + # Login to Docker hub to get higher rate limits when moose pulls images + - name: Login to Docker Hub + uses: ./.github/actions/docker-login + with: + op-service-account-token: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }} + + - uses: pnpm/action-setup@v4 + + - name: Install node + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: "pnpm" + + - name: Get system info + id: system + run: | + echo "version=$(lsb_release -rs)" >> $GITHUB_OUTPUT + echo "distro=$(lsb_release -is)" >> $GITHUB_OUTPUT + + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + cache: true + cache-shared-key: ${{ runner.os }}-${{ steps.system.outputs.distro }}-${{ steps.system.outputs.version }}-${{ runner.arch }}-rust + cache-on-failure: true + cache-all-crates: true + cache-workspace-crates: true + 
+ - name: Run TypeScript Cluster E2E Tests + run: pnpm install --frozen-lockfile && pnpm --filter=framework-cli-e2e run test -- --grep "TypeScript Cluster Template" + env: + MOOSE_TELEMETRY_ENABLED: false + + - name: Inspect Logs + if: always() + run: | + cat ~/.moose/*-cli.log + + test-e2e-cluster-python: + needs: + [detect-changes, check, test-cli, test-ts-moose-lib, test-py-moose-lib] + if: needs.detect-changes.outputs.should_run == 'true' + name: Test E2E Cluster Support - Python (Python 3.13) runs-on: ubuntu-latest permissions: contents: read @@ -776,8 +838,8 @@ jobs: - name: Upgrade Python build tools run: pip install --upgrade pip setuptools wheel - - name: Run Cluster E2E Tests - run: pnpm install --frozen-lockfile && pnpm --filter=framework-cli-e2e run test -- --grep "Cluster Support" + - name: Run Python Cluster E2E Tests + run: pnpm install --frozen-lockfile && pnpm --filter=framework-cli-e2e run test -- --grep "Python Cluster Template" env: MOOSE_TELEMETRY_ENABLED: false @@ -848,7 +910,8 @@ jobs: test-e2e-python-tests, test-e2e-backward-compatibility-typescript, test-e2e-backward-compatibility-python, - test-e2e-cluster, + test-e2e-cluster-typescript, + test-e2e-cluster-python, lints, ] if: always() @@ -878,7 +941,8 @@ jobs: [[ "${{ needs.test-e2e-python-tests.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-backward-compatibility-python.result }}" == "failure" ]] || \ - [[ "${{ needs.test-e2e-cluster.result }}" == "failure" ]] || \ + [[ "${{ needs.test-e2e-cluster-typescript.result }}" == "failure" ]] || \ + [[ "${{ needs.test-e2e-cluster-python.result }}" == "failure" ]] || \ [[ "${{ needs.lints.result }}" == "failure" ]]; then echo "One or more required jobs failed" exit 1 @@ -894,7 +958,8 @@ jobs: [[ "${{ needs.test-e2e-python-tests.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "success" ]] && \ [[ 
"${{ needs.test-e2e-backward-compatibility-python.result }}" == "success" ]] && \ - [[ "${{ needs.test-e2e-cluster.result }}" == "success" ]] && \ + [[ "${{ needs.test-e2e-cluster-typescript.result }}" == "success" ]] && \ + [[ "${{ needs.test-e2e-cluster-python.result }}" == "success" ]] && \ [[ "${{ needs.lints.result }}" == "success" ]]; then echo "All required jobs succeeded" exit 0 From e7426edec3704a930b274ee0983fef2eb02483a6 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Wed, 5 Nov 2025 09:52:45 -0700 Subject: [PATCH 11/23] docs --- .../llm-docs/python/table-setup.md | 53 +++++++++++++++++++ .../llm-docs/typescript/table-setup.md | 47 ++++++++++++++++ .../src/pages/moose/configuration.mdx | 8 +++ 3 files changed, 108 insertions(+) diff --git a/apps/framework-docs/llm-docs/python/table-setup.md b/apps/framework-docs/llm-docs/python/table-setup.md index 81820cc42c..5bbcd258bc 100644 --- a/apps/framework-docs/llm-docs/python/table-setup.md +++ b/apps/framework-docs/llm-docs/python/table-setup.md @@ -404,6 +404,59 @@ Available replicated engines: - `ReplicatedAggregatingMergeTreeEngine` - Replicated with aggregation - `ReplicatedSummingMergeTreeEngine` - Replicated with summation +### Cluster-Aware Replicated Tables + +For multi-node ClickHouse deployments, you can specify a cluster name to use `ON CLUSTER` DDL operations: + +```python +from moose_lib import OlapTable, OlapConfig +from moose_lib.blocks import ReplicatedMergeTreeEngine +from pydantic import BaseModel +from datetime import datetime + +class ReplicatedData(BaseModel): + id: str + data: str + timestamp: datetime + +# Replicated table on a cluster +clustered_table = OlapTable[ReplicatedData]( + "ClusteredTable", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine(), + cluster="default" # References cluster from moose.config.toml + ) +) +``` + +**Configuration in `moose.config.toml`:** +```toml +[[clickhouse_config.clusters]] +name = "default" +``` + +**When to omit all 
parameters (recommended):** +- ✅ **ClickHouse Cloud** - Platform manages replication automatically +- ✅ **Local development** - Moose auto-injects params: `/clickhouse/tables/{database}/{shard}/{table_name}` +- ✅ **Most production deployments** - Works out of the box + +**When to use `cluster`:** +- ✅ Multi-node self-managed ClickHouse with cluster configuration +- ✅ Need `ON CLUSTER` DDL for distributed operations +- ✅ Works without explicit `keeper_path`/`replica_name` parameters + +**When to use explicit `keeper_path`/`replica_name`:** +- ✅ Custom replication topology required +- ✅ Advanced ZooKeeper/Keeper configuration +- ✅ Specific self-managed deployment requirements + +**Important:** Cannot specify both `cluster` and explicit `keeper_path`/`replica_name` - choose one approach. + +**Local Development:** Moose configures cluster names to point to your local ClickHouse instance, letting you develop with `ON CLUSTER` DDL without running multiple nodes. + +**Production:** Cluster names must match your ClickHouse `remote_servers` configuration. + ### S3Queue Engine Tables The S3Queue engine enables automatic processing of files from S3 buckets as they arrive. 
diff --git a/apps/framework-docs/llm-docs/typescript/table-setup.md b/apps/framework-docs/llm-docs/typescript/table-setup.md index e28f14c84e..aec26eb2ad 100644 --- a/apps/framework-docs/llm-docs/typescript/table-setup.md +++ b/apps/framework-docs/llm-docs/typescript/table-setup.md @@ -237,8 +237,55 @@ export const ReplicatedDedup = new OlapTable("ReplicatedDedup" **Note**: The `keeperPath` and `replicaName` parameters are optional: - **Self-managed ClickHouse**: Both parameters are required for configuring ZooKeeper/ClickHouse Keeper paths - **ClickHouse Cloud / Boreal**: Omit both parameters - the platform manages replication automatically + +### Cluster-Aware Replicated Tables + +For multi-node ClickHouse deployments, you can specify a cluster name to use `ON CLUSTER` DDL operations: + +```typescript +import { OlapTable, ClickHouseEngines, Key } from '@514labs/moose-lib'; + +interface ReplicatedSchema { + id: Key; + data: string; + timestamp: Date; +} + +// Replicated table on a cluster +export const ClusteredTable = new OlapTable("ClusteredTable", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "default" // References cluster from moose.config.toml +}); ``` +**Configuration in `moose.config.toml`:** +```toml +[[clickhouse_config.clusters]] +name = "default" +``` + +**When to omit all parameters (recommended):** +- ✅ **ClickHouse Cloud** - Platform manages replication automatically +- ✅ **Local development** - Moose auto-injects params: `/clickhouse/tables/{database}/{shard}/{table_name}` +- ✅ **Most production deployments** - Works out of the box + +**When to use `cluster`:** +- ✅ Multi-node self-managed ClickHouse with cluster configuration +- ✅ Need `ON CLUSTER` DDL for distributed operations +- ✅ Works without explicit `keeperPath`/`replicaName` parameters + +**When to use explicit `keeperPath`/`replicaName`:** +- ✅ Custom replication topology required +- ✅ Advanced ZooKeeper/Keeper configuration +- ✅ Specific 
self-managed deployment requirements + +**Important:** Cannot specify both `cluster` and explicit `keeperPath`/`replicaName` - choose one approach. + +**Local Development:** Moose configures cluster names to point to your local ClickHouse instance, letting you develop with `ON CLUSTER` DDL without running multiple nodes. + +**Production:** Cluster names must match your ClickHouse `remote_servers` configuration. + ### S3Queue Engine Tables The S3Queue engine allows you to automatically process files from S3 buckets as they arrive. diff --git a/apps/framework-docs/src/pages/moose/configuration.mdx b/apps/framework-docs/src/pages/moose/configuration.mdx index d271969c23..cffb1ae179 100644 --- a/apps/framework-docs/src/pages/moose/configuration.mdx +++ b/apps/framework-docs/src/pages/moose/configuration.mdx @@ -243,6 +243,14 @@ native_port = 9000 # Optional list of additional databases to create on startup (Default: []) # additional_databases = ["analytics", "staging"] +# ClickHouse cluster configuration for replicated tables (optional) +# Define clusters for use with ON CLUSTER DDL operations and distributed tables +# In local dev, Moose creates single-node clusters. In production, names must match your ClickHouse remote_servers config. 
+# [[clickhouse_config.clusters]] +# name = "default" +# [[clickhouse_config.clusters]] +# name = "my_cluster" + # HTTP server configuration for local development [http_server_config] # Host to bind the webserver to (Default: "localhost") From bf94617f816e111241c92731fc692b5061e008c8 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Wed, 5 Nov 2025 10:09:32 -0700 Subject: [PATCH 12/23] fix yml --- .github/workflows/test-framework-cli.yaml | 4 ++-- templates/python-cluster/README.md | 7 ------- templates/typescript-cluster/README.md | 7 ------- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/.github/workflows/test-framework-cli.yaml b/.github/workflows/test-framework-cli.yaml index e0cd705480..ef36fb0486 100644 --- a/.github/workflows/test-framework-cli.yaml +++ b/.github/workflows/test-framework-cli.yaml @@ -43,8 +43,8 @@ jobs: "^\.github/workflows/test-framework-cli\.yaml" "^apps/framework-cli-e2e/" "^apps/framework-cli/" - "^templates/python/" - "^templates/typescript/" + "^templates/python" + "^templates/typescript" "^packages/" "Cargo.lock" "pnpm-lock.yaml" diff --git a/templates/python-cluster/README.md b/templates/python-cluster/README.md index 22a9df3b1c..3f4a552400 100644 --- a/templates/python-cluster/README.md +++ b/templates/python-cluster/README.md @@ -1,10 +1,3 @@ # Python Cluster Test Template This is a test-only template for E2E testing of ClickHouse cluster support in MooseStack. 
- -## Features - -- Three OLAP tables demonstrating different cluster configurations -- `table_a`: Uses `cluster_a` -- `table_b`: Uses `cluster_b` -- `table_c`: No cluster (for mixed environment testing) diff --git a/templates/typescript-cluster/README.md b/templates/typescript-cluster/README.md index 2ff8400255..ad92d41ca6 100644 --- a/templates/typescript-cluster/README.md +++ b/templates/typescript-cluster/README.md @@ -1,10 +1,3 @@ # TypeScript Cluster Test Template This is a test-only template for E2E testing of ClickHouse cluster support in MooseStack. - -## Features - -- Three OLAP tables demonstrating different cluster configurations -- `TableA`: Uses `cluster_a` -- `TableB`: Uses `cluster_b` -- `TableC`: No cluster (for mixed environment testing) From 47c78747641ea71238f7ad0b66e7951aa8c7c1ae Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Wed, 5 Nov 2025 11:18:43 -0700 Subject: [PATCH 13/23] cleanup --- .../framework/core/infrastructure/table.rs | 4 - .../src/infrastructure/olap/clickhouse/mod.rs | 5 - .../infrastructure/olap/clickhouse/queries.rs | 165 ------------------ 3 files changed, 174 deletions(-) diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index 72c4fdff41..03e358cbdb 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -515,10 +515,6 @@ impl Table { }) }); - // Normalize auto-injected replication params so they match user code - // This prevents spurious diffs when comparing tables from inframap with code - let engine = engine.map(|e| e.normalize_auto_injected_params(&proto.name)); - // Engine settings are now handled via table_settings field let fallback = || -> OrderBy { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index ecb34f4a08..12cd98558f 100644 --- 
a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -1734,11 +1734,6 @@ impl OlapOperations for ConfiguredDBClient { engine.as_str().try_into().ok() }; - // Normalize auto-injected replication params so they match user code - // This prevents spurious diffs when comparing tables from the DB with code - let engine_parsed = engine_parsed - .map(|e: ClickhouseEngine| e.normalize_auto_injected_params(&table_name)); - let engine_params_hash = engine_parsed .as_ref() .map(|e: &ClickhouseEngine| e.non_alterable_params_hash()); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 3ab8814a5f..e1f4f970c1 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -885,93 +885,6 @@ impl ClickhouseEngine { self.is_merge_tree_family() || matches!(self, ClickhouseEngine::S3 { .. }) } - /// Normalize auto-injected replication parameters back to None - /// - /// When Moose auto-injects replication params in dev mode or with clusters, - /// it uses a standard pattern. When reading tables back from ClickHouse, - /// we normalize these back to None so they match the user's code (which - /// doesn't specify params) and don't show spurious diffs. 
- /// - /// Auto-injected pattern: - /// - keeper_path: `/clickhouse/tables/{database}/{shard}/{table_name}` - /// - replica_name: `{replica}` - pub fn normalize_auto_injected_params(&self, table_name: &str) -> Self { - let expected_keeper_path = - format!("/clickhouse/tables/{{database}}/{{shard}}/{}", table_name); - let expected_replica_name = "{replica}"; - - match self { - ClickhouseEngine::ReplicatedMergeTree { - keeper_path, - replica_name, - } => { - if keeper_path.as_deref() == Some(&expected_keeper_path) - && replica_name.as_deref() == Some(expected_replica_name) - { - ClickhouseEngine::ReplicatedMergeTree { - keeper_path: None, - replica_name: None, - } - } else { - self.clone() - } - } - ClickhouseEngine::ReplicatedReplacingMergeTree { - keeper_path, - replica_name, - ver, - is_deleted, - } => { - if keeper_path.as_deref() == Some(&expected_keeper_path) - && replica_name.as_deref() == Some(expected_replica_name) - { - ClickhouseEngine::ReplicatedReplacingMergeTree { - keeper_path: None, - replica_name: None, - ver: ver.clone(), - is_deleted: is_deleted.clone(), - } - } else { - self.clone() - } - } - ClickhouseEngine::ReplicatedAggregatingMergeTree { - keeper_path, - replica_name, - } => { - if keeper_path.as_deref() == Some(&expected_keeper_path) - && replica_name.as_deref() == Some(expected_replica_name) - { - ClickhouseEngine::ReplicatedAggregatingMergeTree { - keeper_path: None, - replica_name: None, - } - } else { - self.clone() - } - } - ClickhouseEngine::ReplicatedSummingMergeTree { - keeper_path, - replica_name, - columns, - } => { - if keeper_path.as_deref() == Some(&expected_keeper_path) - && replica_name.as_deref() == Some(expected_replica_name) - { - ClickhouseEngine::ReplicatedSummingMergeTree { - keeper_path: None, - replica_name: None, - columns: columns.clone(), - } - } else { - self.clone() - } - } - // Non-replicated engines don't need normalization - _ => self.clone(), - } - } - /// Convert engine to string for proto storage (no 
sensitive data) pub fn to_proto_string(&self) -> String { match self { @@ -4861,82 +4774,4 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; _ => panic!("Expected InvalidParameters error"), } } - - #[test] - fn test_normalize_auto_injected_params_replicated_merge_tree() { - // Test auto-injected params are normalized to None - let engine = ClickhouseEngine::ReplicatedMergeTree { - keeper_path: Some("/clickhouse/tables/{database}/{shard}/MyTable".to_string()), - replica_name: Some("{replica}".to_string()), - }; - - let normalized = engine.normalize_auto_injected_params("MyTable"); - - assert!(matches!( - normalized, - ClickhouseEngine::ReplicatedMergeTree { - keeper_path: None, - replica_name: None, - } - )); - } - - #[test] - fn test_normalize_auto_injected_params_custom_params_unchanged() { - // Test custom params are NOT normalized - let engine = ClickhouseEngine::ReplicatedMergeTree { - keeper_path: Some("/custom/path/MyTable".to_string()), - replica_name: Some("custom_replica".to_string()), - }; - - let normalized = engine.normalize_auto_injected_params("MyTable"); - - // Should remain unchanged - match normalized { - ClickhouseEngine::ReplicatedMergeTree { - keeper_path, - replica_name, - } => { - assert_eq!(keeper_path, Some("/custom/path/MyTable".to_string())); - assert_eq!(replica_name, Some("custom_replica".to_string())); - } - _ => panic!("Expected ReplicatedMergeTree"), - } - } - - #[test] - fn test_normalize_auto_injected_params_replicated_replacing_merge_tree() { - // Test ReplicatedReplacingMergeTree with auto-injected params - let engine = ClickhouseEngine::ReplicatedReplacingMergeTree { - keeper_path: Some("/clickhouse/tables/{database}/{shard}/MyTable".to_string()), - replica_name: Some("{replica}".to_string()), - ver: Some("version_col".to_string()), - is_deleted: None, - }; - - let normalized = engine.normalize_auto_injected_params("MyTable"); - - match normalized { - ClickhouseEngine::ReplicatedReplacingMergeTree { - 
keeper_path, - replica_name, - ver, - is_deleted, - } => { - assert_eq!(keeper_path, None); - assert_eq!(replica_name, None); - assert_eq!(ver, Some("version_col".to_string())); - assert_eq!(is_deleted, None); - } - _ => panic!("Expected ReplicatedReplacingMergeTree"), - } - } - - #[test] - fn test_normalize_auto_injected_params_non_replicated_unchanged() { - // Test non-replicated engines are unchanged - let engine = ClickhouseEngine::MergeTree; - let normalized = engine.normalize_auto_injected_params("MyTable"); - assert!(matches!(normalized, ClickhouseEngine::MergeTree)); - } } From 89feb5628c0a2bee8305053db1bd8757696178e9 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Wed, 5 Nov 2025 15:22:07 -0700 Subject: [PATCH 14/23] fixes --- .../framework-cli/src/cli/routines/migrate.rs | 2 + .../src/infrastructure/olap/clickhouse/mod.rs | 242 ++++++++++++++---- .../infrastructure/olap/clickhouse/queries.rs | 101 ++++++-- .../src/infrastructure/olap/ddl_ordering.rs | 9 + 4 files changed, 287 insertions(+), 67 deletions(-) diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 6d3e98e393..143162e327 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -1021,6 +1021,7 @@ mod tests { }, after_column: None, database: Some("bad_db".to_string()), + cluster_name: None, }, SerializableOlapOperation::ModifyTableColumn { table: "test".to_string(), @@ -1047,6 +1048,7 @@ mod tests { ttl: None, }, database: Some("another_bad_db".to_string()), + cluster_name: None, }, ]; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 12cd98558f..6402579cb9 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -131,6 +131,8 @@ pub enum SerializableOlapOperation { after_column: Option, /// The 
database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Drop a column from a table DropTableColumn { @@ -140,6 +142,8 @@ pub enum SerializableOlapOperation { column_name: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Modify a column in a table ModifyTableColumn { @@ -151,6 +155,8 @@ pub enum SerializableOlapOperation { after_column: Column, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, RenameTableColumn { /// The table containing the column @@ -161,6 +167,8 @@ pub enum SerializableOlapOperation { after_column_name: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Modify table settings using ALTER TABLE MODIFY SETTING ModifyTableSettings { @@ -172,6 +180,8 @@ pub enum SerializableOlapOperation { after_settings: Option>, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Modify or remove table-level TTL ModifyTableTtl { @@ -180,29 +190,39 @@ pub enum SerializableOlapOperation { after: Option, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, AddTableIndex { table: String, index: TableIndex, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, DropTableIndex { table: String, index_name: String, /// The database containing the table 
(None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, ModifySampleBy { table: String, expression: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, RemoveSampleBy { table: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, RawSql { /// The SQL statements to execute @@ -457,33 +477,59 @@ pub async fn execute_atomic_operation( column, after_column, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_add_table_column(target_db, table, column, after_column, client).await?; + execute_add_table_column( + target_db, + table, + column, + after_column, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::DropTableColumn { table, column_name, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_drop_table_column(target_db, table, column_name, client).await?; + execute_drop_table_column( + target_db, + table, + column_name, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::ModifyTableColumn { table, before_column, after_column, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_modify_table_column(target_db, table, before_column, after_column, client) - .await?; + execute_modify_table_column( + target_db, + table, + before_column, + after_column, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::RenameTableColumn { table, before_column_name, after_column_name, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); execute_rename_table_column( @@ -491,6 +537,7 @@ pub async fn 
execute_atomic_operation( table, before_column_name, after_column_name, + cluster_name.as_deref(), client, ) .await?; @@ -500,6 +547,7 @@ pub async fn execute_atomic_operation( before_settings, after_settings, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); execute_modify_table_settings( @@ -507,6 +555,7 @@ pub async fn execute_atomic_operation( table, before_settings, after_settings, + cluster_name.as_deref(), client, ) .await?; @@ -516,16 +565,24 @@ pub async fn execute_atomic_operation( before: _, after, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); // Build ALTER TABLE ... [REMOVE TTL | MODIFY TTL expr] + let cluster_clause = cluster_name + .as_ref() + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = if let Some(expr) = after { format!( - "ALTER TABLE `{}`.`{}` MODIFY TTL {}", - target_db, table, expr + "ALTER TABLE `{}`.`{}`{} MODIFY TTL {}", + target_db, table, cluster_clause, expr ) } else { - format!("ALTER TABLE `{}`.`{}` REMOVE TTL", target_db, table) + format!( + "ALTER TABLE `{}`.`{}`{} REMOVE TTL", + target_db, table, cluster_clause + ) }; run_query(&sql, client).await.map_err(|e| { ClickhouseChangesError::ClickhouseClient { @@ -538,29 +595,51 @@ pub async fn execute_atomic_operation( table, index, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_add_table_index(target_db, table, index, client).await?; + execute_add_table_index(target_db, table, index, cluster_name.as_deref(), client) + .await?; } SerializableOlapOperation::DropTableIndex { table, index_name, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_drop_table_index(target_db, table, index_name, client).await?; + execute_drop_table_index( + target_db, + table, + index_name, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::ModifySampleBy { table, expression, 
database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_modify_sample_by(target_db, table, expression, client).await?; + execute_modify_sample_by( + target_db, + table, + expression, + cluster_name.as_deref(), + client, + ) + .await?; } - SerializableOlapOperation::RemoveSampleBy { table, database } => { + SerializableOlapOperation::RemoveSampleBy { + table, + database, + cluster_name, + } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_remove_sample_by(target_db, table, client).await?; + execute_remove_sample_by(target_db, table, cluster_name.as_deref(), client).await?; } SerializableOlapOperation::RawSql { sql, description } => { execute_raw_sql(sql, description, client).await?; @@ -593,6 +672,7 @@ async fn execute_add_table_index( db_name: &str, table_name: &str, index: &TableIndex, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { let args = if index.arguments.is_empty() { @@ -600,10 +680,14 @@ async fn execute_add_table_index( } else { format!("({})", index.arguments.join(", ")) }; + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` ADD INDEX `{}` {} TYPE {}{} GRANULARITY {}", + "ALTER TABLE `{}`.`{}`{} ADD INDEX `{}` {} TYPE {}{} GRANULARITY {}", db_name, table_name, + cluster_clause, index.name, index.expression, index.index_type, @@ -622,11 +706,15 @@ async fn execute_drop_table_index( db_name: &str, table_name: &str, index_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` DROP INDEX `{}`", - db_name, table_name, index_name + "ALTER TABLE `{}`.`{}`{} DROP INDEX `{}`", + db_name, table_name, cluster_clause, index_name ); run_query(&sql, client) 
.await @@ -640,11 +728,15 @@ async fn execute_modify_sample_by( db_name: &str, table_name: &str, expression: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` MODIFY SAMPLE BY {}", - db_name, table_name, expression + "ALTER TABLE `{}`.`{}`{} MODIFY SAMPLE BY {}", + db_name, table_name, cluster_clause, expression ); run_query(&sql, client) .await @@ -657,11 +749,15 @@ async fn execute_modify_sample_by( async fn execute_remove_sample_by( db_name: &str, table_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` REMOVE SAMPLE BY", - db_name, table_name + "ALTER TABLE `{}`.`{}`{} REMOVE SAMPLE BY", + db_name, table_name, cluster_clause ); run_query(&sql, client) .await @@ -701,6 +797,7 @@ async fn execute_add_table_column( table_name: &str, column: &Column, after_column: &Option, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!( @@ -719,10 +816,15 @@ async fn execute_add_table_column( .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); + let add_column_query = format!( - "ALTER TABLE `{}`.`{}` ADD COLUMN `{}` {}{} {}", + "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{} {}", db_name, table_name, + cluster_clause, clickhouse_column.name, column_type_string, default_clause, @@ -745,6 +847,7 @@ async fn execute_drop_table_column( db_name: &str, table_name: &str, column_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!( @@ -752,9 +855,12 
@@ async fn execute_drop_table_column( table_name, column_name ); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let drop_column_query = format!( - "ALTER TABLE `{}`.`{}` DROP COLUMN IF EXISTS `{}`", - db_name, table_name, column_name + "ALTER TABLE `{}`.`{}`{} DROP COLUMN IF EXISTS `{}`", + db_name, table_name, cluster_clause, column_name ); log::debug!("Dropping column: {}", drop_column_query); run_query(&drop_column_query, client).await.map_err(|e| { @@ -777,6 +883,7 @@ async fn execute_modify_table_column( table_name: &str, before_column: &Column, after_column: &Column, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { // Check if only the comment has changed @@ -804,11 +911,26 @@ async fn execute_modify_table_column( let clickhouse_column = std_column_to_clickhouse_column(after_column.clone())?; if let Some(ref comment) = clickhouse_column.comment { - execute_modify_column_comment(db_name, table_name, after_column, comment, client) - .await?; + execute_modify_column_comment( + db_name, + table_name, + after_column, + comment, + cluster_name, + client, + ) + .await?; } else { // If the new comment is None, we still need to update to remove the old comment - execute_modify_column_comment(db_name, table_name, after_column, "", client).await?; + execute_modify_column_comment( + db_name, + table_name, + after_column, + "", + cluster_name, + client, + ) + .await?; } return Ok(()); } @@ -834,6 +956,7 @@ data_type_changed: {data_type_changed}, default_changed: {default_changed}, requ &clickhouse_column, removing_default, removing_ttl, + cluster_name, )?; // Execute all statements in order @@ -859,6 +982,7 @@ async fn execute_modify_column_comment( table_name: &str, column: &Column, comment: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!( @@ -868,7 +992,7 @@ async fn 
execute_modify_column_comment( ); let modify_comment_query = - build_modify_column_comment_sql(db_name, table_name, &column.name, comment)?; + build_modify_column_comment_sql(db_name, table_name, &column.name, comment, cluster_name)?; log::debug!("Modifying column comment: {}", modify_comment_query); run_query(&modify_comment_query, client) @@ -886,25 +1010,30 @@ fn build_modify_column_sql( ch_col: &ClickHouseColumn, removing_default: bool, removing_ttl: bool, + cluster_name: Option<&str>, ) -> Result, ClickhouseChangesError> { let column_type_string = basic_field_type_to_string(&ch_col.column_type)?; + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); + let mut statements = vec![]; // Add REMOVE DEFAULT statement if needed // ClickHouse doesn't allow mixing column properties with REMOVE clauses if removing_default { statements.push(format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN `{}` REMOVE DEFAULT", - db_name, table_name, ch_col.name + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` REMOVE DEFAULT", + db_name, table_name, cluster_clause, ch_col.name )); } // Add REMOVE TTL statement if needed if removing_ttl { statements.push(format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN `{}` REMOVE TTL", - db_name, table_name, ch_col.name + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` REMOVE TTL", + db_name, table_name, cluster_clause, ch_col.name )); } @@ -933,9 +1062,10 @@ fn build_modify_column_sql( let main_sql = if let Some(ref comment) = ch_col.comment { let escaped_comment = comment.replace('\'', "''"); format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN IF EXISTS `{}` {}{}{} COMMENT '{}'", + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{} COMMENT '{}'", db_name, table_name, + cluster_clause, ch_col.name, column_type_string, default_clause, @@ -944,8 +1074,14 @@ fn build_modify_column_sql( ) } else { format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN IF EXISTS `{}` {}{}{}", - db_name, table_name, ch_col.name, 
column_type_string, default_clause, ttl_clause + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}", + db_name, + table_name, + cluster_clause, + ch_col.name, + column_type_string, + default_clause, + ttl_clause ) }; statements.push(main_sql); @@ -958,11 +1094,15 @@ fn build_modify_column_comment_sql( table_name: &str, column_name: &str, comment: &str, + cluster_name: Option<&str>, ) -> Result { let escaped_comment = comment.replace('\'', "''"); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); Ok(format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN `{}` COMMENT '{}'", - db_name, table_name, column_name, escaped_comment + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` COMMENT '{}'", + db_name, table_name, cluster_clause, column_name, escaped_comment )) } @@ -972,6 +1112,7 @@ async fn execute_modify_table_settings( table_name: &str, before_settings: &Option>, after_settings: &Option>, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { use std::collections::HashMap; @@ -1004,8 +1145,12 @@ async fn execute_modify_table_settings( // Execute MODIFY SETTING if there are settings to modify if !settings_to_modify.is_empty() { - let alter_settings_query = - alter_table_modify_settings_query(db_name, table_name, &settings_to_modify)?; + let alter_settings_query = alter_table_modify_settings_query( + db_name, + table_name, + &settings_to_modify, + cluster_name, + )?; log::debug!("Modifying table settings: {}", alter_settings_query); run_query(&alter_settings_query, client) @@ -1018,8 +1163,12 @@ async fn execute_modify_table_settings( // Execute RESET SETTING if there are settings to reset if !settings_to_reset.is_empty() { - let reset_settings_query = - alter_table_reset_settings_query(db_name, table_name, &settings_to_reset)?; + let reset_settings_query = alter_table_reset_settings_query( + db_name, + table_name, + &settings_to_reset, + cluster_name, + )?; 
log::debug!("Resetting table settings: {}", reset_settings_query); run_query(&reset_settings_query, client) @@ -1039,6 +1188,7 @@ async fn execute_rename_table_column( table_name: &str, before_column_name: &str, after_column_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!( @@ -1047,8 +1197,11 @@ async fn execute_rename_table_column( before_column_name, after_column_name ); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let rename_column_query = format!( - "ALTER TABLE `{db_name}`.`{table_name}` RENAME COLUMN `{before_column_name}` TO `{after_column_name}`" + "ALTER TABLE `{db_name}`.`{table_name}`{cluster_clause} RENAME COLUMN `{before_column_name}` TO `{after_column_name}`" ); log::debug!("Renaming column: {}", rename_column_query); run_query(&rename_column_query, client).await.map_err(|e| { @@ -2299,7 +2452,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra }; let ch_after = std_column_to_clickhouse_column(after_column).unwrap(); - let sqls = build_modify_column_sql("db", "table", &ch_after, false, false).unwrap(); + let sqls = build_modify_column_sql("db", "table", &ch_after, false, false, None).unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( @@ -2331,8 +2484,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra }; // Use the pure SQL builder for comment-only update - let sql = - build_modify_column_comment_sql("db", "table", &after_column.name, "new").unwrap(); + let sql = build_modify_column_comment_sql("db", "table", &after_column.name, "new", None) + .unwrap(); assert_eq!( sql, "ALTER TABLE `db`.`table` MODIFY COLUMN `status` COMMENT 'new'" @@ -2360,7 +2513,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); let sqls = - build_modify_column_sql("test_db", 
"users", &clickhouse_column, false, false).unwrap(); + build_modify_column_sql("test_db", "users", &clickhouse_column, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index e1f4f970c1..1092359545 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -2012,9 +2012,10 @@ fn build_summing_merge_tree_ddl(columns: &Option>) -> String { /// Build replication parameters for replicated engines /// /// When keeper_path and replica_name are None: -/// - In dev mode: Injects default parameters for local development using a static table name hash -/// - In production: Returns empty parameters to let ClickHouse use automatic configuration -/// (ClickHouse Cloud or server-configured defaults) +/// - Dev without cluster: Injects table_name-based paths (ON CLUSTER absent, {uuid} won't work) +/// - Dev with cluster: Returns empty params (ON CLUSTER present, ClickHouse uses {uuid}) +/// - Prod with cluster: Returns empty params (ON CLUSTER present, ClickHouse uses {uuid}) +/// - Prod without cluster: Returns empty params (ClickHouse Cloud handles defaults) fn build_replication_params( keeper_path: &Option, replica_name: &Option, @@ -2028,20 +2029,20 @@ fn build_replication_params( Ok(vec![format!("'{}'", path), format!("'{}'", name)]) } (None, None) => { - // Auto-inject parameters if cluster is defined, otherwise use defaults - // This supports both dev (local ClickHouse) and prod (with clusters) - if cluster_name.is_some() || is_dev { - // In dev mode OR when cluster is defined, inject default parameters - // Use table name to ensure unique paths per table, avoiding conflicts - // {shard}, {replica}, and {database} macros are configured in docker-compose or cluster - // Note: {uuid} macro only works with ON CLUSTER 
queries, so we use table name instead + // The {uuid} macro only works with ON CLUSTER queries + // Only dev without cluster needs explicit params + if is_dev && cluster_name.is_none() { + // Dev mode without cluster: inject table_name-based paths + // {shard}, {replica}, and {database} macros are configured in docker-compose Ok(vec![ format!("'/clickhouse/tables/{{database}}/{{shard}}/{}'", table_name), "'{replica}'".to_string(), ]) } else { - // In production without cluster, return empty parameters - // This works for ClickHouse Cloud and properly configured servers + // All other cases: return empty parameters + // - Dev with cluster: ON CLUSTER present → ClickHouse uses {uuid} + // - Prod with cluster: ON CLUSTER present → ClickHouse uses {uuid} + // - Prod without cluster: ClickHouse Cloud handles defaults Ok(vec![]) } } @@ -2483,12 +2484,12 @@ pub fn drop_table_query( } pub static ALTER_TABLE_MODIFY_SETTINGS_TEMPLATE: &str = r#" -ALTER TABLE `{{db_name}}`.`{{table_name}}` +ALTER TABLE `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} MODIFY SETTING {{settings}}; "#; pub static ALTER_TABLE_RESET_SETTINGS_TEMPLATE: &str = r#" -ALTER TABLE `{{db_name}}`.`{{table_name}}` +ALTER TABLE `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} RESET SETTING {{settings}}; "#; @@ -2497,6 +2498,7 @@ pub fn alter_table_modify_settings_query( db_name: &str, table_name: &str, settings: &std::collections::HashMap, + cluster_name: Option<&str>, ) -> Result { if settings.is_empty() { return Err(ClickhouseError::InvalidParameters { @@ -2523,6 +2525,7 @@ pub fn alter_table_modify_settings_query( "db_name": db_name, "table_name": table_name, "settings": settings_str, + "cluster_name": cluster_name, }); Ok(reg.render_template(ALTER_TABLE_MODIFY_SETTINGS_TEMPLATE, &context)?) 
@@ -2533,6 +2536,7 @@ pub fn alter_table_reset_settings_query( db_name: &str, table_name: &str, setting_names: &[String], + cluster_name: Option<&str>, ) -> Result { if setting_names.is_empty() { return Err(ClickhouseError::InvalidParameters { @@ -2549,6 +2553,7 @@ pub fn alter_table_reset_settings_query( "db_name": db_name, "table_name": table_name, "settings": settings_str, + "cluster_name": cluster_name, }); Ok(reg.render_template(ALTER_TABLE_RESET_SETTINGS_TEMPLATE, &context)?) @@ -4642,6 +4647,60 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; assert!(query.contains("DROP TABLE")); } + #[test] + fn test_alter_table_modify_setting_with_cluster() { + use std::collections::HashMap; + + let mut settings = HashMap::new(); + settings.insert("index_granularity".to_string(), "4096".to_string()); + settings.insert("ttl_only_drop_parts".to_string(), "1".to_string()); + + let query = alter_table_modify_settings_query( + "test_db", + "test_table", + &settings, + Some("test_cluster"), + ) + .unwrap(); + + assert!( + query.contains("ON CLUSTER test_cluster"), + "MODIFY SETTING query should contain ON CLUSTER clause" + ); + assert!(query.contains("ALTER TABLE")); + assert!(query.contains("MODIFY SETTING")); + } + + #[test] + fn test_alter_table_add_column_with_cluster() { + let column = ClickHouseColumn { + name: "new_col".to_string(), + column_type: ClickHouseColumnType::String, + required: false, + primary_key: false, + unique: false, + default: None, + comment: None, + ttl: None, + }; + + let cluster_clause = Some("test_cluster") + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); + + let query = format!( + "ALTER TABLE `test_db`.`test_table`{} ADD COLUMN `{}` String FIRST", + cluster_clause, column.name + ); + + assert!( + query.contains("ON CLUSTER test_cluster"), + "ADD COLUMN query should contain ON CLUSTER clause" + ); + assert!(query.contains("ALTER TABLE")); + assert!(query.contains("ADD COLUMN")); + } + #[test] fn 
test_replication_params_dev_no_cluster_no_keeper_args_auto_injects() { let result = build_replication_params( @@ -4674,10 +4733,8 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; assert!(result.is_ok()); let params = result.unwrap(); - // Should auto-inject params - assert_eq!(params.len(), 2); - assert!(params[0].contains("/clickhouse/tables/")); - assert!(params[1].contains("{replica}")); + // Dev with cluster: should return empty params (let CH use {uuid} with ON CLUSTER) + assert_eq!(params.len(), 0); } #[test] @@ -4735,7 +4792,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; } #[test] - fn test_replication_params_prod_with_cluster_no_keeper_args_auto_injects() { + fn test_replication_params_prod_with_cluster_no_keeper_args_empty() { let result = build_replication_params( &None, &None, @@ -4747,10 +4804,8 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; assert!(result.is_ok()); let params = result.unwrap(); - // Should auto-inject params when cluster is defined, even in prod - assert_eq!(params.len(), 2); - assert!(params[0].contains("/clickhouse/tables/")); - assert!(params[1].contains("{replica}")); + // Prod with cluster: should return empty params (let CH use {uuid} with ON CLUSTER) + assert_eq!(params.len(), 0); } #[test] diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index 77349acd58..f5237e175a 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -193,6 +193,7 @@ impl AtomicOlapOperation { column: column.clone(), after_column: after_column.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::DropTableColumn { table, @@ -202,6 +203,7 @@ impl AtomicOlapOperation { table: table.name.clone(), column_name: column_name.clone(), database: table.database.clone(), + cluster_name: 
table.cluster_name.clone(), }, AtomicOlapOperation::ModifyTableColumn { table, @@ -213,6 +215,7 @@ impl AtomicOlapOperation { before_column: before_column.clone(), after_column: after_column.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifyTableSettings { table, @@ -224,6 +227,7 @@ impl AtomicOlapOperation { before_settings: before_settings.clone(), after_settings: after_settings.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifyTableTtl { table, @@ -235,12 +239,14 @@ impl AtomicOlapOperation { before: before.clone(), after: after.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::AddTableIndex { table, index, .. } => { SerializableOlapOperation::AddTableIndex { table: table.name.clone(), index: index.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), } } AtomicOlapOperation::DropTableIndex { @@ -249,6 +255,7 @@ impl AtomicOlapOperation { table: table.name.clone(), index_name: index_name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifySampleBy { table, expression, .. @@ -256,11 +263,13 @@ impl AtomicOlapOperation { table: table.name.clone(), expression: expression.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::RemoveSampleBy { table, .. 
} => { SerializableOlapOperation::RemoveSampleBy { table: table.name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), } } AtomicOlapOperation::PopulateMaterializedView { From cde545c4ab3c426c0e5c1b872cbab0c84d3341bf Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Thu, 6 Nov 2025 15:14:11 -0700 Subject: [PATCH 15/23] docs --- .../src/framework/core/infrastructure_map.rs | 8 ++- .../olap/clickhouse/diff_strategy.rs | 55 +++++-------------- .../src/infrastructure/olap/clickhouse/mod.rs | 5 +- .../llm-docs/python/table-setup.md | 33 +++++++++++ .../llm-docs/typescript/table-setup.md | 27 +++++++++ .../src/pages/moose/configuration.mdx | 3 + .../pages/moose/olap/planned-migrations.mdx | 2 + 7 files changed, 88 insertions(+), 45 deletions(-) diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 9ee4d97a52..622b20de46 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -1854,8 +1854,10 @@ impl InfrastructureMap { // Detect engine change (e.g., MergeTree -> ReplacingMergeTree) let engine_changed = table.engine != target_table.engine; - // Detect cluster name change - let cluster_changed = table.cluster_name != target_table.cluster_name; + // Note: We intentionally do NOT check for cluster_name changes here. + // cluster_name is a deployment directive (how to run DDL), not a schema property. + // The inframap will be updated with the new cluster_name value, and future DDL + // operations will use it, but changing cluster_name doesn't trigger operations. 
let order_by_change = if order_by_changed { OrderByChange { @@ -1897,12 +1899,12 @@ impl InfrastructureMap { // since ClickHouse requires the full column definition when modifying TTL // Only process changes if there are actual differences to report + // Note: cluster_name changes are intentionally excluded - they don't trigger operations if !column_changes.is_empty() || order_by_changed || partition_by_changed || engine_changed || indexes_changed - || cluster_changed { // Use the strategy to determine the appropriate changes let strategy_changes = strategy.diff_table_update( diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index 175bc447c3..9bd1b2e492 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -472,17 +472,11 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { })]; } - // Check if cluster has changed - if before.cluster_name != after.cluster_name { - log::warn!( - "ClickHouse: cluster changed for table '{}' (from {:?} to {:?}), requiring drop+create", - before.name, before.cluster_name, after.cluster_name - ); - return vec![ - OlapChange::Table(TableChange::Removed(before.clone())), - OlapChange::Table(TableChange::Added(after.clone())), - ]; - } + // Note: cluster_name changes are intentionally NOT treated as requiring drop+create. + // cluster_name is a deployment directive (how to run DDL) rather than a schema property + // (what the table looks like). When cluster_name changes, future DDL operations will + // automatically use the new cluster_name via the ON CLUSTER clause, but the table + // itself doesn't need to be recreated. 
// Check if PARTITION BY has changed let partition_by_changed = partition_by_change.before != partition_by_change.after; @@ -1654,16 +1648,9 @@ mod tests { let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); - // Should return DROP + CREATE (like ORDER BY changes) - assert_eq!(changes.len(), 2); - assert!(matches!( - changes[0], - OlapChange::Table(TableChange::Removed(_)) - )); - assert!(matches!( - changes[1], - OlapChange::Table(TableChange::Added(_)) - )); + // cluster_name is a deployment directive, not a schema property + // Changing it should not trigger any operations + assert_eq!(changes.len(), 0); } #[test] @@ -1684,16 +1671,9 @@ mod tests { let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); - // Should return DROP + CREATE (like ORDER BY changes) - assert_eq!(changes.len(), 2); - assert!(matches!( - changes[0], - OlapChange::Table(TableChange::Removed(_)) - )); - assert!(matches!( - changes[1], - OlapChange::Table(TableChange::Added(_)) - )); + // cluster_name is a deployment directive, not a schema property + // Changing it should not trigger any operations + assert_eq!(changes.len(), 0); } #[test] @@ -1714,16 +1694,9 @@ mod tests { let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); - // Should return DROP + CREATE (like ORDER BY changes) - assert_eq!(changes.len(), 2); - assert!(matches!( - changes[0], - OlapChange::Table(TableChange::Removed(_)) - )); - assert!(matches!( - changes[1], - OlapChange::Table(TableChange::Added(_)) - )); + // cluster_name is a deployment directive, not a schema property + // Changing it should not trigger any operations + assert_eq!(changes.len(), 0); } #[test] diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 6402579cb9..5219d50ee8 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ 
b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -1933,7 +1933,10 @@ impl OlapOperations for ConfiguredDBClient { indexes, database: Some(database), table_ttl_setting, - cluster_name: None, // Cluster info not extracted from introspection + // cluster_name is always None from introspection because ClickHouse doesn't store + // the ON CLUSTER clause - it's only used during DDL execution and isn't persisted + // in system tables. Users must manually specify cluster in their table configs. + cluster_name: None, }; debug!("Created table object: {:?}", table); diff --git a/apps/framework-docs/llm-docs/python/table-setup.md b/apps/framework-docs/llm-docs/python/table-setup.md index 5bbcd258bc..9bbb2b1e1e 100644 --- a/apps/framework-docs/llm-docs/python/table-setup.md +++ b/apps/framework-docs/llm-docs/python/table-setup.md @@ -457,6 +457,39 @@ name = "default" **Production:** Cluster names must match your ClickHouse `remote_servers` configuration. +#### Understanding `cluster` as a Deployment Directive + +The `cluster` field is a **deployment directive** that controls HOW Moose runs DDL operations, not WHAT the table looks like: + +- **Changing `cluster` won't recreate your table** - it only affects future DDL operations (CREATE, ALTER, etc.) +- **ClickHouse doesn't store cluster information** - the `ON CLUSTER` clause is only used during DDL execution +- **`moose init --from-remote` & `moose db pull` cannot detect cluster names** - ClickHouse system tables don't preserve this information + +**If you're importing existing tables that were created with `ON CLUSTER`:** +1. Run `moose init --from-remote` to generate your table definitions +2. Manually add `cluster="your_cluster_name"` to the generated table configs +3. 
Future migrations and DDL operations will correctly use `ON CLUSTER` + +**Example workflow:** +```python +# After moose init --from-remote generates this: +my_table = OlapTable[MySchema]( + "MyTable", + OlapConfig( + order_by_fields=["id"] + ) +) + +# Manually add cluster if you know it was created with ON CLUSTER: +my_table = OlapTable[MySchema]( + "MyTable", + OlapConfig( + order_by_fields=["id"], + cluster="my_cluster" # Add this line + ) +) +``` + ### S3Queue Engine Tables The S3Queue engine enables automatic processing of files from S3 buckets as they arrive. diff --git a/apps/framework-docs/llm-docs/typescript/table-setup.md b/apps/framework-docs/llm-docs/typescript/table-setup.md index aec26eb2ad..59714c4a45 100644 --- a/apps/framework-docs/llm-docs/typescript/table-setup.md +++ b/apps/framework-docs/llm-docs/typescript/table-setup.md @@ -286,6 +286,33 @@ name = "default" **Production:** Cluster names must match your ClickHouse `remote_servers` configuration. +#### Understanding `cluster` as a Deployment Directive + +The `cluster` field is a **deployment directive** that controls HOW Moose runs DDL operations, not WHAT the table looks like: + +- **Changing `cluster` won't recreate your table** - it only affects future DDL operations (CREATE, ALTER, etc.) +- **ClickHouse doesn't store cluster information** - the `ON CLUSTER` clause is only used during DDL execution +- **`moose init --from-remote` & `moose db pull` cannot detect cluster names** - ClickHouse system tables don't preserve this information + +**If you're importing existing tables that were created with `ON CLUSTER`:** +1. Run `moose init --from-remote` to generate your table definitions +2. Manually add `cluster: "your_cluster_name"` to the generated table configs +3. 
Future migrations and DDL operations will correctly use `ON CLUSTER` + +**Example workflow:** +```typescript +// After moose init --from-remote generates this: +export const MyTable = new OlapTable("MyTable", { + orderByFields: ["id"] +}); + +// Manually add cluster if you know it was created with ON CLUSTER: +export const MyTable = new OlapTable("MyTable", { + orderByFields: ["id"], + cluster: "my_cluster" // Add this line +}); +``` + ### S3Queue Engine Tables The S3Queue engine allows you to automatically process files from S3 buckets as they arrive. diff --git a/apps/framework-docs/src/pages/moose/configuration.mdx b/apps/framework-docs/src/pages/moose/configuration.mdx index cffb1ae179..92fcd7416e 100644 --- a/apps/framework-docs/src/pages/moose/configuration.mdx +++ b/apps/framework-docs/src/pages/moose/configuration.mdx @@ -246,6 +246,9 @@ native_port = 9000 # ClickHouse cluster configuration for replicated tables (optional) # Define clusters for use with ON CLUSTER DDL operations and distributed tables # In local dev, Moose creates single-node clusters. In production, names must match your ClickHouse remote_servers config. +# +# Note: Cluster names are deployment directives that control HOW Moose runs DDL (via ON CLUSTER), +# not schema properties. Changing cluster names in your table configs won't trigger table recreation. # [[clickhouse_config.clusters]] # name = "default" # [[clickhouse_config.clusters]] diff --git a/apps/framework-docs/src/pages/moose/olap/planned-migrations.mdx b/apps/framework-docs/src/pages/moose/olap/planned-migrations.mdx index 5ccea10388..cb31d39a09 100644 --- a/apps/framework-docs/src/pages/moose/olap/planned-migrations.mdx +++ b/apps/framework-docs/src/pages/moose/olap/planned-migrations.mdx @@ -84,6 +84,8 @@ You will commit the entire `migrations/` directory to version control, and Moose Moose makes some assumptions about your schema changes, such as renaming a column instead of dropping and adding. 
You can modify the plan to override these assumptions. + +Note: The `cluster` field controls which ClickHouse cluster Moose uses for `ON CLUSTER` DDL operations. It's a deployment directive, not a schema property, so changing it won't trigger table recreation. Open `plan.yaml` in your PR. Operations are ordered (teardown first, then setup) to avoid dependency issues. Review like regular code. You can also edit the plan to override the default assumptions Moose makes. From 723da137473fdf1a9f284f78b71049331d22d767 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Thu, 6 Nov 2025 15:35:26 -0700 Subject: [PATCH 16/23] fix rebase --- .../olap/clickhouse/diff_strategy.rs | 70 +++++++++++++++++-- .../src/infrastructure/olap/clickhouse/mod.rs | 2 + 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index 9bd1b2e492..4e27e2e2b3 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -1646,7 +1646,19 @@ mod tests { after: after.order_by.clone(), }; - let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); // cluster_name is a deployment directive, not a schema property // Changing it should not trigger any operations @@ -1669,7 +1681,19 @@ mod tests { after: after.order_by.clone(), }; - let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let 
changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); // cluster_name is a deployment directive, not a schema property // Changing it should not trigger any operations @@ -1692,7 +1716,19 @@ mod tests { after: after.order_by.clone(), }; - let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); // cluster_name is a deployment directive, not a schema property // Changing it should not trigger any operations @@ -1715,7 +1751,19 @@ mod tests { after: after.order_by.clone(), }; - let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); // Should not trigger a validation error - no changes at all assert_eq!(changes.len(), 0); @@ -1737,7 +1785,19 @@ mod tests { after: after.order_by.clone(), }; - let changes = strategy.diff_table_update(&before, &after, vec![], order_by_change, "local"); + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); // Should not trigger a validation error - no changes at all assert_eq!(changes.len(), 0); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 5219d50ee8..ca0cba07e2 100644 --- 
a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -2912,6 +2912,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra table_settings: None, indexes: vec![], database: None, + cluster_name: None, table_ttl_setting: Some("created_at + INTERVAL 30 DAY".to_string()), }; @@ -2977,6 +2978,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra table_settings: None, indexes: vec![], database: None, + cluster_name: None, table_ttl_setting: Some("created_at + INTERVAL 30 DAY".to_string()), }; From c292254dd8400c5521017b4f1ef77d3913f4d61d Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Fri, 7 Nov 2025 10:19:01 -0700 Subject: [PATCH 17/23] pr feedback --- .../src/framework/core/plan_validator.rs | 72 +++++++++---------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index ddbb638338..7863ec27d6 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -20,50 +20,46 @@ fn validate_cluster_references(project: &Project, plan: &InfraPlan) -> Result<() let defined_clusters = project.clickhouse_config.clusters.as_ref(); // Get all cluster names from the defined clusters - let cluster_names: Option> = - defined_clusters.map(|clusters| clusters.iter().map(|c| c.name.clone()).collect()); + let cluster_names: Vec = defined_clusters + .map(|clusters| clusters.iter().map(|c| c.name.clone()).collect()) + .unwrap_or_default(); // Check all tables in the target infrastructure map for table in plan.target_infra_map.tables.values() { if let Some(cluster_name) = &table.cluster_name { // If table has a cluster_name, verify it's defined in the config - match &cluster_names { - None => { - // No clusters defined in config but table references one 
- return Err(ValidationError::ClusterValidation(format!( - "Table '{}' references cluster '{}', but no clusters are defined in moose.config.toml.\n\ - \n\ - To fix this, add the cluster definition to your config:\n\ - \n\ - [[clickhouse_config.clusters]]\n\ - name = \"{}\"\n", - table.name, cluster_name, cluster_name - ))); - } - Some(names) if !names.contains(cluster_name) => { - // Table references a cluster that's not defined - return Err(ValidationError::ClusterValidation(format!( - "Table '{}' references cluster '{}', which is not defined in moose.config.toml.\n\ - \n\ - Available clusters: {}\n\ - \n\ - To fix this, either:\n\ - 1. Add the cluster to your config:\n\ - [[clickhouse_config.clusters]]\n\ - name = \"{}\"\n\ - \n\ - 2. Or change the table to use an existing cluster: {}\n", - table.name, - cluster_name, - names.join(", "), - cluster_name, - names.join(", ") - ))); - } - _ => { - // Cluster is defined, continue validation - } + if cluster_names.is_empty() { + // No clusters defined in config but table references one + return Err(ValidationError::ClusterValidation(format!( + "Table '{}' references cluster '{}', but no clusters are defined in moose.config.toml.\n\ + \n\ + To fix this, add the cluster definition to your config:\n\ + \n\ + [[clickhouse_config.clusters]]\n\ + name = \"{}\"\n", + table.name, cluster_name, cluster_name + ))); + } else if !cluster_names.contains(cluster_name) { + // Table references a cluster that's not defined + return Err(ValidationError::ClusterValidation(format!( + "Table '{}' references cluster '{}', which is not defined in moose.config.toml.\n\ + \n\ + Available clusters: {}\n\ + \n\ + To fix this, either:\n\ + 1. Add the cluster to your config:\n\ + [[clickhouse_config.clusters]]\n\ + name = \"{}\"\n\ + \n\ + 2. 
Or change the table to use an existing cluster: {}\n", + table.name, + cluster_name, + cluster_names.join(", "), + cluster_name, + cluster_names.join(", ") + ))); } + // Cluster is defined, continue validation } } From 885b782d179e383b559be5d2b15303b91e1e835c Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Fri, 7 Nov 2025 11:22:45 -0700 Subject: [PATCH 18/23] fix rebase --- .../src/infrastructure/olap/clickhouse/mod.rs | 19 ++++++++++++++----- .../infrastructure/olap/clickhouse/queries.rs | 1 + 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index ca0cba07e2..3be21d87de 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -2543,8 +2543,15 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ttl: None, }; - let sqls = build_modify_column_sql("test_db", "test_table", &sample_hash_col, false, false) - .unwrap(); + let sqls = build_modify_column_sql( + "test_db", + "test_table", + &sample_hash_col, + false, + false, + None, + ) + .unwrap(); assert_eq!(sqls.len(), 1); // The fix ensures xxHash64(_id) is NOT quoted - if it were quoted, ClickHouse would treat it as a string literal @@ -2565,8 +2572,9 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ttl: None, }; - let sqls = build_modify_column_sql("test_db", "test_table", &created_at_col, false, false) - .unwrap(); + let sqls = + build_modify_column_sql("test_db", "test_table", &created_at_col, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); // The fix ensures now() is NOT quoted @@ -2588,7 +2596,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra }; let sqls = - build_modify_column_sql("test_db", "test_table", &status_col, false, false).unwrap(); + 
build_modify_column_sql("test_db", "test_table", &status_col, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); // String literals should preserve their quotes diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 1092359545..aef82acc3d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -3085,6 +3085,7 @@ ENGINE = MergeTree table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); From d7bb37c6695e55a8a13341dfcbdf8d947c584aef Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 11 Nov 2025 11:35:56 -0700 Subject: [PATCH 19/23] pr feedback --- apps/framework-cli/src/cli.rs | 6 + .../framework-cli/src/cli/routines/migrate.rs | 341 ++++++++++++++---- apps/framework-cli/src/cli/routines/mod.rs | 10 +- 3 files changed, 292 insertions(+), 65 deletions(-) diff --git a/apps/framework-cli/src/cli.rs b/apps/framework-cli/src/cli.rs index c632407248..87c7e78a0d 100644 --- a/apps/framework-cli/src/cli.rs +++ b/apps/framework-cli/src/cli.rs @@ -252,7 +252,13 @@ fn override_project_config_from_url( ) })?; + let clusters = project.clickhouse_config.clusters.clone(); + let additional_databases = project.clickhouse_config.additional_databases.clone(); + project.clickhouse_config = clickhouse_config; + project.clickhouse_config.clusters = clusters; + project.clickhouse_config.additional_databases = additional_databases; + info!( "Overriding project ClickHouse config from CLI: database = {}", project.clickhouse_config.db_name diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 143162e327..247c74b83d 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ 
-6,7 +6,7 @@ use crate::framework::core::infrastructure::table::Table; use crate::framework::core::infrastructure_map::InfrastructureMap; use crate::framework::core::migration_plan::MigrationPlan; use crate::framework::core::state_storage::{StateStorage, StateStorageBuilder}; -use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; +use crate::infrastructure::olap::clickhouse::config::{ClickHouseConfig, ClusterConfig}; use crate::infrastructure::olap::clickhouse::IgnorableOperation; use crate::infrastructure::olap::clickhouse::{ check_ready, create_client, ConfiguredDBClient, SerializableOlapOperation, @@ -196,124 +196,226 @@ fn report_drift(drift: &DriftStatus) { } } -/// Validates that all table databases specified in operations are configured -fn validate_table_databases( +/// Validates that all table databases and clusters specified in operations are configured +fn validate_table_databases_and_clusters( operations: &[SerializableOlapOperation], primary_database: &str, additional_databases: &[String], + clusters: &Option>, ) -> Result<()> { let mut invalid_tables = Vec::new(); - - // Helper to validate a database option - let mut validate_db = |db_opt: &Option, table_name: &str| { + let mut invalid_clusters = Vec::new(); + + // Get configured cluster names + let cluster_names: Vec = clusters + .as_ref() + .map(|cs| cs.iter().map(|c| c.name.clone()).collect()) + .unwrap_or_default(); + + log::info!("Configured cluster names: {:?}", cluster_names); + + // Helper to validate database and cluster options + let mut validate = |db_opt: &Option, cluster_opt: &Option, table_name: &str| { + log::info!( + "Validating table '{}' with cluster: {:?}", + table_name, + cluster_opt + ); + // Validate database if let Some(db) = db_opt { if db != primary_database && !additional_databases.contains(db) { invalid_tables.push((table_name.to_string(), db.clone())); } } + // Validate cluster + if let Some(cluster) = cluster_opt { + log::info!( + "Checking if cluster '{}' 
is in {:?}", + cluster, + cluster_names + ); + // Fail if cluster is not in the configured list (or if list is empty) + if cluster_names.is_empty() || !cluster_names.contains(cluster) { + log::info!("Cluster '{}' not found in configured clusters!", cluster); + invalid_clusters.push((table_name.to_string(), cluster.clone())); + } + } }; for operation in operations { match operation { SerializableOlapOperation::CreateTable { table } => { - validate_db(&table.database, &table.name); + validate(&table.database, &table.cluster_name, &table.name); } SerializableOlapOperation::DropTable { table, database, - cluster_name: _, + cluster_name, } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::AddTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::DropTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifyTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::RenameTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifyTableSettings { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifyTableTtl { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::AddTableIndex { - table, database, .. + table, + database, + cluster_name, + .. 
} => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::DropTableIndex { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifySampleBy { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::RemoveSampleBy { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::RawSql { .. } => { - // RawSql doesn't reference specific tables/databases, skip validation + // RawSql doesn't reference specific tables/databases/clusters, skip validation } } } - if !invalid_tables.is_empty() { - let mut error_message = String::from( - "One or more tables specify databases that are not configured in moose.config.toml:\n\n" - ); + // Build error message if we found any issues + let has_errors = !invalid_tables.is_empty() || !invalid_clusters.is_empty(); + if has_errors { + let mut error_message = String::new(); - for (table_name, database) in &invalid_tables { - error_message.push_str(&format!( - " • Table '{}' specifies database '{}'\n", - table_name, database - )); - } + // Report database errors + if !invalid_tables.is_empty() { + error_message.push_str( + "One or more tables specify databases that are not configured in moose.config.toml:\n\n", + ); - error_message - .push_str("\nTo fix this, add the missing database(s) to your moose.config.toml:\n\n"); - error_message.push_str("[clickhouse_config]\n"); - error_message.push_str(&format!("db_name = \"{}\"\n", primary_database)); - error_message.push_str("additional_databases = ["); + for (table_name, database) in &invalid_tables { + error_message.push_str(&format!( + " • Table '{}' specifies database '{}'\n", + 
table_name, database + )); + } - let mut all_databases: Vec = additional_databases.to_vec(); - for (_, db) in &invalid_tables { - if !all_databases.contains(db) { - all_databases.push(db.clone()); + error_message.push_str( + "\nTo fix this, add the missing database(s) to your moose.config.toml:\n\n", + ); + error_message.push_str("[clickhouse_config]\n"); + error_message.push_str(&format!("db_name = \"{}\"\n", primary_database)); + error_message.push_str("additional_databases = ["); + + let mut all_databases: Vec = additional_databases.to_vec(); + for (_, db) in &invalid_tables { + if !all_databases.contains(db) { + all_databases.push(db.clone()); + } } + all_databases.sort(); + + let db_list = all_databases + .iter() + .map(|db| format!("\"{}\"", db)) + .collect::>() + .join(", "); + error_message.push_str(&db_list); + error_message.push_str("]\n"); } - all_databases.sort(); - let db_list = all_databases - .iter() - .map(|db| format!("\"{}\"", db)) - .collect::>() - .join(", "); - error_message.push_str(&db_list); - error_message.push_str("]\n"); + // Report cluster errors + if !invalid_clusters.is_empty() { + if !invalid_tables.is_empty() { + error_message.push('\n'); + } + + error_message.push_str( + "One or more tables specify clusters that are not configured in moose.config.toml:\n\n", + ); + + for (table_name, cluster) in &invalid_clusters { + error_message.push_str(&format!( + " • Table '{}' specifies cluster '{}'\n", + table_name, cluster + )); + } + + error_message.push_str( + "\nTo fix this, add the missing cluster(s) to your moose.config.toml:\n\n", + ); + + // Only show the missing clusters in the error message, not the already configured ones + let mut missing_clusters: Vec = invalid_clusters + .iter() + .map(|(_, cluster)| cluster.clone()) + .collect(); + missing_clusters.sort(); + missing_clusters.dedup(); + + for cluster in &missing_clusters { + error_message.push_str(&format!("[[clickhouse_config.clusters]]\n")); + 
error_message.push_str(&format!("name = \"{}\"\n\n", cluster)); + } + } anyhow::bail!(error_message); } @@ -345,11 +447,16 @@ async fn execute_operations( migration_plan.operations.len() ); - // Validate that all table databases are configured - validate_table_databases( + // Validate that all table databases and clusters are configured + log::info!( + "Validating operations against config. Clusters: {:?}", + project.clickhouse_config.clusters + ); + validate_table_databases_and_clusters( &migration_plan.operations, &project.clickhouse_config.db_name, &project.clickhouse_config.additional_databases, + &project.clickhouse_config.clusters, )?; let is_dev = !project.is_production; @@ -969,7 +1076,7 @@ mod tests { }]; // Primary database matches - should pass - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_ok()); // Database in additional_databases - should pass @@ -978,7 +1085,12 @@ mod tests { let operations = vec![SerializableOlapOperation::CreateTable { table: table_analytics, }]; - let result = validate_table_databases(&operations, "local", &["analytics".to_string()]); + let result = validate_table_databases_and_clusters( + &operations, + "local", + &["analytics".to_string()], + &None, + ); assert!(result.is_ok()); } @@ -990,7 +1102,7 @@ mod tests { let operations = vec![SerializableOlapOperation::CreateTable { table }]; // Database not in config - should fail - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_err()); let err = result.unwrap_err().to_string(); assert!(err.contains("unconfigured_db")); @@ -1052,7 +1164,7 @@ mod tests { }, ]; - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_err()); 
let err = result.unwrap_err().to_string(); // Should report both bad databases @@ -1068,7 +1180,108 @@ mod tests { description: "test".to_string(), }]; - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_ok()); } + + #[test] + fn test_validate_cluster_valid() { + let mut table = create_test_table("users"); + table.cluster_name = Some("my_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { + table: table.clone(), + }]; + + let clusters = Some(vec![ClusterConfig { + name: "my_cluster".to_string(), + }]); + + // Cluster is configured - should pass + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_ok()); + } + + #[test] + fn test_validate_cluster_invalid() { + let mut table = create_test_table("users"); + table.cluster_name = Some("unconfigured_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { table }]; + + let clusters = Some(vec![ + ClusterConfig { + name: "my_cluster".to_string(), + }, + ClusterConfig { + name: "another_cluster".to_string(), + }, + ]); + + // Cluster not in config - should fail and show available clusters + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("unconfigured_cluster"), + "Error should mention the invalid cluster" + ); + assert!( + err.contains("moose.config.toml"), + "Error should reference config file" + ); + } + + #[test] + fn test_validate_cluster_no_clusters_configured() { + let mut table = create_test_table("users"); + table.cluster_name = Some("some_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { table }]; + + // No clusters configured but table references one - should fail + let result = 
validate_table_databases_and_clusters(&operations, "local", &[], &None); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("some_cluster")); + } + + #[test] + fn test_validate_both_database_and_cluster_invalid() { + let mut table = create_test_table("users"); + table.database = Some("bad_db".to_string()); + table.cluster_name = Some("bad_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { table }]; + + let clusters = Some(vec![ClusterConfig { + name: "good_cluster".to_string(), + }]); + + // Both database and cluster invalid - should report both errors + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("bad_db")); + assert!(err.contains("bad_cluster")); + } + + #[test] + fn test_validate_cluster_in_drop_table_operation() { + let operations = vec![SerializableOlapOperation::DropTable { + table: "users".to_string(), + database: None, + cluster_name: Some("unconfigured_cluster".to_string()), + }]; + + let clusters = Some(vec![ClusterConfig { + name: "my_cluster".to_string(), + }]); + + // DropTable with invalid cluster - should fail + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("unconfigured_cluster")); + } } diff --git a/apps/framework-cli/src/cli/routines/mod.rs b/apps/framework-cli/src/cli/routines/mod.rs index 49492b11c8..38a49280b8 100644 --- a/apps/framework-cli/src/cli/routines/mod.rs +++ b/apps/framework-cli/src/cli/routines/mod.rs @@ -1181,8 +1181,16 @@ pub async fn remote_gen_migration( }, ); + // Validate the plan before generating migration files + let plan = InfraPlan { + target_infra_map: local_infra_map.clone(), + changes, + }; + + plan_validator::validate(project, &plan)?; + let db_migration = - 
MigrationPlan::from_infra_plan(&changes, &project.clickhouse_config.db_name)?; + MigrationPlan::from_infra_plan(&plan.changes, &project.clickhouse_config.db_name)?; Ok(MigrationPlanWithBeforeAfter { remote_state: remote_infra_map, From 4a910b2a3c50aa1ffd81760048b2bb1879a29829 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 11 Nov 2025 12:06:47 -0700 Subject: [PATCH 20/23] clippy --- apps/framework-cli/src/cli/routines/migrate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 247c74b83d..8eb2e87bfd 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -412,7 +412,7 @@ fn validate_table_databases_and_clusters( missing_clusters.dedup(); for cluster in &missing_clusters { - error_message.push_str(&format!("[[clickhouse_config.clusters]]\n")); + error_message.push_str("[[clickhouse_config.clusters]]\n"); error_message.push_str(&format!("name = \"{}\"\n\n", cluster)); } } From dc4b7c5823f3d52e9e8605e664e88f545eb35e4c Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Thu, 13 Nov 2025 15:30:27 -0700 Subject: [PATCH 21/23] migrate docs --- .../content/moosestack/configuration.mdx | 11 ++ .../content/moosestack/olap/model-table.mdx | 107 ++++++++++++++++-- .../src/pages/moose/olap/model-table.mdx | 104 +++++++++++++++-- 3 files changed, 208 insertions(+), 14 deletions(-) diff --git a/apps/framework-docs-v2/content/moosestack/configuration.mdx b/apps/framework-docs-v2/content/moosestack/configuration.mdx index 28702959eb..425f97079b 100644 --- a/apps/framework-docs-v2/content/moosestack/configuration.mdx +++ b/apps/framework-docs-v2/content/moosestack/configuration.mdx @@ -233,6 +233,17 @@ native_port = 9000 # Optional list of additional databases to create on startup (Default: []) # additional_databases = ["analytics", "staging"] +# ClickHouse cluster configuration for replicated 
tables (optional) +# Define clusters for use with ON CLUSTER DDL operations and distributed tables +# In local dev, Moose creates single-node clusters. In production, names must match your ClickHouse remote_servers config. +# +# Note: Cluster names are deployment directives that control HOW Moose runs DDL (via ON CLUSTER), +# not schema properties. Changing cluster names in your table configs won't trigger table recreation. +# [[clickhouse_config.clusters]] +# name = "default" +# [[clickhouse_config.clusters]] +# name = "my_cluster" + # HTTP server configuration for local development [http_server_config] # Host to bind the webserver to (Default: "localhost") diff --git a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx index 865fcdc908..a9ee3b35c1 100644 --- a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx +++ b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx @@ -1070,18 +1070,111 @@ cloud_replicated = OlapTable[Record]("cloud_records", OlapConfig( - -The `keeper_path` and `replica_name` parameters are **optional** for replicated engines: +##### Configuring Replication -- **Omit both parameters** (recommended): Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments. The default path pattern `/clickhouse/tables/{uuid}/{shard}` with replica `{replica}` works automatically with Atomic databases (default in modern ClickHouse). - -- **Provide custom paths**: You can still specify both parameters explicitly if you need custom replication paths for your self-managed cluster. +Replicated engines support three configuration approaches. Choose the one that fits your deployment: -**Note**: Both parameters must be provided together, or both omitted. The `{uuid}`, `{shard}`, and `{replica}` macros are automatically substituted by ClickHouse at runtime. +1. 
Default Configuration (Recommended) -For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). +Omit all replication parameters. Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments: + + + +```ts filename="DefaultReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"] + // No keeper_path, replica_name, or cluster needed +}); +``` + + +```py filename="DefaultReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), # No parameters + order_by_fields=["id"] +)) +``` + + + +Moose auto-injects: `/clickhouse/tables/{database}/{shard}/{table_name}` and `{replica}` in local development. ClickHouse Cloud uses its own patterns automatically. + +2. Cluster Configuration + +For multi-node deployments, specify a cluster name to use `ON CLUSTER` DDL operations: + + + +```ts filename="ClusterReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "default" // References cluster from moose.config.toml +}); +``` + + +```py filename="ClusterReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), + order_by_fields=["id"], + cluster="default" # References cluster from moose.config.toml +)) +``` + + + +**Configuration in `moose.config.toml`:** +```toml +[[clickhouse_config.clusters]] +name = "default" +``` + +**Use when:** +- Running multi-node self-managed ClickHouse with cluster configuration +- Need `ON CLUSTER` DDL for distributed operations + +3. 
Explicit Replication Paths + +For custom replication topology, specify both `keeper_path` and `replica_name`: + + + +```ts filename="ExplicitReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + keeperPath: "/clickhouse/tables/{database}/{shard}/my_table", + replicaName: "{replica}", + orderByFields: ["id"] +}); +``` + + +```py filename="ExplicitReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{shard}/my_table", + replica_name="{replica}" + ), + order_by_fields=["id"] +)) +``` + + + +**Use when:** +- Need custom replication paths for advanced configurations +- Both parameters must be provided together + + +**Cannot mix approaches:** Specifying both `cluster` and explicit `keeper_path`/`replica_name` will cause an error. Choose one approach. + +**Cluster is a deployment directive:** Changing `cluster` won't recreate your table—it only affects future DDL operations. +For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). + ### Irregular column names and Python Aliases diff --git a/apps/framework-docs/src/pages/moose/olap/model-table.mdx b/apps/framework-docs/src/pages/moose/olap/model-table.mdx index 300fcc55b8..4bc5c28d30 100644 --- a/apps/framework-docs/src/pages/moose/olap/model-table.mdx +++ b/apps/framework-docs/src/pages/moose/olap/model-table.mdx @@ -1050,18 +1050,108 @@ cloud_replicated = OlapTable[Record]("cloud_records", OlapConfig( ``` - -The `keeper_path` and `replica_name` parameters are **optional** for replicated engines: +##### Configuring Replication -- **Omit both parameters** (recommended): Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments. 
The default path pattern `/clickhouse/tables/{uuid}/{shard}` with replica `{replica}` works automatically with Atomic databases (default in modern ClickHouse). - -- **Provide custom paths**: You can still specify both parameters explicitly if you need custom replication paths for your self-managed cluster. +Replicated engines support three configuration approaches. Choose the one that fits your deployment: -**Note**: Both parameters must be provided together, or both omitted. The `{uuid}`, `{shard}`, and `{replica}` macros are automatically substituted by ClickHouse at runtime. +1. Default Configuration (Recommended) -For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). +Omit all replication parameters. Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments: + + +```ts filename="DefaultReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"] + // No keeper_path, replica_name, or cluster needed +}); +``` + + + +```py filename="DefaultReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), # No parameters + order_by_fields=["id"] +)) +``` + + +Moose auto-injects: `/clickhouse/tables/{database}/{shard}/{table_name}` and `{replica}` in local development. ClickHouse Cloud uses its own patterns automatically. + +2. 
Cluster Configuration + +For multi-node deployments, specify a cluster name to use `ON CLUSTER` DDL operations: + + +```ts filename="ClusterReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "default" // References cluster from moose.config.toml +}); +``` + + + +```py filename="ClusterReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), + order_by_fields=["id"], + cluster="default" # References cluster from moose.config.toml +)) +``` + + +**Configuration in `moose.config.toml`:** +```toml +[[clickhouse_config.clusters]] +name = "default" +``` + +**Use when:** +- Running multi-node self-managed ClickHouse with cluster configuration +- Need `ON CLUSTER` DDL for distributed operations + +3. Explicit Replication Paths + +For custom replication topology, specify both `keeper_path` and `replica_name`: + + +```ts filename="ExplicitReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + keeperPath: "/clickhouse/tables/{database}/{shard}/my_table", + replicaName: "{replica}", + orderByFields: ["id"] +}); +``` + + + +```py filename="ExplicitReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{shard}/my_table", + replica_name="{replica}" + ), + order_by_fields=["id"] +)) +``` + + +**Use when:** +- Need custom replication paths for advanced configurations +- Both parameters must be provided together + + +**Cannot mix approaches:** Specifying both `cluster` and explicit `keeper_path`/`replica_name` will cause an error. Choose one approach. + +**Cluster is a deployment directive:** Changing `cluster` won't recreate your table — it only affects future DDL operations. 
+For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). + ### Irregular column names and Python Aliases From ccb8acd3de737886f953bf4f90a16f007bca0818 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Thu, 13 Nov 2025 15:37:34 -0700 Subject: [PATCH 22/23] fix docs --- .../content/moosestack/olap/model-table.mdx | 6 +++--- apps/framework-docs/src/pages/moose/olap/model-table.mdx | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx index a9ee3b35c1..b0f98c4c28 100644 --- a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx +++ b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx @@ -1074,7 +1074,7 @@ cloud_replicated = OlapTable[Record]("cloud_records", OlapConfig( Replicated engines support three configuration approaches. Choose the one that fits your deployment: -1. Default Configuration (Recommended) +###### Default Omit all replication parameters. Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments: @@ -1100,7 +1100,7 @@ table = OlapTable[Record]("my_table", OlapConfig( Moose auto-injects: `/clickhouse/tables/{database}/{shard}/{table_name}` and `{replica}` in local development. ClickHouse Cloud uses its own patterns automatically. -2. Cluster Configuration +###### Cluster For multi-node deployments, specify a cluster name to use `ON CLUSTER` DDL operations: @@ -1135,7 +1135,7 @@ name = "default" - Running multi-node self-managed ClickHouse with cluster configuration - Need `ON CLUSTER` DDL for distributed operations -3. 
Explicit Replication Paths +###### Replication Paths For custom replication topology, specify both `keeper_path` and `replica_name`: diff --git a/apps/framework-docs/src/pages/moose/olap/model-table.mdx b/apps/framework-docs/src/pages/moose/olap/model-table.mdx index 4bc5c28d30..a4082712f6 100644 --- a/apps/framework-docs/src/pages/moose/olap/model-table.mdx +++ b/apps/framework-docs/src/pages/moose/olap/model-table.mdx @@ -1054,7 +1054,7 @@ cloud_replicated = OlapTable[Record]("cloud_records", OlapConfig( Replicated engines support three configuration approaches. Choose the one that fits your deployment: -1. Default Configuration (Recommended) +###### Default Omit all replication parameters. Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments: @@ -1079,7 +1079,7 @@ table = OlapTable[Record]("my_table", OlapConfig( Moose auto-injects: `/clickhouse/tables/{database}/{shard}/{table_name}` and `{replica}` in local development. ClickHouse Cloud uses its own patterns automatically. -2. Cluster Configuration +###### Cluster For multi-node deployments, specify a cluster name to use `ON CLUSTER` DDL operations: @@ -1113,7 +1113,7 @@ name = "default" - Running multi-node self-managed ClickHouse with cluster configuration - Need `ON CLUSTER` DDL for distributed operations -3. 
Explicit Replication Paths +###### Replication Paths For custom replication topology, specify both `keeper_path` and `replica_name`: From a201fdd6f29a05cec6f1fda437730c03a1046e86 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Fri, 14 Nov 2025 17:43:32 -0700 Subject: [PATCH 23/23] ok --- .../infrastructure/olap/clickhouse/queries.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index aef82acc3d..ab4fceaae6 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -2463,7 +2463,7 @@ pub fn create_table_query( } pub static DROP_TABLE_TEMPLATE: &str = r#" -DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} SYNC; +DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}} SYNC{{/if}}; "#; pub fn drop_table_query( @@ -4623,8 +4623,11 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; "DROP query should contain ON CLUSTER clause" ); - // Should have SYNC (always present) - assert!(query.contains("SYNC"), "DROP query should contain SYNC"); + // Should have SYNC (when using ON CLUSTER) + assert!( + query.contains("SYNC"), + "DROP query should contain SYNC with ON CLUSTER" + ); // Should have DROP TABLE assert!(query.contains("DROP TABLE")); @@ -4641,8 +4644,11 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; "DROP query should not contain ON CLUSTER clause when cluster_name is None" ); - // Should still have SYNC (for replicated tables) - assert!(query.contains("SYNC"), "DROP query should contain SYNC"); + // Should NOT have SYNC (only needed with ON CLUSTER) + assert!( + !query.contains("SYNC"), + "DROP query should not contain SYNC without ON CLUSTER" + ); // Should still have 
DROP TABLE assert!(query.contains("DROP TABLE"));