From 4c43dba16441c15001ab8c4465d62804f0fcc427 Mon Sep 17 00:00:00 2001
From: Anderson Nogueira
Date: Wed, 27 Aug 2025 12:29:33 +0200
Subject: [PATCH 1/2] Add comprehensive OpenShift cluster destroyer script

- Safely destroys OpenShift clusters on AWS with all associated resources
- Supports multiple destruction methods: openshift-install and manual AWS cleanup
- Handles orphaned clusters without state files
- Includes dry-run mode for preview without deletion
- Comprehensive resource counting and detailed listing
- Route53 DNS and S3 state cleanup
- Safety features: confirmation prompts, detailed logging
- Auto-detects infrastructure ID from cluster name
- Properly counts nested VPC resources (subnets, security groups, etc.)
---
 scripts/destroy-openshift-cluster.sh | 1042 ++++++++++++++++++++++++++
 1 file changed, 1042 insertions(+)
 create mode 100755 scripts/destroy-openshift-cluster.sh

diff --git a/scripts/destroy-openshift-cluster.sh b/scripts/destroy-openshift-cluster.sh
new file mode 100755
index 0000000000..5c6149d9d5
--- /dev/null
+++ b/scripts/destroy-openshift-cluster.sh
@@ -0,0 +1,1042 @@
+#!/bin/bash
+#
+# OpenShift Cluster Destroyer Script
+#
+# This script can destroy OpenShift clusters in various states:
+# - Properly installed clusters with metadata.json
+# - Orphaned clusters without state files
+# - Partially created clusters that failed during installation
+#
+# Usage: ./destroy-openshift-cluster.sh [OPTIONS]
+#
+# Required parameters (one of):
+#   --cluster-name NAME    Base cluster name (will auto-detect infra-id)
+#   --infra-id ID          Infrastructure ID (e.g., cluster-name-xxxxx)
+#   --metadata-file PATH   Path to metadata.json file
+#
+# Optional parameters:
+#   --region REGION        AWS region (default: us-east-2)
+#   --profile PROFILE      AWS profile (default: percona-dev-admin)
+#   --base-domain DOMAIN   Base domain for Route53 (default: cd.percona.com)
+#   --dry-run              Show what would be deleted without actually deleting
+#   --force                Skip confirmation prompts
+#   --verbose              Enable verbose output
+#   --s3-bucket BUCKET     S3 bucket for state files (auto-detected if not provided)
+#   --help                 Show this help message
+
+set -euo pipefail
+
+# Default values
+AWS_REGION="${AWS_REGION:-us-east-2}"
+AWS_PROFILE="${AWS_PROFILE:-percona-dev-admin}"
+BASE_DOMAIN="${BASE_DOMAIN:-cd.percona.com}"
+DRY_RUN=false
+FORCE=false
+VERBOSE=false
+CLUSTER_NAME=""
+INFRA_ID=""
+METADATA_FILE=""
+S3_BUCKET=""
+LOG_FILE="/tmp/openshift-destroy-$(date +%Y%m%d-%H%M%S).log"
+
+# Color codes for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+BOLD='\033[1m'
+NC='\033[0m' # No Color
+
+# Logging functions
+log() {
+    echo -e "${1}" | tee -a "$LOG_FILE"
+}
+
+log_info() {
+    log "${BLUE}[INFO]${NC} ${1}"
+}
+
+log_success() {
+    log "${GREEN}[SUCCESS]${NC} ${1}"
+}
+
+log_warning() {
+    log "${YELLOW}[WARNING]${NC} ${1}"
+}
+
+log_error() {
+    log "${RED}[ERROR]${NC} ${1}"
+}
+
+log_debug() {
+    if [[ "$VERBOSE" == "true" ]]; then
+        log "[DEBUG] ${1}"
+    fi
+}
+
+# Help function
+show_help() {
+    cat << EOF
+OpenShift Cluster Destroyer Script
+
+This script safely removes OpenShift clusters and all associated AWS resources.
+ +USAGE: + $(basename "$0") [OPTIONS] + +REQUIRED (one of): + --cluster-name NAME Base cluster name (will auto-detect infra-id) + --infra-id ID Infrastructure ID (e.g., cluster-name-xxxxx) + --metadata-file PATH Path to metadata.json file + +OPTIONS: + --region REGION AWS region (default: us-east-2) + --profile PROFILE AWS profile (default: percona-dev-admin) + --base-domain DOMAIN Base domain for Route53 (default: cd.percona.com) + --dry-run Show what would be deleted without actually deleting + --force Skip confirmation prompts + --verbose Enable verbose output + --s3-bucket BUCKET S3 bucket for state files (auto-detected if not provided) + --help Show this help message + +EXAMPLES: + # Destroy using cluster name (auto-detects infra-id) + $(basename "$0") --cluster-name helm-test + + # Destroy using specific infrastructure ID + $(basename "$0") --infra-id helm-test-tqtlx + + # Dry run to see what would be deleted + $(basename "$0") --cluster-name test-cluster --dry-run + + # Destroy using metadata file + $(basename "$0") --metadata-file /path/to/metadata.json + + # Force deletion without prompts + $(basename "$0") --infra-id helm-test-tqtlx --force + +NOTES: + - The script will attempt to use openshift-install if metadata exists + - Falls back to manual AWS resource deletion for orphaned clusters + - All operations are logged to: $LOG_FILE + +EOF + exit 0 +} + +# Parse command line arguments +parse_args() { + while [[ $# -gt 0 ]]; do + case $1 in + --cluster-name) + CLUSTER_NAME="$2" + shift 2 + ;; + --infra-id) + INFRA_ID="$2" + shift 2 + ;; + --metadata-file) + METADATA_FILE="$2" + shift 2 + ;; + --region) + AWS_REGION="$2" + shift 2 + ;; + --profile) + AWS_PROFILE="$2" + shift 2 + ;; + --base-domain) + BASE_DOMAIN="$2" + shift 2 + ;; + --s3-bucket) + S3_BUCKET="$2" + shift 2 + ;; + --dry-run) + DRY_RUN=true + shift + ;; + --force) + FORCE=true + shift + ;; + --verbose) + VERBOSE=true + shift + ;; + --help|-h) + show_help + ;; + *) + log_error "Unknown option: $1" + show_help + ;; + esac + done +} + +# Validate inputs +validate_inputs() { + # Check if at least one identifier is provided + if [[ -z "$CLUSTER_NAME" && -z "$INFRA_ID" && -z "$METADATA_FILE" ]]; then + log_error "You must provide either --cluster-name, --infra-id, or --metadata-file" + show_help + fi + + # Check AWS credentials + if ! 
aws sts get-caller-identity --profile "$AWS_PROFILE" &>/dev/null; then + log_error "Failed to authenticate with AWS profile: $AWS_PROFILE" + log_info "Try running: aws sso login --profile $AWS_PROFILE" + exit 1 + fi + + # Auto-detect S3 bucket if not provided + if [[ -z "$S3_BUCKET" ]]; then + local account_id=$(aws sts get-caller-identity --profile "$AWS_PROFILE" --query Account --output text) + S3_BUCKET="openshift-clusters-${account_id}-${AWS_REGION}" + log_debug "Auto-detected S3 bucket: $S3_BUCKET" + fi +} + +# Extract metadata from file +extract_metadata() { + local metadata_file="$1" + + if [[ -f "$metadata_file" ]]; then + INFRA_ID=$(jq -r '.infraID' "$metadata_file" 2>/dev/null || echo "") + CLUSTER_NAME=$(jq -r '.clusterName' "$metadata_file" 2>/dev/null || echo "") + AWS_REGION=$(jq -r '.aws.region // .platform.aws.region' "$metadata_file" 2>/dev/null || echo "$AWS_REGION") + + if [[ -n "$INFRA_ID" ]]; then + log_info "Extracted from metadata: cluster=$CLUSTER_NAME, infra-id=$INFRA_ID, region=$AWS_REGION" + return 0 + fi + fi + + return 1 +} + +# Auto-detect infrastructure ID from AWS resources +detect_infra_id() { + local cluster_name="$1" + + log_info "Searching for infrastructure ID for cluster: $cluster_name" + + # Search for VPCs with cluster tags + local vpc_tags=$(aws ec2 describe-vpcs \ + --filters "Name=tag-key,Values=kubernetes.io/cluster/${cluster_name}*" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Vpcs[].Tags[?starts_with(Key, 'kubernetes.io/cluster/')].Key" \ + --output text 2>/dev/null) + + if [[ -n "$vpc_tags" ]]; then + # Extract infra ID from tag + INFRA_ID=$(echo "$vpc_tags" | sed 's/kubernetes.io\/cluster\///' | head -1) + log_success "Auto-detected infrastructure ID: $INFRA_ID" + return 0 + fi + + # Try S3 metadata + if aws s3 ls "s3://${S3_BUCKET}/${cluster_name}/metadata.json" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" &>/dev/null; then + + local temp_metadata="/tmp/${cluster_name}-metadata.json" + aws s3 cp "s3://${S3_BUCKET}/${cluster_name}/metadata.json" "$temp_metadata" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" 2>/dev/null + + if extract_metadata "$temp_metadata"; then + rm -f "$temp_metadata" + return 0 + fi + rm -f "$temp_metadata" + fi + + log_warning "Could not auto-detect infrastructure ID for cluster: $cluster_name" + return 1 +} + +# Count AWS resources for a cluster +count_resources() { + local infra_id="$1" + local resource_count=0 + + # Log to stderr so it doesn't interfere with return value + log_info "Counting resources for infrastructure ID: $infra_id" >&2 + + # EC2 Instances + local instances=$(aws ec2 describe-instances \ + --filters "Name=tag:kubernetes.io/cluster/$infra_id,Values=owned" \ + "Name=instance-state-name,Values=running,stopped,stopping,pending" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Reservations[].Instances[].InstanceId" --output text 2>/dev/null | wc -w) + ((resource_count += instances)) + [[ $instances -gt 0 ]] && log_info " EC2 Instances: $instances" >&2 + + # Load Balancers + local elbs=$(aws elb describe-load-balancers \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "LoadBalancerDescriptions[?contains(LoadBalancerName, '$infra_id')].LoadBalancerName" \ + --output text 2>/dev/null | wc -w) + ((resource_count += elbs)) + [[ $elbs -gt 0 ]] && log_info " Classic Load Balancers: $elbs" >&2 + + local nlbs=$(aws elbv2 describe-load-balancers \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "LoadBalancers[?contains(LoadBalancerName, 
'$infra_id')].LoadBalancerArn" \ + --output text 2>/dev/null | wc -w) + ((resource_count += nlbs)) + [[ $nlbs -gt 0 ]] && log_info " Network/Application Load Balancers: $nlbs" >&2 + + # NAT Gateways + local nats=$(aws ec2 describe-nat-gateways \ + --filter "Name=tag:kubernetes.io/cluster/$infra_id,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "NatGateways[?State!='deleted'].NatGatewayId" --output text 2>/dev/null | wc -w) + ((resource_count += nats)) + [[ $nats -gt 0 ]] && log_info " NAT Gateways: $nats" >&2 + + # Elastic IPs + local eips=$(aws ec2 describe-addresses \ + --filters "Name=tag:kubernetes.io/cluster/$infra_id,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Addresses[].AllocationId" --output text 2>/dev/null | wc -w) + ((resource_count += eips)) + [[ $eips -gt 0 ]] && log_info " Elastic IPs: $eips" >&2 + + # VPCs and their nested resources + local vpcs=$(aws ec2 describe-vpcs \ + --filters "Name=tag:kubernetes.io/cluster/$infra_id,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Vpcs[].VpcId" --output text 2>/dev/null | wc -w) + + if [[ $vpcs -gt 0 ]]; then + local vpc_id=$(aws ec2 describe-vpcs \ + --filters "Name=tag:kubernetes.io/cluster/$infra_id,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Vpcs[0].VpcId" --output text 2>/dev/null) + + if [[ "$vpc_id" != "None" && -n "$vpc_id" ]]; then + # Count VPC itself + ((resource_count += 1)) + log_info " VPCs: 1" >&2 + + # Count subnets + local subnet_count=$(aws ec2 describe-subnets \ + --filters "Name=vpc-id,Values=$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Subnets | length(@)" --output text 2>/dev/null || echo 0) + ((resource_count += subnet_count)) + [[ $subnet_count -gt 0 ]] && log_info " Subnets: $subnet_count" >&2 + + # Count security groups (excluding default) + local sg_count=$(aws ec2 describe-security-groups \ + --filters "Name=vpc-id,Values=$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "SecurityGroups[?GroupName!='default'] | length(@)" --output text 2>/dev/null || echo 0) + ((resource_count += sg_count)) + [[ $sg_count -gt 0 ]] && log_info " Security Groups: $sg_count" >&2 + + # Count route tables (excluding main) + local rt_count=$(aws ec2 describe-route-tables \ + --filters "Name=vpc-id,Values=$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "RouteTables[?Associations[0].Main!=\`true\`] | length(@)" --output text 2>/dev/null || echo 0) + ((resource_count += rt_count)) + [[ $rt_count -gt 0 ]] && log_info " Route Tables: $rt_count" >&2 + + # Count Internet Gateways + local igw_count=$(aws ec2 describe-internet-gateways \ + --filters "Name=attachment.vpc-id,Values=$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "InternetGateways | length(@)" --output text 2>/dev/null || echo 0) + ((resource_count += igw_count)) + [[ $igw_count -gt 0 ]] && log_info " Internet Gateways: $igw_count" >&2 + fi + fi + + echo "$resource_count" +} + +# Try to destroy using openshift-install +destroy_with_openshift_install() { + local cluster_dir="$1" + + log_info "Attempting destruction with openshift-install..." + + # Check if openshift-install is available + if ! command -v openshift-install &> /dev/null; then + log_warning "openshift-install not found in PATH" + return 1 + fi + + # Check if metadata.json exists + if [[ ! 
-f "${cluster_dir}/metadata.json" ]]; then + log_warning "No metadata.json found in $cluster_dir" + return 1 + fi + + if [[ "$DRY_RUN" == "true" ]]; then + log_info "[DRY RUN] Would run: openshift-install destroy cluster --dir=$cluster_dir" + return 0 + fi + + # Run openshift-install destroy + cd "$cluster_dir" + if AWS_PROFILE="$AWS_PROFILE" openshift-install destroy cluster --log-level=info 2>&1 | tee -a "$LOG_FILE"; then + log_success "Successfully destroyed cluster using openshift-install" + return 0 + else + log_warning "openshift-install destroy failed, falling back to manual cleanup" + return 1 + fi +} + +# Clean up Route53 DNS records +cleanup_route53_records() { + local infra_id="$1" + local cluster_name="${CLUSTER_NAME:-${infra_id%-*}}" + local base_domain="${BASE_DOMAIN:-cd.percona.com}" + + log_info " Checking Route53 DNS records..." + log_debug "Looking for: api.$cluster_name.$base_domain and *.apps.$cluster_name.$base_domain" + + # Get hosted zone ID + local zone_id=$(aws route53 list-hosted-zones \ + --query "HostedZones[?Name=='${base_domain}.'].Id" \ + --output text --profile "$AWS_PROFILE" 2>/dev/null | head -1) + + if [[ -z "$zone_id" ]]; then + log_debug "No hosted zone found for domain: $base_domain" + return 0 + fi + + # Look for DNS records related to the cluster + # Check both api. and *.apps. patterns + local api_record=$(aws route53 list-resource-record-sets \ + --hosted-zone-id "$zone_id" \ + --query "ResourceRecordSets[?Name=='api.${cluster_name}.${base_domain}.']" \ + --profile "$AWS_PROFILE" 2>/dev/null) + + local apps_record=$(aws route53 list-resource-record-sets \ + --hosted-zone-id "$zone_id" \ + --query "ResourceRecordSets[?Name=='\\052.apps.${cluster_name}.${base_domain}.']" \ + --profile "$AWS_PROFILE" 2>/dev/null) + + local found_records=false + + # Check if we found any records + if [[ "$api_record" != "[]" && "$api_record" != "null" ]]; then + found_records=true + fi + if [[ "$apps_record" != "[]" && "$apps_record" != "null" ]]; then + found_records=true + fi + + if [[ "$found_records" == "false" ]]; then + log_info " No Route53 records found for cluster" + return 0 + fi + + log_info " Found Route53 DNS records to clean up" + + # Process API record if found + if [[ "$api_record" != "[]" && "$api_record" != "null" ]]; then + echo "$api_record" | jq -c '.[]' | while read -r record; do + local name=$(echo "$record" | jq -r '.Name') + local type=$(echo "$record" | jq -r '.Type') + + if [[ "$DRY_RUN" == "false" ]]; then + # Create change batch for deletion + local change_batch=$(cat </dev/null 2>&1 || true + + log_info " Deleted DNS record: $name ($type)" + else + log_info " [DRY RUN] Would delete DNS record: $name ($type)" + fi + done + fi + + # Process apps wildcard record if found + if [[ "$apps_record" != "[]" && "$apps_record" != "null" ]]; then + echo "$apps_record" | jq -c '.[]' | while read -r record; do + local name=$(echo "$record" | jq -r '.Name') + local type=$(echo "$record" | jq -r '.Type') + + if [[ "$DRY_RUN" == "false" ]]; then + # Create change batch for deletion + local change_batch=$(cat </dev/null 2>&1 || true + + log_info " Deleted DNS record: $name ($type)" + else + log_info " [DRY RUN] Would delete DNS record: $name ($type)" + fi + done + fi +} + +# Manual AWS resource cleanup +destroy_aws_resources() { + local infra_id="$1" + + log_info "Starting manual AWS resource cleanup for: $infra_id" + + if [[ "$DRY_RUN" == "true" ]]; then + log_warning "DRY RUN MODE - No resources will be deleted" + fi + + # 1. 
Terminate EC2 Instances + log_info "Step 1/9: Terminating EC2 instances..." + local instance_ids=$(aws ec2 describe-instances \ + --filters "Name=tag:kubernetes.io/cluster/$infra_id,Values=owned" \ + "Name=instance-state-name,Values=running,stopped,stopping,pending" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Reservations[].Instances[].InstanceId" --output text) + + if [[ -n "$instance_ids" ]]; then + if [[ "$DRY_RUN" == "false" ]]; then + aws ec2 terminate-instances --instance-ids $instance_ids \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" >/dev/null + log_info " Waiting for instances to terminate..." + aws ec2 wait instance-terminated --instance-ids $instance_ids \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" 2>/dev/null || true + else + log_info " [DRY RUN] Would terminate: $instance_ids" + fi + else + log_info " No instances found" + fi + + # 2. Delete Load Balancers + log_info "Step 2/9: Deleting load balancers..." + + # Classic ELBs + local elbs=$(aws elb describe-load-balancers \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "LoadBalancerDescriptions[?contains(LoadBalancerName, '$infra_id')].LoadBalancerName" \ + --output text) + + for elb in $elbs; do + if [[ "$DRY_RUN" == "false" ]]; then + aws elb delete-load-balancer --load-balancer-name "$elb" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" + log_info " Deleted Classic ELB: $elb" + else + log_info " [DRY RUN] Would delete Classic ELB: $elb" + fi + done + + # ALBs/NLBs + local nlbs=$(aws elbv2 describe-load-balancers \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "LoadBalancers[?contains(LoadBalancerName, '$infra_id')].LoadBalancerArn" \ + --output text) + + for nlb in $nlbs; do + if [[ "$DRY_RUN" == "false" ]]; then + aws elbv2 delete-load-balancer --load-balancer-arn "$nlb" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" + log_info " Deleted NLB/ALB: $(basename $nlb)" + else + log_info " [DRY RUN] Would delete NLB/ALB: $(basename $nlb)" + fi + done + + # 3. Delete NAT Gateways + log_info "Step 3/9: Deleting NAT gateways..." + local nat_gateways=$(aws ec2 describe-nat-gateways \ + --filter "Name=tag:kubernetes.io/cluster/$infra_id,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "NatGateways[?State!='deleted'].NatGatewayId" --output text) + + for nat_id in $nat_gateways; do + if [[ "$DRY_RUN" == "false" ]]; then + aws ec2 delete-nat-gateway --nat-gateway-id "$nat_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" >/dev/null + log_info " Deleted NAT Gateway: $nat_id" + else + log_info " [DRY RUN] Would delete NAT Gateway: $nat_id" + fi + done + + # 4. Release Elastic IPs + log_info "Step 4/9: Releasing Elastic IPs..." + local eips=$(aws ec2 describe-addresses \ + --filters "Name=tag:kubernetes.io/cluster/$infra_id,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Addresses[].AllocationId" --output text) + + for eip in $eips; do + if [[ "$DRY_RUN" == "false" ]]; then + aws ec2 release-address --allocation-id "$eip" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" 2>/dev/null || true + log_info " Released Elastic IP: $eip" + else + log_info " [DRY RUN] Would release Elastic IP: $eip" + fi + done + + # 5. Delete Security Groups (wait a bit for dependencies to clear) + if [[ "$DRY_RUN" == "false" ]]; then + log_info " Waiting for network interfaces to detach..." + sleep 30 + fi + + log_info "Step 5/9: Deleting security groups..." 
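+    # Cluster security groups commonly reference themselves and one another, so deleting
+    # them directly fails with a DependencyViolation. The code below therefore revokes each
+    # group's self-referencing ingress rules first, then deletes the groups, ignoring errors
+    # so a group still attached to a lingering network interface does not abort the run.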
+ local vpc_id=$(aws ec2 describe-vpcs \ + --filters "Name=tag:kubernetes.io/cluster/$infra_id,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Vpcs[0].VpcId" --output text) + + if [[ "$vpc_id" != "None" && -n "$vpc_id" ]]; then + local sgs=$(aws ec2 describe-security-groups \ + --filters "Name=vpc-id,Values=$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "SecurityGroups[?GroupName!='default'].GroupId" --output text) + + # Delete rules first to avoid dependency issues + for sg in $sgs; do + if [[ "$DRY_RUN" == "false" ]]; then + # Remove all ingress rules + aws ec2 revoke-security-group-ingress --group-id "$sg" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --source-group "$sg" --protocol all 2>/dev/null || true + fi + done + + # Now delete the security groups + for sg in $sgs; do + if [[ "$DRY_RUN" == "false" ]]; then + aws ec2 delete-security-group --group-id "$sg" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" 2>/dev/null || true + log_info " Deleted Security Group: $sg" + else + log_info " [DRY RUN] Would delete Security Group: $sg" + fi + done + fi + + # 6. Delete Subnets + log_info "Step 6/9: Deleting subnets..." + if [[ "$vpc_id" != "None" && -n "$vpc_id" ]]; then + local subnets=$(aws ec2 describe-subnets \ + --filters "Name=vpc-id,Values=$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Subnets[].SubnetId" --output text) + + for subnet in $subnets; do + if [[ "$DRY_RUN" == "false" ]]; then + aws ec2 delete-subnet --subnet-id "$subnet" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" 2>/dev/null || true + log_info " Deleted Subnet: $subnet" + else + log_info " [DRY RUN] Would delete Subnet: $subnet" + fi + done + fi + + # 7. Delete Internet Gateway and Route Tables + log_info "Step 7/9: Deleting internet gateway and route tables..." + if [[ "$vpc_id" != "None" && -n "$vpc_id" ]]; then + # Internet Gateway + local igw=$(aws ec2 describe-internet-gateways \ + --filters "Name=attachment.vpc-id,Values=$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "InternetGateways[0].InternetGatewayId" --output text) + + if [[ "$igw" != "None" && -n "$igw" ]]; then + if [[ "$DRY_RUN" == "false" ]]; then + aws ec2 detach-internet-gateway --internet-gateway-id "$igw" --vpc-id "$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" 2>/dev/null || true + aws ec2 delete-internet-gateway --internet-gateway-id "$igw" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" 2>/dev/null || true + log_info " Deleted Internet Gateway: $igw" + else + log_info " [DRY RUN] Would delete Internet Gateway: $igw" + fi + fi + + # Route Tables + local rts=$(aws ec2 describe-route-tables \ + --filters "Name=vpc-id,Values=$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "RouteTables[?Associations[0].Main!=\`true\`].RouteTableId" --output text) + + for rt in $rts; do + if [[ "$DRY_RUN" == "false" ]]; then + aws ec2 delete-route-table --route-table-id "$rt" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" 2>/dev/null || true + log_info " Deleted Route Table: $rt" + else + log_info " [DRY RUN] Would delete Route Table: $rt" + fi + done + fi + + # 8. Delete VPC + log_info "Step 8/9: Deleting VPC..." 
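+    # delete-vpc only succeeds once every dependent resource (subnets, route tables,
+    # gateways, endpoints, network interfaces) is gone, so the deletion below tolerates
+    # failure; the post-destruction verification in main() re-counts resources and reports
+    # anything that could not be removed.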
+ if [[ "$vpc_id" != "None" && -n "$vpc_id" ]]; then + if [[ "$DRY_RUN" == "false" ]]; then + aws ec2 delete-vpc --vpc-id "$vpc_id" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" 2>/dev/null || true + log_info " Deleted VPC: $vpc_id" + else + log_info " [DRY RUN] Would delete VPC: $vpc_id" + fi + fi + + # 9. Clean up Route53 DNS records + log_info "Step 9/9: Cleaning up Route53 DNS records..." + cleanup_route53_records "$infra_id" + + log_success "Manual resource cleanup completed" +} + +# Clean up S3 state +cleanup_s3_state() { + local cluster_name="$1" + + log_info "Cleaning up S3 state for cluster: $cluster_name" + + if aws s3 ls "s3://${S3_BUCKET}/${cluster_name}/" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" &>/dev/null; then + + if [[ "$DRY_RUN" == "false" ]]; then + aws s3 rm "s3://${S3_BUCKET}/${cluster_name}/" --recursive \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" >/dev/null + log_success "Deleted S3 state: s3://${S3_BUCKET}/${cluster_name}/" + else + log_info "[DRY RUN] Would delete S3 state: s3://${S3_BUCKET}/${cluster_name}/" + fi + else + log_info "No S3 state found for cluster: $cluster_name" + fi +} + +# Main execution +main() { + log_info "OpenShift Cluster Destroyer started at $(date)" + log_info "Log file: $LOG_FILE" + + # Parse and validate inputs + parse_args "$@" + validate_inputs + + # Extract metadata if file provided + if [[ -n "$METADATA_FILE" ]]; then + if ! extract_metadata "$METADATA_FILE"; then + log_error "Failed to extract metadata from: $METADATA_FILE" + exit 1 + fi + fi + + # Auto-detect infrastructure ID if needed + if [[ -z "$INFRA_ID" && -n "$CLUSTER_NAME" ]]; then + if ! detect_infra_id "$CLUSTER_NAME"; then + log_error "Could not find infrastructure ID for cluster: $CLUSTER_NAME" + log_info "The cluster might not exist or might already be deleted" + exit 1 + fi + fi + + # Ensure we have an infrastructure ID at this point + if [[ -z "$INFRA_ID" ]]; then + log_error "No infrastructure ID found or provided" + exit 1 + fi + + # Count resources + echo "" + log_info "${BOLD}Cluster Destruction Summary${NC}" + log_info "Cluster Name: ${CLUSTER_NAME:-unknown}" + log_info "Infrastructure ID: $INFRA_ID" + log_info "AWS Region: $AWS_REGION" + log_info "AWS Profile: $AWS_PROFILE" + log_info "Mode: $([ "$DRY_RUN" == "true" ] && echo "DRY RUN" || echo "LIVE")" + echo "" + + local resource_count=$(count_resources "$INFRA_ID") + log_info "Total AWS resources found: $resource_count" + + if [[ "$resource_count" -eq 0 ]]; then + log_warning "No AWS resources found for this cluster" + cleanup_s3_state "${CLUSTER_NAME:-$INFRA_ID}" + log_success "Cluster cleanup completed (no resources to delete)" + exit 0 + fi + + # Show detailed resource list for both dry-run and normal mode + # In normal mode, also show confirmation prompt (unless --force is used) + if [[ "$resource_count" -gt 0 ]]; then + echo "" + log_info "${BOLD}$([ "$DRY_RUN" == "true" ] && echo "RESOURCES THAT WOULD BE DELETED:" || echo "RESOURCES TO BE DELETED:")${NC}" + echo "" + + # List EC2 Instances + local instances=$(aws ec2 describe-instances \ + --filters "Name=tag:kubernetes.io/cluster/$INFRA_ID,Values=owned" \ + "Name=instance-state-name,Values=running,stopped,stopping,pending" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Reservations[].Instances[].[InstanceId,InstanceType,Tags[?Key=='Name'].Value|[0]]" \ + --output text 2>/dev/null) + + if [[ -n "$instances" ]]; then + log_info "EC2 Instances:" + echo "$instances" | while read id type name; do + echo " - $id 
($type) - $name" + done + fi + + # List Load Balancers + local nlbs=$(aws elbv2 describe-load-balancers \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "LoadBalancers[?contains(LoadBalancerName, '$INFRA_ID')].[LoadBalancerName,Type]" \ + --output text 2>/dev/null) + + if [[ -n "$nlbs" ]]; then + log_info "Load Balancers:" + echo "$nlbs" | while read name type; do + echo " - $name ($type)" + done + fi + + # List NAT Gateways + local nats=$(aws ec2 describe-nat-gateways \ + --filter "Name=tag:kubernetes.io/cluster/$INFRA_ID,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "NatGateways[?State!='deleted'].[NatGatewayId,State]" \ + --output text 2>/dev/null) + + if [[ -n "$nats" ]]; then + log_info "NAT Gateways:" + echo "$nats" | while read id state; do + echo " - $id ($state)" + done + fi + + # List Elastic IPs + local eips=$(aws ec2 describe-addresses \ + --filters "Name=tag:kubernetes.io/cluster/$INFRA_ID,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Addresses[].[AllocationId,PublicIp]" \ + --output text 2>/dev/null) + + if [[ -n "$eips" ]]; then + log_info "Elastic IPs:" + echo "$eips" | while read id ip; do + echo " - $id ($ip)" + done + fi + + # List VPC + local vpc=$(aws ec2 describe-vpcs \ + --filters "Name=tag:kubernetes.io/cluster/$INFRA_ID,Values=owned" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Vpcs[0].[VpcId,CidrBlock]" \ + --output text 2>/dev/null) + + if [[ -n "$vpc" && "$vpc" != "None" ]]; then + log_info "VPC:" + echo " - $(echo $vpc | awk '{print $1}') ($(echo $vpc | awk '{print $2}'))" + + # Count subnets + local subnet_count=$(aws ec2 describe-subnets \ + --filters "Name=vpc-id,Values=$(echo $vpc | awk '{print $1}')" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "Subnets | length(@)" --output text 2>/dev/null) + echo " - $subnet_count subnets" + + # Count security groups + local sg_count=$(aws ec2 describe-security-groups \ + --filters "Name=vpc-id,Values=$(echo $vpc | awk '{print $1}')" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "SecurityGroups | length(@)" --output text 2>/dev/null) + echo " - $sg_count security groups" + + # Count route tables + local rt_count=$(aws ec2 describe-route-tables \ + --filters "Name=vpc-id,Values=$(echo $vpc | awk '{print $1}')" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" \ + --query "RouteTables | length(@)" --output text 2>/dev/null) + echo " - $rt_count route tables" + fi + + # Check Route53 records + local cluster_name="${CLUSTER_NAME:-${INFRA_ID%-*}}" + local zone_id=$(aws route53 list-hosted-zones \ + --query "HostedZones[?Name=='${BASE_DOMAIN}.'].Id" \ + --output text --profile "$AWS_PROFILE" 2>/dev/null | head -1) + + if [[ -n "$zone_id" ]]; then + local dns_count=0 + + # Check for api record + if aws route53 list-resource-record-sets \ + --hosted-zone-id "$zone_id" \ + --query "ResourceRecordSets[?Name=='api.${cluster_name}.${BASE_DOMAIN}.']" \ + --profile "$AWS_PROFILE" 2>/dev/null | grep -q "api.${cluster_name}"; then + ((dns_count++)) + fi + + # Check for apps record + if aws route53 list-resource-record-sets \ + --hosted-zone-id "$zone_id" \ + --query "ResourceRecordSets[?Name=='\\052.apps.${cluster_name}.${BASE_DOMAIN}.']" \ + --profile "$AWS_PROFILE" 2>/dev/null | grep -q "apps.${cluster_name}"; then + ((dns_count++)) + fi + + if [[ $dns_count -gt 0 ]]; then + log_info "Route53 DNS Records:" + echo " - api.${cluster_name}.${BASE_DOMAIN}" + echo " - 
*.apps.${cluster_name}.${BASE_DOMAIN}" + fi + fi + + # Check S3 state + if [[ -n "$CLUSTER_NAME" ]]; then + if aws s3 ls "s3://${S3_BUCKET}/${CLUSTER_NAME}/" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" &>/dev/null; then + log_info "S3 State:" + echo " - s3://${S3_BUCKET}/${CLUSTER_NAME}/" + fi + fi + + echo "" + + # Add summary + local total_resources=0 + [[ -n "$instances" ]] && total_resources=$((total_resources + $(echo "$instances" | wc -l))) + [[ -n "$nlbs" ]] && total_resources=$((total_resources + $(echo "$nlbs" | wc -l))) + [[ -n "$nats" ]] && total_resources=$((total_resources + $(echo "$nats" | wc -l))) + [[ -n "$eips" ]] && total_resources=$((total_resources + $(echo "$eips" | wc -l))) + [[ -n "$vpc" && "$vpc" != "None" ]] && total_resources=$((total_resources + 1 + subnet_count + sg_count + rt_count)) + + log_info "${BOLD}TOTAL: Approximately $total_resources AWS resources $([ "$DRY_RUN" == "true" ] && echo "would be" || echo "will be") deleted${NC}" + + # Show confirmation only in normal mode (not dry-run) + if [[ "$DRY_RUN" != "true" ]]; then + if [[ "$FORCE" != "true" ]]; then + echo "" + log_warning "[!] THIS ACTION CANNOT BE UNDONE!" + echo "" + read -p "Are you sure you want to destroy ALL the above resources? Type 'yes' to continue: " -r confirm + if [[ "$confirm" != "yes" ]]; then + log_warning "Destruction cancelled by user" + exit 0 + fi + fi + fi + fi + + # Priority order for destruction methods: + # 1. Try openshift-install with S3 state (if available) + # 2. Fall back to manual AWS cleanup + + local use_openshift_install=false + + # Try openshift-install if we have cluster name and S3 state + if [[ -n "$CLUSTER_NAME" ]]; then + log_info "Checking for S3 state to use openshift-install..." + + # Check if S3 has cluster state + if aws s3 ls "s3://${S3_BUCKET}/${CLUSTER_NAME}/metadata.json" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" &>/dev/null; then + + log_info "Found cluster state in S3, downloading for openshift-install..." + + local temp_dir="/tmp/openshift-destroy-${CLUSTER_NAME}-$$" + mkdir -p "$temp_dir" + + # Download all cluster state from S3 + if aws s3 sync "s3://${S3_BUCKET}/${CLUSTER_NAME}/" "$temp_dir/" \ + --region "$AWS_REGION" --profile "$AWS_PROFILE" --quiet; then + + if [[ -f "$temp_dir/metadata.json" ]]; then + log_info "Successfully downloaded cluster state, using openshift-install..." + + # Extract infrastructure ID from metadata if not already set + if [[ -z "$INFRA_ID" ]]; then + INFRA_ID=$(jq -r '.infraID // empty' "$temp_dir/metadata.json" 2>/dev/null) + if [[ -n "$INFRA_ID" ]]; then + log_info "Extracted infrastructure ID: $INFRA_ID" + fi + fi + + # Try openshift-install destroy + if destroy_with_openshift_install "$temp_dir"; then + use_openshift_install=true + log_success "OpenShift installer completed successfully" + else + log_warning "OpenShift installer failed or incomplete, will run manual cleanup" + fi + else + log_warning "No metadata.json found in S3 state" + fi + else + log_warning "Failed to download cluster state from S3" + fi + + rm -rf "$temp_dir" + else + log_info "No S3 state found for cluster: $CLUSTER_NAME" + fi + fi + + # Always run manual AWS cleanup to ensure all resources are deleted + # This catches any resources that openshift-install might have missed + log_info "Running comprehensive AWS resource cleanup..." 
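+    # destroy_aws_resources is idempotent: each step first queries by the cluster tag
+    # (or by the infra-id embedded in resource names), so re-running it after a successful
+    # openshift-install destroy simply finds nothing left to delete.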
+    destroy_aws_resources "$INFRA_ID"
+
+    # Clean up S3 state
+    if [[ -n "$CLUSTER_NAME" ]]; then
+        cleanup_s3_state "$CLUSTER_NAME"
+    fi
+
+    # Final verification
+    echo ""
+    log_info "${BOLD}Post-destruction verification...${NC}"
+    local remaining=$(count_resources "$INFRA_ID")
+    if [[ "$remaining" -eq 0 ]]; then
+        log_success "All cluster resources successfully removed!"
+    else
+        log_warning "$remaining resources may still exist. Check AWS console."
+    fi
+
+    log_info "Destruction completed at $(date)"
+    log_info "Full log available at: $LOG_FILE"
+}
+
+# Run main function
+main "$@"

From 02270d54982c3b63a6c4f0b1df240a87f7e865e8 Mon Sep 17 00:00:00 2001
From: Peter Sirotnak
Date: Thu, 4 Sep 2025 13:32:55 +0200
Subject: [PATCH 2/2] PMM-7: Fix query source for ps

---
 pmm/v3/pmm3-ui-tests-nightly.groovy | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pmm/v3/pmm3-ui-tests-nightly.groovy b/pmm/v3/pmm3-ui-tests-nightly.groovy
index 90b593403b..d39605f48a 100644
--- a/pmm/v3/pmm3-ui-tests-nightly.groovy
+++ b/pmm/v3/pmm3-ui-tests-nightly.groovy
@@ -329,7 +329,7 @@ pipeline {
         }
         stage('ps single and mongo pss client') {
             steps {
-                runStagingClient(DOCKER_VERSION, CLIENT_VERSION, '--database ps --database psmdb,SETUP_TYPE=pss', 'yes', env.VM_IP, 'mysql-node', ENABLE_PULL_MODE, PXC_VERSION, PS_VERSION, MS_VERSION, PGSQL_VERSION, PDPGSQL_VERSION, MD_VERSION, PSMDB_VERSION, MODB_VERSION, QUERY_SOURCE, QA_INTEGRATION_GIT_BRANCH, ADMIN_PASSWORD)
+                runStagingClient(DOCKER_VERSION, CLIENT_VERSION, '--database ps,QUERY_SOURCE=slowlog --database psmdb,SETUP_TYPE=pss', 'yes', env.VM_IP, 'mysql-node', ENABLE_PULL_MODE, PXC_VERSION, PS_VERSION, MS_VERSION, PGSQL_VERSION, PDPGSQL_VERSION, MD_VERSION, PSMDB_VERSION, MODB_VERSION, QUERY_SOURCE, QA_INTEGRATION_GIT_BRANCH, ADMIN_PASSWORD)
             }
         }
         stage('pdpgsql, pgsql and pdpgsql patroni client') {