From be36e77f06db20f362bae41223161abfdeede3ef Mon Sep 17 00:00:00 2001 From: FosterG4 Date: Sat, 26 Jul 2025 01:14:39 +0700 Subject: [PATCH 1/5] Refactor and Improve Proxy Scraper File Improvements: proxyChecker.py: - Split load_proxies_from_file into smaller helper functions - Refactored check() function to reduce complexity - Broke down main() into focused setup functions - Added _prepare_checking_environment, _create_proxy_checker helpers proxyGeolocation.py: - Refactored get_ip_info() with _check_special_addresses helper - Split parse_proxy_list() into focused parsing functions - Simplified _handle_source_analysis with validation helpers - Modularized main() function with environment setup proxyScraper.py: - Enhanced ProxyListApiScraper.handle() with data processing helpers - Refactored scrape() function into configuration and execution phases - Modularized main() with argument parsing and logging setup - Added proper type hints with Optional import --- .github/workflows/tests.yml | 4 +- .gitignore | 52 ++- README.md | 225 +++++++++++-- dev_requirements.txt | 16 +- proxyChecker.py | 593 +++++++++++++++++++++++++++------ proxyGeolocation.py | 504 ++++++++++++++++++++++++++++ proxyScraper.py | 642 +++++++++++++++++++++++++++++------- requirements.txt | 13 +- setup.py | 16 +- user_agents.txt | 37 +++ 10 files changed, 1833 insertions(+), 269 deletions(-) create mode 100644 proxyGeolocation.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d7fdae8..3004d06 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -41,4 +41,6 @@ jobs: - name: Run proxyScraper run: python3 proxyScraper.py -p http - name: Run proxyChecker - run: python3 proxyChecker.py -t 20 -s google.com -l output.txt \ No newline at end of file + run: python3 proxyChecker.py -t 20 -s google.com -l output.txt + - name: Run proxyGeolocation + run: python3 proxyGeolocation.py -i 8.8.8.8 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 0bd7639..82bacc9 100644 --- a/.gitignore +++ b/.gitignore @@ -138,4 +138,54 @@ dmypy.json # Cython debug symbols cython_debug/ -output.txt \ No newline at end of file +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be added to the global gitignore or merged into this project gitignore +.idea/ + +# VS Code +.vscode/ +*.code-workspace + +# Project specific files +.github/copilot-instructions.md +output.txt +test_small.txt +test_local.py +*.txt +!requirements.txt +!dev_requirements.txt +!user_agents.txt +!README.txt + +# Temporary files +*.tmp +*.temp +*.swp +*.swo +*~ + +# OS generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Logs +*.log +logs/ + +# Backup files +*.bak +*.backup + +# IDE files +*.sublime-project +*.sublime-workspace + +# Poetry/PDM (modern Python package managers) +poetry.lock +.pdm.toml \ No newline at end of file diff --git a/README.md b/README.md index 7e93215..cef17f5 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,20 @@ -# Proxy Scraper and Checker +# Proxy Scraper & Checker [![Tests](https://github.com/iw4p/proxy-scraper/actions/workflows/tests.yml/badge.svg)](https://github.com/iw4p/proxy-scraper/actions/workflows/tests.yml) [![Downloads](https://static.pepy.tech/badge/proxyz)](https://pepy.tech/project/proxyz) -Scrape more than 1K HTTP - HTTPS - SOCKS4 - SOCKS5 proxies in less than 2 seconds. 
+**Fast, reliable proxy scraper that collects 30K+ HTTP/HTTPS/SOCKS proxies from 24+ sources in seconds.** -Scraping fresh public proxies from different sources: +โœจ **Features:** +- โšก **Fast scraping** - All sources scraped concurrently +- ๐Ÿ›ก๏ธ **Smart filtering** - Automatically removes CDN/bad IPs (Cloudflare, etc.) +- ๐ŸŒ **Global coverage** - Proxies from Asia, Europe, Americas +- ๐Ÿ”ง **Easy to use** - Simple CLI interface +- โœ… **Quality checked** - Built-in proxy validation -- [sslproxies.org](http://sslproxies.org) (HTTP, HTTPS) -- [free-proxy-list.net](http://free-proxy-list.net) (HTTP, HTTPS) -- [us-proxy.org](http://us-proxy.org) (HTTP, HTTPS) -- [socks-proxy.net](http://socks-proxy.net) (Socks4, Socks5) -- [proxyscrape.com](https://proxyscrape.com) (HTTP, Socks4, Socks5) -- [proxy-list.download](https://www.proxy-list.download) (HTTP, HTTPS, Socks4, Socks5) -- [geonode.com](https://geonode.com) (HTTP, HTTPS, Socks4, Socks5) +## Installation & Setup -## Installation +### ๐Ÿ“ฆ Option 1: Install from PyPI (Recommended) You can install the package directly from PyPI using `pip`: @@ -23,63 +22,217 @@ You can install the package directly from PyPI using `pip`: pip install proxyz ``` +**Verify installation:** +```bash +proxy_scraper --help +proxy_checker --help +``` + +### ๐Ÿ”ง Option 2: Install from Source Code + Alternatively, you can install dependencies manually if you're working from the source code: ```bash +# Clone the repository +git clone https://github.com/iw4p/proxy-scraper.git +cd proxy-scraper + +# Install dependencies pip3 install -r requirements.txt + +# Test the installation +python proxyScraper.py --help +python proxyChecker.py --help ``` -## Usage +### ๐Ÿ Python Requirements +- **Python 3.9+** (3.9, 3.10, 3.11, 3.12 supported) +- **Dependencies:** httpx, beautifulsoup4, pysocks -### Using the Command-Line Interface +## Quick Start Tutorial -Once installed via `pip`, you can use the command-line tools `proxy_scraper` and `proxy_checker` directly. +### Step 1: Scrape Proxies +```bash +# Get HTTP proxies (basic) +proxy_scraper -p http + +# Get SOCKS5 proxies with detailed output +proxy_scraper -p socks5 -v -#### For Scraping Proxies: +# Save to custom file +proxy_scraper -p http -o my_proxies.txt -v +``` +### Step 2: Check Proxy Quality ```bash -proxy_scraper -p http +# Test scraped proxies (basic) +proxy_checker -l output.txt -t 10 + +# Test against specific site with verbose output +proxy_checker -l output.txt -s https://google.com -v + +# Use random user agents for testing +proxy_checker -l output.txt -r -v +``` + +### Step 3: Complete Workflow Example +```bash +# 1. Scrape HTTP proxies +proxy_scraper -p http -v -o fresh_proxies.txt + +# 2. Check their quality +proxy_checker -l fresh_proxies.txt -t 15 -v + +# 3. Result: output.txt contains only working proxies ``` -- With `-p` or `--proxy`, you can choose your proxy type. Supported proxy types are: **HTTP - HTTPS - Socks (Both 4 and 5) - Socks4 - Socks5**. -- With `-o` or `--output`, specify the output file name where the proxies will be saved. (Default is **output.txt**). -- With `-v` or `--verbose`, increase output verbosity. -- With `-h` or `--help`, show the help message. 
+## Supported Proxy Types +- **HTTP** - Web traffic +- **HTTPS** - Secure web traffic +- **SOCKS4** - TCP connections +- **SOCKS5** - TCP + UDP connections -#### For Checking Proxies: +## Proxy Sources +We collect proxies from **24 sources**: + +**๐ŸŒ Direct Websites (11 sources)** +- spys.me, free-proxy-list.net, proxyscrape.com, geonode.com +- sslproxies.org, us-proxy.org, socks-proxy.net +- proxy-list.download, proxyscan.io, proxyspace.pro +- freeproxy.lunaproxy.com + +**๐Ÿ“ฆ GitHub Repositories (13 sources)** +- proxifly/free-proxy-list, monosans/proxy-list, TheSpeedX/PROXY-List +- jetkai/proxy-list, roosterkid/openproxylist, mmpx12/proxy-list +- ShiftyTR/Proxy-List, clarketm/proxy-list, sunny9577/proxy-scraper +- zloi-user/hideip.me, almroot/proxylist, aslisk/proxyhttps +- proxy4parsing/proxy-list + +## Advanced Usage + +### CLI Options + +**Scraping:** ```bash -proxy_checker -p http -t 20 -s https://google.com -l output.txt +proxy_scraper -p [-o output.txt] [-v] + +Options: + -p, --proxy Proxy type: http, https, socks, socks4, socks5 + -o, --output Output file (default: output.txt) + -v, --verbose Show detailed statistics ``` -- With `-t` or `--timeout`, set the timeout in seconds after which the proxy is considered dead. (Default is **20**). -- With `-p` or `--proxy`, check HTTPS, HTTP, SOCKS4, or SOCKS5 proxies. (Default is **HTTP**). -- With `-l` or `--list`, specify the path to your proxy list file. (Default is **output.txt**). -- With `-s` or `--site`, check proxies against a specific website like google.com. (Default is **https://google.com**). -- With `-r` or `--random_agent`, use a random user agent per proxy. -- With `-v` or `--verbose`, increase output verbosity. -- With `-h` or `--help`, show the help message. +**Checking:** +```bash +proxy_checker [-l input.txt] [-t timeout] [-s site] [-v] + +Options: + -l, --list Input proxy file (default: output.txt) + -t, --timeout Timeout in seconds (default: 20) + -s, --site Test site (default: https://google.com) + -r, --random_agent Use random user agents + -v, --verbose Show detailed progress +``` + +### From Source Code +```bash +# Clone repository +git clone https://github.com/iw4p/proxy-scraper +cd proxy-scraper + +# Install dependencies +pip install -r requirements.txt + +# Run scraper +python proxyScraper.py -p http -v -### Running Directly from Source +# Check proxies +python proxyChecker.py -l output.txt -v +``` + +## Quality & Performance -If you prefer running the scripts directly from the source code, you can use the following commands: +- โœ… **Automatic filtering** - Removes bad IPs (Cloudflare, CDNs, private ranges) +- ๐Ÿ“Š **Source statistics** - See which sources provide the best proxies +- โšก **Fast concurrent** - All sources scraped simultaneously -#### For Scraping: +## Example Output ```bash -python3 proxyScraper.py -p http +Scraping proxies using 24 sources... +๐Ÿ“Š Source Statistics: +-------------------------------------------------- +ProxyScrapeScraper: 18769 valid, 16408 bad IPs filtered +PlainTextScraper: 13516 valid, 5515 bad IPs filtered +GitHubScraper: 1767 valid, 739 bad IPs filtered +... 
+Total filtered: 22177 bad IPs (CDN/etc), 1 invalid format +Found 30938 unique valid proxies ``` -#### For Checking: +## ๐ŸŒ Proxy Geolocation & Analysis + +The project includes a powerful geolocation tool to analyze proxy origins and track sources: + +### Features +- **๐Ÿ” IP Geolocation** - Get country, city, ISP, and organization info +- **โ˜๏ธ CDN Detection** - Automatically identifies Cloudflare and other CDNs +- **๐Ÿข Datacenter Detection** - Flags hosting providers and datacenters +- **๐Ÿ“Š Source Tracking** - Maps proxies back to their original sources +- **๐Ÿ’พ JSON Export** - Save analysis results for further processing + +### Usage Examples + +**Analyze single IP:** +```bash +python proxyGeolocation.py -i 104.16.1.31 +``` + +**Analyze proxy file:** +```bash +python proxyGeolocation.py -f output.txt -l 50 +``` + +**Track proxy sources:** +```bash +python proxyGeolocation.py -f output.txt -s --limit 100 +``` + +**Export to JSON:** +```bash +python proxyGeolocation.py -f output.txt -o analysis.json +``` +### Sample Output ```bash -python3 proxyChecker.py -p http -t 20 -s https://google.com -l output.txt +๐Ÿ” Proxy Geolocation Analysis Results +================================================== + +๐Ÿ“Š Summary: +Total proxies analyzed: 50 +Proxies with geolocation data: 45 +Cloudflare proxies: 8 +Datacenter proxies: 12 + +๐ŸŒŽ Countries: + United States (US): 15 + Germany (DE): 8 + Singapore (SG): 6 + ... + +๐Ÿ“‹ Detailed Results: +โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +โ˜๏ธ 104.16.1.31:80 - San Francisco, United States | Cloudflare Inc. +๐ŸŒ 45.79.143.52:3128 - Tokyo, Japan | Linode LLC +๐Ÿข 159.203.61.169:3128 - New York, United States | DigitalOcean ``` ## Good to Know - Dead proxies will be removed, and only alive proxies will remain in the output file. -- This script is capable of scraping SOCKS proxies, but `proxyChecker` currently only checks HTTP(S) proxies. +- The proxy checker supports all proxy types: **HTTP, HTTPS, SOCKS4, and SOCKS5**. +- Use random user agents (`-r` flag) for better success rates when checking proxies. 
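+
+The checker can also be driven from Python instead of the CLI. A minimal sketch, assuming it is run from the repository root so `proxyChecker` is importable; the proxy address shown is a placeholder for illustration, not a real server:
+
+```python
+from proxyChecker import Proxy
+
+# Invalid "ip:port" strings raise ValueError, unsupported methods raise NotImplementedError.
+proxy = Proxy("http", "203.0.113.10:8080")  # placeholder address
+
+ok, seconds, error = proxy.check(
+    site="https://httpbin.org/ip",  # a scheme is added automatically if omitted
+    timeout=10,
+    user_agent="Mozilla/5.0",
+    verbose=True,
+)
+print(f"working={ok} time={seconds:.2f}s error={error}")
+```
+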
## Star History diff --git a/dev_requirements.txt b/dev_requirements.txt index f09b308..2b8fd6b 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,8 +1,8 @@ -flake8==4.0.1 -flake8-black==0.3.1 -flake8-bugbear==22.4.25 -flake8-builtins==1.5.3 -flake8-commas==2.1.0 -flake8-isort==4.1.1 -flake8-polyfill==1.0.2 -pep8-naming==0.12.1 \ No newline at end of file +flake8>=4.0.1,<8.0.0 +flake8-black>=0.3.1,<1.0.0 +flake8-bugbear>=22.4.25,<25.0.0 +flake8-builtins>=1.5.3,<3.0.0 +flake8-commas>=2.1.0,<5.0.0 +flake8-isort>=4.1.1,<7.0.0 +flake8-polyfill>=1.0.2,<2.0.0 +pep8-naming>=0.12.1,<1.0.0 \ No newline at end of file diff --git a/proxyChecker.py b/proxyChecker.py index a5b8828..6e3ee80 100644 --- a/proxyChecker.py +++ b/proxyChecker.py @@ -1,151 +1,552 @@ import argparse +import concurrent.futures +import logging import random import re import socket +import sys import threading import urllib.request +from pathlib import Path from time import time +from typing import List, Optional, Tuple import socks +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +#fallback user agents (will be extended from user_agents.txt if available) user_agents = [ - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/37.0.2062.94 Chrome/37.0.2062.94 Safari/537.36" - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36", - "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko", - "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", - "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4", - "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0", ] -try: - with open("user_agents.txt", "r") as f: - for line in f: - user_agents.append(line.replace("\n", "")) -except FileNotFoundError: - pass +# Load additional user agents from file if available +def load_user_agents() -> None: + """Load user agents from external file if available.""" + try: + user_agents_file = Path("user_agents.txt") + if user_agents_file.exists(): + with open(user_agents_file, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line and line not in user_agents: + user_agents.append(line) + logger.debug(f"Loaded {len(user_agents)} user agents from file") + else: + logger.debug("user_agents.txt not found, using built-in user agents") + except Exception as e: + logger.warning(f"Failed to load user agents from file: {e}") + +# 
Load user agents at module level +load_user_agents() class Proxy: - def __init__(self, method, proxy): - if method.lower() not in ["http", "https", "socks4", "socks5"]: - raise NotImplementedError("Only HTTP, HTTPS, SOCKS4, and SOCKS5 are supported") - self.method = method.lower() - self.proxy = proxy - - def is_valid(self): - return re.match(r"\d{1,3}(?:\.\d{1,3}){3}(?::\d{1,5})?$", self.proxy) - - def check(self, site, timeout, user_agent, verbose): - if self.method in ["socks4", "socks5"]: - socks.set_default_proxy(socks.SOCKS4 if self.method == "socks4" else socks.SOCKS5, - self.proxy.split(':')[0], int(self.proxy.split(':')[1])) + """Represents a proxy server with validation and checking capabilities.""" + + SUPPORTED_METHODS = ["http", "https", "socks4", "socks5"] + + def __init__(self, method: str, proxy: str): + """ + Initialize a proxy instance. + + Args: + method: Proxy type (http, https, socks4, socks5) + proxy: Proxy address in format 'ip:port' + + Raises: + NotImplementedError: If proxy method is not supported + ValueError: If proxy format is invalid + """ + method = method.lower().strip() + if method not in self.SUPPORTED_METHODS: + raise NotImplementedError(f"Only {', '.join(self.SUPPORTED_METHODS)} are supported, got: {method}") + + self.method = method + self.proxy = proxy.strip() + + # Validate proxy format during initialization + if not self.is_valid(): + raise ValueError(f"Invalid proxy format: {proxy}") + + def is_valid(self) -> bool: + """ + Validate proxy format (IP:port). + + Returns: + True if proxy format is valid, False otherwise + """ + if not self.proxy or ':' not in self.proxy: + return False + + try: + ip, port = self.proxy.split(':', 1) + + # Validate IP format + if not re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip): + return False + + # Validate IP range (0-255 for each octet) + ip_parts = [int(x) for x in ip.split('.')] + if not all(0 <= part <= 255 for part in ip_parts): + return False + + # Validate port range + port_num = int(port) + if not (1 <= port_num <= 65535): + return False + + return True + except (ValueError, AttributeError): + return False + + def check(self, site: str, timeout: int, user_agent: str, verbose: bool) -> Tuple[bool, float, Optional[Exception]]: + """ + Check if proxy is working by attempting to connect through it. 
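+        For SOCKS4/SOCKS5, the default socket is temporarily replaced via PySocks and restored afterwards.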
+ + Args: + site: Target website to test connection + timeout: Connection timeout in seconds + user_agent: User agent string to use + verbose: Enable verbose logging + + Returns: + Tuple of (is_valid, response_time, error) + """ + if not site.startswith(('http://', 'https://')): + site = f"https://{site}" + + start_time = time() + + try: + if self.method in ["socks4", "socks5"]: + return self._check_socks_proxy(site, timeout, verbose, start_time) + else: + return self._check_http_proxy(site, timeout, user_agent, verbose, start_time) + except Exception as e: + verbose_print(verbose, f"Proxy {self.proxy} failed with unexpected error: {e}") + return False, 0.0, e + + def _check_socks_proxy(self, site: str, timeout: int, verbose: bool, start_time: float) -> Tuple[bool, float, Optional[Exception]]: + """Check SOCKS proxy connectivity.""" + # Store original socket to restore later + original_socket = socket.socket + + try: + ip, port = self.proxy.split(':') + socks_type = socks.SOCKS4 if self.method == "socks4" else socks.SOCKS5 + + socks.set_default_proxy(socks_type, ip, int(port)) socket.socket = socks.socksocket + try: - start_time = time() - urllib.request.urlopen(site, timeout=timeout) - end_time = time() - time_taken = end_time - start_time - verbose_print(verbose, f"Proxy {self.proxy} is valid, time taken: {time_taken}") - return True, time_taken, None - except Exception as e: - verbose_print(verbose, f"Proxy {self.proxy} is not valid, error: {str(e)}") - return False, 0, e - else: - url = self.method + "://" + self.proxy - proxy_support = urllib.request.ProxyHandler({self.method: url}) - opener = urllib.request.build_opener(proxy_support) - urllib.request.install_opener(opener) - req = urllib.request.Request(self.method + "://" + site) - req.add_header("User-Agent", user_agent) - try: - start_time = time() - urllib.request.urlopen(req, timeout=timeout) + response = urllib.request.urlopen(site, timeout=timeout) + response.read(1024) # Read a small amount to ensure connection works end_time = time() time_taken = end_time - start_time - verbose_print(verbose, f"Proxy {self.proxy} is valid, time taken: {time_taken}") + + verbose_print(verbose, f"โœ“ Proxy {self.proxy} ({self.method.upper()}) is valid, time: {time_taken:.2f}s") return True, time_taken, None - except Exception as e: - verbose_print(verbose, f"Proxy {self.proxy} is not valid, error: {str(e)}") - return False, 0, e + + finally: + # Always restore original socket + socket.socket = original_socket + + except Exception as e: + socket.socket = original_socket # Ensure cleanup even on error + verbose_print(verbose, f"โœ— Proxy {self.proxy} ({self.method.upper()}) failed: {e}") + return False, 0.0, e + + def _check_http_proxy(self, site: str, timeout: int, user_agent: str, verbose: bool, start_time: float) -> Tuple[bool, float, Optional[Exception]]: + """Check HTTP/HTTPS proxy connectivity.""" + try: + proxy_url = f"{self.method}://{self.proxy}" + proxy_handler = urllib.request.ProxyHandler({ + 'http': proxy_url, + 'https': proxy_url, + }) + + opener = urllib.request.build_opener(proxy_handler) + + # Create request with proper headers + request = urllib.request.Request(site) + request.add_header("User-Agent", user_agent) + request.add_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + request.add_header("Accept-Language", "en-US,en;q=0.5") + request.add_header("Accept-Encoding", "gzip, deflate") + request.add_header("Connection", "keep-alive") + + response = opener.open(request, timeout=timeout) + 
response.read(1024) # Read a small amount to ensure connection works + + end_time = time() + time_taken = end_time - start_time + + verbose_print(verbose, f"โœ“ Proxy {self.proxy} ({self.method.upper()}) is valid, time: {time_taken:.2f}s") + return True, time_taken, None + + except Exception as e: + verbose_print(verbose, f"โœ— Proxy {self.proxy} ({self.method.upper()}) failed: {e}") + return False, 0.0, e - def __str__(self): + def __str__(self) -> str: + """String representation of the proxy.""" return self.proxy + def __repr__(self) -> str: + """Detailed string representation.""" + return f"Proxy(method='{self.method}', proxy='{self.proxy}')" + -def verbose_print(verbose, message): +def verbose_print(verbose: bool, message: str) -> None: + """Print message if verbose mode is enabled.""" if verbose: print(message) -def check(file, timeout, method, site, verbose, random_user_agent): +def _process_proxy_line(line: str, line_num: int, method: str) -> Optional[Proxy]: + """Process a single line from proxy file.""" + line = line.strip() + if not line or line.startswith('#'): # Skip empty lines and comments + return None + + try: + return Proxy(method, line) + except (ValueError, NotImplementedError) as e: + logger.debug(f"Line {line_num}: Invalid proxy '{line}' - {e}") + return None + + +def _read_proxy_file(file_path: str) -> List[str]: + """Read and return lines from proxy file.""" + try: + with open(file_path, "r", encoding="utf-8") as f: + return list(f) + except FileNotFoundError: + logger.error(f"Proxy file not found: {file_path}") + sys.exit(1) + except Exception as e: + logger.error(f"Error reading proxy file {file_path}: {e}") + sys.exit(1) + + +def load_proxies_from_file(file_path: str, method: str) -> List[Proxy]: + """ + Load proxies from file and create Proxy objects. + + Args: + file_path: Path to proxy list file + method: Proxy method to use + + Returns: + List of valid Proxy objects + """ proxies = [] - with open(file, "r") as f: - for line in f: - proxies.append(Proxy(method, line.replace("\n", ""))) + invalid_count = 0 + + lines = _read_proxy_file(file_path) + + for line_num, line in enumerate(lines, 1): + proxy = _process_proxy_line(line, line_num, method) + if proxy is not None: + proxies.append(proxy) + else: + if line.strip() and not line.strip().startswith('#'): + invalid_count += 1 + + if invalid_count > 0: + logger.warning(f"Skipped {invalid_count} invalid proxy entries") + + return proxies - print(f"Checking {len(proxies)} proxies") - proxies = filter(lambda x: x.is_valid(), proxies) - valid_proxies = [] - user_agent = random.choice(user_agents) - def check_proxy(proxy, user_agent): - new_user_agent = user_agent - if random_user_agent: - new_user_agent = random.choice(user_agents) - valid, time_taken, error = proxy.check(site, timeout, new_user_agent, verbose) - valid_proxies.extend([proxy] if valid else []) +def save_valid_proxies(file_path: str, valid_proxies: List[Proxy]) -> None: + """ + Save valid proxies back to file. 
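+    Proxies are sorted by address before writing so the output file is deterministic.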
+ + Args: + file_path: Output file path + valid_proxies: List of valid proxies to save + """ + try: + # Sort proxies for consistent output + sorted_proxies = sorted(valid_proxies, key=lambda p: p.proxy) + + with open(file_path, "w", encoding="utf-8") as f: + for proxy in sorted_proxies: + f.write(f"{proxy}\n") + + logger.info(f"Saved {len(valid_proxies)} valid proxies to {file_path}") + + except Exception as e: + logger.error(f"Failed to save proxies to {file_path}: {e}") + raise - threads = [] - for proxy in proxies: - t = threading.Thread(target=check_proxy, args=(proxy, user_agent)) - threads.append(t) - for t in threads: - t.start() +def _prepare_checking_environment(file: str, method: str, site: str, timeout: int, random_user_agent: bool) -> Tuple[List[Proxy], str, int]: + """Prepare the environment for proxy checking.""" + print(f"Loading proxies from {file}...") + proxies = load_proxies_from_file(file, method) + print(f"Loaded {len(proxies)} valid proxies for checking") + + if not proxies: + print("No valid proxies found to check") + return [], "", 0 + + # Choose base user agent + base_user_agent = random.choice(user_agents) + + # Print checking parameters + max_threads = min(len(proxies), 100) + print(f"Starting proxy validation with {max_threads} concurrent threads...") + print(f"Target site: {site}") + print(f"Timeout: {timeout}s") + print(f"Method: {method.upper()}") + print(f"User agent strategy: {'Random per proxy' if random_user_agent else 'Fixed'}") + print("-" * 60) + + return proxies, base_user_agent, max_threads - for t in threads: - t.join() - with open(file, "w") as f: - for proxy in valid_proxies: - f.write(str(proxy) + "\n") +def _create_proxy_checker(valid_proxies: List[Proxy], checked_count_ref: List[int], lock: threading.Lock, + site: str, timeout: int, random_user_agent: bool, base_user_agent: str, + total_proxies: int, verbose: bool): + """Create a proxy checking function with proper closure.""" + def check_single_proxy(proxy: Proxy) -> None: + """Check a single proxy and update results.""" + try: + # Select user agent + current_user_agent = random.choice(user_agents) if random_user_agent else base_user_agent + + # Check proxy + is_valid, response_time, error = proxy.check(site, timeout, current_user_agent, verbose) + + # Update results thread-safely + with lock: + checked_count_ref[0] += 1 + + if is_valid: + valid_proxies.append(proxy) + + # Progress indicator + if not verbose and checked_count_ref[0] % 50 == 0: + print(f"Progress: {checked_count_ref[0]}/{total_proxies} ({len(valid_proxies)} valid)") + + except Exception as e: + logger.debug(f"Unexpected error checking proxy {proxy}: {e}") + + return check_single_proxy - print(f"Found {len(valid_proxies)} valid proxies") + +def check(file: str, timeout: int, method: str, site: str, verbose: bool, random_user_agent: bool) -> None: + """ + Main proxy checking function. 
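+    The input file is overwritten so that only working proxies remain.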
+ + Args: + file: Path to proxy list file + timeout: Connection timeout in seconds + method: Proxy method to check + site: Target website for testing + verbose: Enable verbose output + random_user_agent: Use random user agent per proxy + """ + start_time = time() + + # Prepare checking environment + proxies, base_user_agent, max_threads = _prepare_checking_environment( + file, method, site, timeout, random_user_agent, + ) + + if not proxies: + return + + # Initialize checking state + valid_proxies = [] + checked_count_ref = [0] # Use list for mutable reference + lock = threading.Lock() + + # Create checker function + check_single_proxy = _create_proxy_checker( + valid_proxies, checked_count_ref, lock, site, timeout, + random_user_agent, base_user_agent, len(proxies), verbose, + ) + + # Execute checking with thread pool + with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor: + futures = [executor.submit(check_single_proxy, proxy) for proxy in proxies] + + try: + concurrent.futures.wait(futures, timeout=None) + except KeyboardInterrupt: + print("\nChecking interrupted by user") + executor.shutdown(wait=False) + return + + # Save results + save_valid_proxies(file, valid_proxies) + + # Final statistics + elapsed_time = time() - start_time + success_rate = (len(valid_proxies) / len(proxies)) * 100 if proxies else 0 + + print("-" * 60) + print("Proxy checking completed!") + print(f"Total checked: {len(proxies)}") + print(f"Valid proxies: {len(valid_proxies)}") + print(f"Success rate: {success_rate:.1f}%") + print(f"Time taken: {elapsed_time:.2f} seconds") + print(f"Average time per proxy: {elapsed_time/len(proxies):.2f}s") + + if len(valid_proxies) == 0: + print("โš ๏ธ No working proxies found. Consider:") + print(" - Increasing timeout value") + print(" - Trying a different target site") + print(" - Using fresh proxy list") -def main(): - parser = argparse.ArgumentParser() +def _setup_argument_parser() -> argparse.ArgumentParser: + """Set up and configure the argument parser.""" + parser = argparse.ArgumentParser( + description="Check proxy servers for connectivity and validity", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s -p http -t 10 -v # Check HTTP proxies with 10s timeout + %(prog)s -p socks4 -l socks.txt -r # Check SOCKS4 with random user agents + %(prog)s -p https -s httpbin.org/ip --debug # Check HTTPS proxies against custom site + +Notes: + - Dead proxies are automatically removed from the list file + - Use --debug for detailed error information + - Higher timeout values may find more working proxies but take longer + """, + ) + parser.add_argument( - "-t", - "--timeout", + "-t", "--timeout", type=int, - help="Dismiss the proxy after -t seconds", default=20, + help="Connection timeout in seconds (default: %(default)s)", ) - parser.add_argument("-p", "--proxy", help="Check HTTPS, HTTP, SOCKS4, or SOCKS5 proxies", default="http") - parser.add_argument("-l", "--list", help="Path to your proxy list file", default="output.txt") parser.add_argument( - "-s", - "--site", - help="Check with specific website like google.com", - default="https://google.com/", + "-p", "--proxy", + choices=Proxy.SUPPORTED_METHODS, + default="http", + help="Proxy type to check (default: %(default)s)", ) parser.add_argument( - "-v", - "--verbose", - help="Increase output verbosity", + "-l", "--list", + default="output.txt", + help="Path to proxy list file (default: %(default)s)", + ) + parser.add_argument( + "-s", "--site", + 
default="https://httpbin.org/ip", + help="Target website for testing (default: %(default)s)", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", + help="Enable verbose output showing each proxy check", ) parser.add_argument( - "-r", - "--random_agent", - help="Use a random user agent per proxy", + "-r", "--random_agent", action="store_true", + help="Use a different random user agent for each proxy", ) + parser.add_argument( + "--debug", + action="store_true", + help="Enable debug logging for troubleshooting", + ) + parser.add_argument( + "--max-threads", + type=int, + default=100, + help="Maximum number of concurrent threads (default: %(default)s)", + ) + + return parser + + +def _configure_logging_and_validate_args(args) -> str: + """Configure logging and validate arguments.""" + # Configure logging + if args.debug: + logging.getLogger().setLevel(logging.DEBUG) + elif args.verbose: + logging.getLogger().setLevel(logging.INFO) + else: + logging.getLogger().setLevel(logging.WARNING) + + # Validate arguments + if args.timeout <= 0: + print("Error: Timeout must be positive") + sys.exit(1) + + if args.max_threads <= 0: + print("Error: max-threads must be positive") + sys.exit(1) + + # Check if proxy file exists + if not Path(args.list).exists(): + print(f"Error: Proxy file '{args.list}' not found") + print("Tip: Run the proxy scraper first to generate a proxy list") + sys.exit(1) + + # Normalize site URL + site = args.site + if not site.startswith(('http://', 'https://')): + site = f"https://{site}" + + return site + + +def main() -> None: + """Main entry point for the proxy checker.""" + parser = _setup_argument_parser() args = parser.parse_args() - check(file=args.list, timeout=args.timeout, method=args.proxy, site=args.site, verbose=args.verbose, - random_user_agent=args.random_agent) + + # Configure logging and validate arguments + site = _configure_logging_and_validate_args(args) + + # Display startup information + print("๐Ÿ” Proxy Checker v2.0") + print(f"๐Ÿ“ Proxy file: {args.list}") + print(f"๐ŸŽฏ Target site: {site}") + print(f"โฑ๏ธ Timeout: {args.timeout}s") + print(f"๐Ÿ”ง Method: {args.proxy.upper()}") + print(f"๐Ÿงต Max threads: {args.max_threads}") + print(f"๐Ÿ‘ค User agents: {len(user_agents)} available") + print("=" * 60) + + try: + check( + file=args.list, + timeout=args.timeout, + method=args.proxy, + site=site, + verbose=args.verbose, + random_user_agent=args.random_agent, + ) + + except KeyboardInterrupt: + print("\nโš ๏ธ Operation interrupted by user") + sys.exit(1) + except Exception as e: + logger.error(f"Proxy checking failed: {e}") + if args.debug: + import traceback + traceback.print_exc() + sys.exit(1) if __name__ == "__main__": diff --git a/proxyGeolocation.py b/proxyGeolocation.py new file mode 100644 index 0000000..152d976 --- /dev/null +++ b/proxyGeolocation.py @@ -0,0 +1,504 @@ +#!/usr/bin/env python3 +""" +Proxy Geolocation and Source Tracking Tool +Identifies proxy origins and tracks which sources provide which proxies. 
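+Geolocation lookups use the free ip-api.com JSON endpoint (no API key required).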
+""" + +import argparse +import asyncio +import json +import logging +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import httpx + +# Setup logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +@dataclass +class ProxyInfo: + """Information about a proxy including its geolocation and source.""" + ip: str + port: str + country: Optional[str] = None + country_code: Optional[str] = None + city: Optional[str] = None + region: Optional[str] = None + org: Optional[str] = None + isp: Optional[str] = None + source: Optional[str] = None + is_cloudflare: bool = False + is_datacenter: bool = False + +class ProxyGeolocator: + """Main class for proxy geolocation and source tracking.""" + + def __init__(self): + self.session: Optional[httpx.AsyncClient] = None + + async def __aenter__(self): + """Async context manager entry.""" + self.session = httpx.AsyncClient( + timeout=httpx.Timeout(30.0), + limits=httpx.Limits(max_connections=10, max_keepalive_connections=5), + ) + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + if self.session: + await self.session.aclose() + + def _check_special_addresses(self, ip: str, proxy_info: ProxyInfo) -> bool: + """Check for special/reserved addresses. Returns True if special address found.""" + try: + import ipaddress + ip_obj = ipaddress.ip_address(ip) + + if str(ip_obj) == "0.0.0.0": + proxy_info.org = "Reserved: 'This host' address" + proxy_info.country = "Invalid" + return True + elif ip_obj.is_private: + proxy_info.org = "Private network address" + proxy_info.country = "Local" + return True + elif ip_obj.is_loopback: + proxy_info.org = "Loopback address" + proxy_info.country = "Local" + return True + elif ip_obj.is_reserved: + proxy_info.org = "Reserved address" + proxy_info.country = "Invalid" + return True + + return False + except Exception: + return False + + def _process_geolocation_data(self, data: dict, proxy_info: ProxyInfo) -> None: + """Process geolocation API response data.""" + if data.get("status") != "success": + return + + proxy_info.country = data.get("country") + proxy_info.country_code = data.get("countryCode") + proxy_info.city = data.get("city") + proxy_info.region = data.get("region") + proxy_info.org = data.get("org") + proxy_info.isp = data.get("isp") + + # Check if it's Cloudflare + org_lower = (data.get("org") or "").lower() + isp_lower = (data.get("isp") or "").lower() + if "cloudflare" in org_lower or "cloudflare" in isp_lower: + proxy_info.is_cloudflare = True + + # Check if it's a datacenter + datacenter_keywords = ["datacenter", "hosting", "server", "cloud", "digital ocean", "aws", "amazon", "google", "microsoft"] + if any(keyword in org_lower or keyword in isp_lower for keyword in datacenter_keywords): + proxy_info.is_datacenter = True + + async def get_ip_info(self, ip: str) -> ProxyInfo: + """Get geolocation information for an IP address.""" + proxy_info = ProxyInfo(ip=ip, port="") + + # Check for special/reserved addresses first + if self._check_special_addresses(ip, proxy_info): + return proxy_info + + try: + # Use ip-api.com for geolocation (free, no API key needed) + url = f"http://ip-api.com/json/{ip}?fields=status,message,country,countryCode,region,city,org,isp,as" + + if not self.session: + raise RuntimeError("Session not initialized") + + response = await self.session.get(url) + response.raise_for_status() 
+ + data = response.json() + self._process_geolocation_data(data, proxy_info) + + except Exception as e: + logger.debug(f"Error getting IP info for {ip}: {e}") + + return proxy_info + + def _parse_proxy_line(self, line: str, line_num: int) -> Optional[Tuple[str, int]]: + """Parse a single proxy line. Returns None if invalid.""" + line = line.strip() + if not line or line.startswith('#'): + return None + + if ':' not in line: + return None + + try: + ip, port = line.split(':', 1) + ip = ip.strip() + port = int(port.strip()) + return (ip, port) + except ValueError: + logger.warning(f"Invalid proxy format on line {line_num}: {line}") + return None + + def _read_proxy_file_lines(self, file_path: str) -> List[str]: + """Read all lines from proxy file.""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + return list(f) + except FileNotFoundError: + logger.error(f"Proxy file not found: {file_path}") + return [] + except Exception as e: + logger.error(f"Error reading proxy file: {e}") + return [] + + def parse_proxy_list(self, file_path: str) -> List[Tuple[str, int]]: + """Parse proxy list file and return list of (ip, port) tuples.""" + proxies = [] + lines = self._read_proxy_file_lines(file_path) + + for line_num, line in enumerate(lines, 1): + proxy = self._parse_proxy_line(line, line_num) + if proxy is not None: + proxies.append(proxy) + + return proxies + + async def analyze_proxies(self, proxy_list: List[Tuple[str, int]], limit: Optional[int] = None) -> List[ProxyInfo]: + """Analyze a list of proxies and get their geolocation info.""" + if limit: + proxy_list = proxy_list[:limit] + + logger.info(f"๐ŸŒ Analyzing {len(proxy_list)} proxies for geolocation...") + + results = [] + for i, (ip, port) in enumerate(proxy_list, 1): + logger.info(f"๐Ÿ“ Analyzing {i}/{len(proxy_list)}: {ip}:{port}") + + proxy_info = await self.get_ip_info(ip) + proxy_info.port = str(port) + results.append(proxy_info) + + # Small delay to be respectful to the API + await asyncio.sleep(0.1) + + return results + + def _calculate_summary_stats(self, results: List[ProxyInfo]) -> Tuple[Dict[str, int], int, int, int]: + """Calculate summary statistics from proxy results.""" + countries = {} + cloudflare_count = 0 + datacenter_count = 0 + valid_info_count = 0 + + for proxy in results: + if proxy.country: + valid_info_count += 1 + country_key = f"{proxy.country} ({proxy.country_code})" if proxy.country_code else proxy.country + countries[country_key] = countries.get(country_key, 0) + 1 + + if proxy.is_cloudflare: + cloudflare_count += 1 + if proxy.is_datacenter: + datacenter_count += 1 + + return countries, cloudflare_count, datacenter_count, valid_info_count + + def _print_summary_stats(self, results: List[ProxyInfo], countries: Dict[str, int], + cloudflare_count: int, datacenter_count: int, valid_info_count: int): + """Print summary statistics.""" + print("\n๐Ÿ“Š Summary:") + print(f"Total proxies analyzed: {len(results)}") + print(f"Proxies with geolocation data: {valid_info_count}") + print(f"Cloudflare proxies: {cloudflare_count}") + print(f"Datacenter proxies: {datacenter_count}") + + if countries: + print("\n๐ŸŒŽ Countries:") + for country, count in sorted(countries.items(), key=lambda x: x[1], reverse=True): + print(f" {country}: {count}") + + def _format_proxy_details(self, proxy: ProxyInfo) -> str: + """Format proxy details for display.""" + flag = "๐Ÿ”" + if proxy.is_cloudflare: + flag = "โ˜๏ธ" + elif proxy.is_datacenter: + flag = "๐Ÿข" + elif proxy.country: + flag = "๐ŸŒ" + + location = "Unknown" + 
if proxy.city and proxy.country: + location = f"{proxy.city}, {proxy.country}" + elif proxy.country: + location = proxy.country + + org_info = "" + if proxy.org: + org_info = f" | {proxy.org}" + if proxy.isp and proxy.isp != proxy.org: + org_info += f" | ISP: {proxy.isp}" + + return f"{flag} {proxy.ip}:{proxy.port} - {location}{org_info}" + + def print_analysis_results(self, results: List[ProxyInfo], show_details: bool = True): + """Print analysis results in a formatted way.""" + if not results: + print("โŒ No proxy data to analyze") + return + + print("\n๐Ÿ” Proxy Geolocation Analysis Results") + print("=" * 50) + + # Calculate summary statistics + countries, cloudflare_count, datacenter_count, valid_info_count = self._calculate_summary_stats(results) + + # Print summary + self._print_summary_stats(results, countries, cloudflare_count, datacenter_count, valid_info_count) + + if show_details: + print("\n๐Ÿ“‹ Detailed Results:") + print("-" * 80) + + for proxy in results: + print(self._format_proxy_details(proxy)) + + def save_results_json(self, results: List[ProxyInfo], output_file: str): + """Save results to JSON file.""" + data = [] + for proxy in results: + data.append({ + "ip": proxy.ip, + "port": proxy.port, + "country": proxy.country, + "country_code": proxy.country_code, + "city": proxy.city, + "region": proxy.region, + "org": proxy.org, + "isp": proxy.isp, + "is_cloudflare": proxy.is_cloudflare, + "is_datacenter": proxy.is_datacenter, + "source": proxy.source, + }) + + try: + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False) + print(f"๐Ÿ’พ Results saved to: {output_file}") + except Exception as e: + logger.error(f"Error saving results: {e}") + + async def analyze_proxy_sources(self, proxy_file: str, limit: Optional[int] = None) -> Dict[str, List[str]]: + """Analyze which source each proxy likely came from by checking current scraper results.""" + # Dynamic import to avoid circular dependency + try: + import proxyScraper + scrapers = proxyScraper.scrapers + except ImportError: + logger.warning("Could not import proxyScraper - source analysis unavailable") + return {} + + # Load proxies from file + proxies = self.parse_proxy_list(proxy_file) + if limit: + proxies = proxies[:limit] + + proxy_set = {f"{ip}:{port}" for ip, port in proxies} + source_map = {} + + logger.info(f"๐Ÿ” Analyzing sources for {len(proxy_set)} proxies...") + + # Check each scraper + client_config = { + "follow_redirects": True, + "timeout": 30.0, + "limits": httpx.Limits(max_keepalive_connections=20, max_connections=100), + "headers": { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + }, + } + + async with httpx.AsyncClient(**client_config) as client: + for scraper in scrapers: + try: + logger.info(f"๏ฟฝ Checking {scraper.source_name}...") + scraped_proxies, _ = await scraper.scrape(client) + scraped_set = set(scraped_proxies) + + # Find matches + matches = proxy_set.intersection(scraped_set) + if matches: + source_map[scraper.source_name] = list(matches) + logger.info(f" Found {len(matches)} matches") + + await asyncio.sleep(0.5) # Be respectful to sources + + except Exception as e: + logger.debug(f"Error checking {scraper.source_name}: {e}") + + return source_map + + async def check_single_ip(self, ip: str) -> ProxyInfo: + """Check a single IP address.""" + logger.info(f"๐Ÿ” Checking IP: {ip}") + return await self.get_ip_info(ip) + +def _setup_argument_parser(): + """Set 
up command line argument parser.""" + parser = argparse.ArgumentParser( + description="Proxy Geolocation and Source Tracking Tool", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python proxyGeolocation.py -i 104.16.1.31 + python proxyGeolocation.py -f output.txt -l 20 + python proxyGeolocation.py -f output.txt -s --limit 50 + python proxyGeolocation.py -f output.txt -o results.json + python proxyGeolocation.py -f output.txt --no-details + """, + ) + + parser.add_argument("-i", "--ip", type=str, help="Check single IP address") + parser.add_argument("-f", "--file", type=str, help="Path to proxy list file (default: output.txt)") + parser.add_argument("-s", "--sources", action="store_true", help="Analyze which sources provide which proxies") + parser.add_argument("-l", "--limit", type=int, help="Limit number of proxies to analyze") + parser.add_argument("-o", "--output", type=str, help="Save results to JSON file") + parser.add_argument("--no-details", action="store_true", help="Show only summary, no detailed results") + parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose logging") + + return parser + +async def _handle_single_ip(geolocator, args): + """Handle single IP analysis.""" + result = await geolocator.check_single_ip(args.ip) + geolocator.print_analysis_results([result], show_details=True) + + if args.output: + geolocator.save_results_json([result], args.output) + +def _validate_proxy_file(proxy_file: str) -> bool: + """Validate that proxy file exists.""" + if not Path(proxy_file).exists(): + print(f"โŒ Proxy file not found: {proxy_file}") + print("๐Ÿ’ก Run proxy scraper first: python proxyScraper.py -p http") + return False + return True + +def _print_source_summary(source_map: dict, total_mapped: int) -> None: + """Print source analysis summary.""" + print("\n๐Ÿ” Proxy Source Analysis Results") + print("=" * 50) + print(f"Total proxies mapped to sources: {total_mapped}") + +def _print_source_details(source_map: dict, show_details: bool) -> None: + """Print detailed source information.""" + if not source_map: + return + + print("\n๐Ÿ“Š Sources:") + for source, proxy_list in sorted(source_map.items(), key=lambda x: len(x[1]), reverse=True): + print(f" {source}: {len(proxy_list)} proxies") + if not show_details: + continue + + # Show first few proxies as examples + for proxy in proxy_list[:5]: + print(f" - {proxy}") + if len(proxy_list) > 5: + print(f" ... 
and {len(proxy_list) - 5} more") + print() + +async def _handle_source_analysis(geolocator, args): + """Handle proxy source analysis.""" + proxy_file = args.file or "output.txt" + + if not _validate_proxy_file(proxy_file): + return + + source_map = await geolocator.analyze_proxy_sources(proxy_file, args.limit) + total_mapped = sum(len(proxies) for proxies in source_map.values()) + + _print_source_summary(source_map, total_mapped) + _print_source_details(source_map, not args.no_details) + + if args.output: + output_data = { + "analysis_type": "source_mapping", + "total_mapped": total_mapped, + "sources": source_map, + } + try: + with open(args.output, 'w', encoding='utf-8') as f: + json.dump(output_data, f, indent=2) + print(f"๐Ÿ’พ Source analysis saved to: {args.output}") + except Exception as e: + logger.error(f"Error saving results: {e}") + +async def _handle_file_analysis(geolocator, args): + """Handle proxy file analysis.""" + proxy_file = args.file or "output.txt" + + if not Path(proxy_file).exists(): + print(f"โŒ Proxy file not found: {proxy_file}") + print("๐Ÿ’ก Run proxy scraper first: python proxyScraper.py -p http") + return + + proxies = geolocator.parse_proxy_list(proxy_file) + + if not proxies: + print(f"โŒ No valid proxies found in {proxy_file}") + return + + results = await geolocator.analyze_proxies(proxies, args.limit) + geolocator.print_analysis_results(results, show_details=not args.no_details) + + if args.output: + geolocator.save_results_json(results, args.output) + +def _configure_environment(args) -> None: + """Configure logging and environment settings.""" + if args.verbose: + logging.getLogger().setLevel(logging.DEBUG) + + # Handle Windows event loop + if sys.platform.startswith('win'): + asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy()) + +async def _run_analysis_based_on_args(geolocator, args): + """Run analysis based on command line arguments.""" + if args.ip: + await _handle_single_ip(geolocator, args) + elif args.sources: + await _handle_source_analysis(geolocator, args) + else: + await _handle_file_analysis(geolocator, args) + +def main(): + """Main function for CLI usage.""" + parser = _setup_argument_parser() + args = parser.parse_args() + + _configure_environment(args) + + async def run_analysis(): + async with ProxyGeolocator() as geolocator: + await _run_analysis_based_on_args(geolocator, args) + + # Run the analysis + try: + asyncio.run(run_analysis()) + except KeyboardInterrupt: + print("\nโน๏ธ Analysis interrupted by user") + except Exception as e: + logger.error(f"Analysis failed: {e}") + +if __name__ == "__main__": + main() diff --git a/proxyScraper.py b/proxyScraper.py index ec00038..c774462 100644 --- a/proxyScraper.py +++ b/proxyScraper.py @@ -1,68 +1,201 @@ import argparse import asyncio +import ipaddress +import logging import platform import re import sys import time +from typing import Dict, List, Optional, Set, Tuple import httpx from bs4 import BeautifulSoup +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +# Known bad IP ranges to filter out (Cloudflare, major CDNs, etc.) 
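+# Each entry is a CIDR block; is_bad_ip() below parses candidate addresses with the
+# standard-library ipaddress module and drops any that fall inside these networks
+# (or that are private, loopback, reserved, or multicast).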
+BAD_IP_RANGES = [ + # Cloudflare + "173.245.48.0/20", + "103.21.244.0/22", + "103.22.200.0/22", + "103.31.4.0/22", + "141.101.64.0/18", + "108.162.192.0/18", + "190.93.240.0/20", + "188.114.96.0/20", + "197.234.240.0/22", + "198.41.128.0/17", + "162.158.0.0/15", + "104.16.0.0/13", # This includes our problematic IP 104.16.1.31 + "104.24.0.0/14", + "172.64.0.0/13", + "131.0.72.0/22", + # Amazon CloudFront + "13.32.0.0/15", + "13.35.0.0/17", + "18.160.0.0/15", + "52.222.128.0/17", + "54.182.0.0/16", + "54.192.0.0/16", + "54.230.0.0/16", + "54.239.128.0/18", + "99.86.0.0/16", + "205.251.200.0/21", + "216.137.32.0/19", +] + +def is_bad_ip(ip: str) -> bool: + """Check if an IP is in a known bad range (CDN, etc.) or is a reserved address.""" + try: + ip_obj = ipaddress.ip_address(ip) + + # Check for reserved/special addresses + if ip_obj.is_private or ip_obj.is_loopback or ip_obj.is_reserved or ip_obj.is_multicast: + return True + + # Check for specific bad addresses + if str(ip_obj) in ["0.0.0.0", "255.255.255.255", "127.0.0.1"]: + return True + + # Check against known bad ranges (CDNs) + for cidr in BAD_IP_RANGES: + if ip_obj in ipaddress.ip_network(cidr): + return True + + except (ValueError, ipaddress.AddressValueError): + return True # Invalid IP format + return False + class Scraper: + """Base scraper class for proxy sources.""" - def __init__(self, method, _url): + def __init__(self, method: str, _url: str, timeout: int = 10): self.method = method self._url = _url + self.timeout = timeout + self.source_name = self.__class__.__name__ - def get_url(self, **kwargs): + def get_url(self, **kwargs) -> str: + """Get the formatted URL for the scraper.""" return self._url.format(**kwargs, method=self.method) - async def get_response(self, client): - return await client.get(self.get_url()) + async def get_response(self, client: httpx.AsyncClient) -> httpx.Response: + """Get HTTP response from the proxy source.""" + return await client.get(self.get_url(), timeout=self.timeout) - async def handle(self, response): + async def handle(self, response: httpx.Response) -> str: + """Handle the response and extract proxy data.""" return response.text - async def scrape(self, client): - response = await self.get_response(client) - proxies = await self.handle(response) - pattern = re.compile(r"\d{1,3}(?:\.\d{1,3}){3}(?::\d{1,5})?") - return re.findall(pattern, proxies) + def filter_proxies(self, proxy_text: str) -> Tuple[Set[str], Dict[str, int]]: + """Filter proxies and return valid ones with statistics.""" + proxies = set() + stats = {"total": 0, "filtered_bad": 0, "filtered_invalid": 0, "valid": 0} + + for line in proxy_text.split('\n'): + line = line.strip() + if not line: + continue + + stats["total"] += 1 + + # Basic format validation + if ':' not in line: + stats["filtered_invalid"] += 1 + continue + + try: + ip, port = line.split(':', 1) + ip = ip.strip() + port = port.strip() + + # Validate IP format + ipaddress.ip_address(ip) + + # Validate port + port_num = int(port) + if not (1 <= port_num <= 65535): + stats["filtered_invalid"] += 1 + continue + + # Check if it's a bad IP (CDN, etc.) 
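+                    # (covers the CDN ranges in BAD_IP_RANGES plus reserved/private/multicast space)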
+ if is_bad_ip(ip): + stats["filtered_bad"] += 1 + logger.debug(f"Filtered bad IP from {self.source_name}: {ip}:{port}") + continue + + proxies.add(f"{ip}:{port}") + stats["valid"] += 1 + + except (ValueError, ipaddress.AddressValueError): + stats["filtered_invalid"] += 1 + continue + + return proxies, stats + + async def scrape(self, client: httpx.AsyncClient) -> Tuple[List[str], Dict[str, int]]: + """Scrape proxies from the source.""" + try: + response = await self.get_response(client) + response.raise_for_status() # Raise an exception for bad status codes + proxy_text = await self.handle(response) + + # Use regex to find all potential proxies + pattern = re.compile(r"\d{1,3}(?:\.\d{1,3}){3}(?::\d{1,5})?") + raw_proxies = re.findall(pattern, proxy_text) + + # Filter and validate proxies + valid_proxies, stats = self.filter_proxies('\n'.join(raw_proxies)) + + return list(valid_proxies), stats + except Exception as e: + logger.debug(f"Failed to scrape from {self.source_name} ({self.get_url()}): {e}") + return [], {"total": 0, "filtered_bad": 0, "filtered_invalid": 0, "valid": 0} # From spys.me class SpysMeScraper(Scraper): + """Scraper for spys.me proxy source.""" - def __init__(self, method): - super().__init__(method, "https://spys.me/{mode}.txt") + def __init__(self, method: str): + super().__init__(method, "https://spys.me/{mode}.txt", timeout=15) - def get_url(self, **kwargs): + def get_url(self, **kwargs) -> str: + """Get URL with appropriate mode for the proxy method.""" mode = "proxy" if self.method == "http" else "socks" if self.method == "socks" else "unknown" if mode == "unknown": - raise NotImplementedError + raise NotImplementedError(f"Method {self.method} not supported by SpysMeScraper") return super().get_url(mode=mode, **kwargs) # From proxyscrape.com class ProxyScrapeScraper(Scraper): + """Scraper for proxyscrape.com API.""" - def __init__(self, method, timeout=1000, country="All"): - self.timout = timeout + def __init__(self, method: str, timeout: int = 1000, country: str = "All"): + self.api_timeout = timeout # Renamed to avoid confusion with HTTP timeout self.country = country super().__init__(method, "https://api.proxyscrape.com/?request=getproxies" "&proxytype={method}" - "&timeout={timout}" - "&country={country}") + "&timeout={api_timeout}" + "&country={country}", + timeout=20) # HTTP timeout - def get_url(self, **kwargs): - return super().get_url(timout=self.timout, country=self.country, **kwargs) + def get_url(self, **kwargs) -> str: + """Get URL with API parameters.""" + return super().get_url(api_timeout=self.api_timeout, country=self.country, **kwargs) # From geonode.com - A little dirty, grab http(s) and socks but use just for socks class GeoNodeScraper(Scraper): + """Scraper for geonode.com proxy API.""" - def __init__(self, method, limit="500", page="1", sort_by="lastChecked", sort_type="desc"): + def __init__(self, method: str, limit: str = "500", page: str = "1", + sort_by: str = "lastChecked", sort_type: str = "desc"): self.limit = limit self.page = page self.sort_by = sort_by @@ -72,90 +205,219 @@ def __init__(self, method, limit="500", page="1", sort_by="lastChecked", sort_ty "&limit={limit}" "&page={page}" "&sort_by={sort_by}" - "&sort_type={sort_type}") + "&sort_type={sort_type}", + timeout=15) + + def get_url(self, **kwargs) -> str: + """Get URL with API parameters.""" + return super().get_url(limit=self.limit, page=self.page, + sort_by=self.sort_by, sort_type=self.sort_type, **kwargs) - def get_url(self, **kwargs): - return 
super().get_url(limit=self.limit, page=self.page, sort_by=self.sort_by, sort_type=self.sort_type, **kwargs) # From proxy-list.download class ProxyListDownloadScraper(Scraper): + """Scraper for proxy-list.download API.""" - def __init__(self, method, anon): + def __init__(self, method: str, anon: str): self.anon = anon - super().__init__(method, "https://www.proxy-list.download/api/v1/get?type={method}&anon={anon}") + super().__init__(method, "https://www.proxy-list.download/api/v1/get?type={method}&anon={anon}", timeout=15) - def get_url(self, **kwargs): + def get_url(self, **kwargs) -> str: + """Get URL with anonymity level parameter.""" return super().get_url(anon=self.anon, **kwargs) # For websites using table in html class GeneralTableScraper(Scraper): + """Scraper for websites that use HTML tables to display proxies.""" - async def handle(self, response): - soup = BeautifulSoup(response.text, "html.parser") - proxies = set() - table = soup.find("table", attrs={"class": "table table-striped table-bordered"}) - for row in table.findAll("tr"): - count = 0 - proxy = "" - for cell in row.findAll("td"): - if count == 1: - proxy += ":" + cell.text.replace(" ", "") - proxies.add(proxy) - break - proxy += cell.text.replace(" ", "") - count += 1 - return "\n".join(proxies) + async def handle(self, response: httpx.Response) -> str: + """Parse HTML table to extract proxies.""" + try: + soup = BeautifulSoup(response.text, "html.parser") + proxies: Set[str] = set() + table = soup.find("table", attrs={"class": "table table-striped table-bordered"}) + + if table is None: + logger.debug("No table found with expected class") + return "" + + for row in table.find_all("tr"): + cells = row.find_all("td") + if len(cells) >= 2: + ip = cells[0].get_text(strip=True).replace(" ", "") + port = cells[1].get_text(strip=True).replace(" ", "") + if ip and port: + proxies.add(f"{ip}:{port}") + + return "\n".join(proxies) + except Exception as e: + logger.debug(f"Error parsing HTML table: {e}") + return "" # For websites using div in html class GeneralDivScraper(Scraper): + """Scraper for websites that use HTML divs to display proxies.""" - async def handle(self, response): - soup = BeautifulSoup(response.text, "html.parser") - proxies = set() - table = soup.find("div", attrs={"class": "list"}) - for row in table.findAll("div"): - count = 0 - proxy = "" - for cell in row.findAll("div", attrs={"class": "td"}): - if count == 2: - break - proxy += cell.text+":" - count += 1 - proxy = proxy.rstrip(":") - proxies.add(proxy) - return "\n".join(proxies) + async def handle(self, response: httpx.Response) -> str: + """Parse HTML divs to extract proxies.""" + try: + soup = BeautifulSoup(response.text, "html.parser") + proxies: Set[str] = set() + container = soup.find("div", attrs={"class": "list"}) + + if container is None: + logger.debug("No div found with class 'list'") + return "" + + for row in container.find_all("div"): + cells = row.find_all("div", attrs={"class": "td"}) + if len(cells) >= 2: + ip = cells[0].get_text(strip=True) + port = cells[1].get_text(strip=True) + if ip and port: + proxies.add(f"{ip}:{port}") + + return "\n".join(proxies) + except Exception as e: + logger.debug(f"Error parsing HTML divs: {e}") + return "" # For scraping live proxylist from github class GitHubScraper(Scraper): + """Scraper for GitHub raw proxy lists.""" - async def handle(self, response): - tempproxies = response.text.split("\n") + async def handle(self, response: httpx.Response) -> str: + """Parse GitHub raw proxy list format.""" + 
try: + temp_proxies = response.text.strip().split("\n") + proxies: Set[str] = set() + + for proxy_line in temp_proxies: + proxy_line = proxy_line.strip() + if not proxy_line: + continue + + # Handle different formats: "type://ip:port" or just "ip:port" + if self.method in proxy_line: + # Extract IP:port from lines like "http://1.2.3.4:8080" + if "//" in proxy_line: + proxy = proxy_line.split("//")[-1] + else: + proxy = proxy_line + + # Validate IP:port format + if re.match(r"\d{1,3}(?:\.\d{1,3}){3}:\d{1,5}", proxy): + proxies.add(proxy) + + return "\n".join(proxies) + except Exception as e: + logger.debug(f"Error parsing GitHub proxy list: {e}") + return "" + +# For scraping from proxy list APIs with JSON response +class ProxyListApiScraper(Scraper): + """Scraper for APIs that return JSON proxy lists.""" + + def _extract_proxy_from_item(self, item: dict) -> Optional[str]: + """Extract proxy string from a single item.""" + if not isinstance(item, dict): + return None + + ip = item.get('ip') + port = item.get('port') + if ip and port: + return f"{ip}:{port}" + return None + + def _process_list_data(self, data: list) -> Set[str]: + """Process list-type JSON data.""" proxies = set() - for prxy in tempproxies: - if self.method in prxy: - proxies.add(prxy.split("//")[-1]) - - return "\n".join(proxies) - + for item in data: + proxy = self._extract_proxy_from_item(item) + if proxy: + proxies.add(proxy) + return proxies + + def _process_dict_data(self, data: dict) -> Set[str]: + """Process dict-type JSON data.""" + proxies = set() + if 'data' in data and isinstance(data['data'], list): + for item in data['data']: + proxy = self._extract_proxy_from_item(item) + if proxy: + proxies.add(proxy) + return proxies + async def handle(self, response: httpx.Response) -> str: + """Parse JSON API response for proxies.""" + try: + data = response.json() + proxies: Set[str] = set() + + # Handle different JSON structures + if isinstance(data, list): + proxies = self._process_list_data(data) + elif isinstance(data, dict): + proxies = self._process_dict_data(data) + + return "\n".join(proxies) + except Exception as e: + logger.debug(f"Error parsing JSON API response: {e}") + return "" + +# For scraping from plain text sources +class PlainTextScraper(Scraper): + """Scraper for plain text proxy lists.""" + + async def handle(self, response: httpx.Response) -> str: + """Parse plain text proxy list.""" + try: + proxies: Set[str] = set() + lines = response.text.strip().split('\n') + + for line in lines: + line = line.strip() + if not line or line.startswith('#'): + continue + + # Look for IP:port pattern + if re.match(r"\d{1,3}(?:\.\d{1,3}){3}:\d{1,5}", line): + proxies.add(line) + + return "\n".join(proxies) + except Exception as e: + logger.debug(f"Error parsing plain text proxy list: {e}") + return "" + + +# Improved scrapers list with better organization scrapers = [ + # Direct API scrapers SpysMeScraper("http"), SpysMeScraper("socks"), ProxyScrapeScraper("http"), ProxyScrapeScraper("socks4"), ProxyScrapeScraper("socks5"), GeoNodeScraper("socks"), + + # Download API scrapers ProxyListDownloadScraper("https", "elite"), ProxyListDownloadScraper("http", "elite"), ProxyListDownloadScraper("http", "transparent"), ProxyListDownloadScraper("http", "anonymous"), + + # HTML table scrapers GeneralTableScraper("https", "http://sslproxies.org"), GeneralTableScraper("http", "http://free-proxy-list.net"), GeneralTableScraper("http", "http://us-proxy.org"), GeneralTableScraper("socks", "http://socks-proxy.net"), + + # HTML div 
scrapers GeneralDivScraper("http", "https://freeproxy.lunaproxy.com/"), + + # GitHub raw list scrapers (established sources) GitHubScraper("http", "https://raw.githubusercontent.com/proxifly/free-proxy-list/main/proxies/all/data.txt"), GitHubScraper("socks4", "https://raw.githubusercontent.com/proxifly/free-proxy-list/main/proxies/all/data.txt"), GitHubScraper("socks5", "https://raw.githubusercontent.com/proxifly/free-proxy-list/main/proxies/all/data.txt"), @@ -165,78 +427,230 @@ async def handle(self, response): GitHubScraper("http", "https://raw.githubusercontent.com/zloi-user/hideip.me/main/http.txt"), GitHubScraper("socks4", "https://raw.githubusercontent.com/zloi-user/hideip.me/main/socks4.txt"), GitHubScraper("socks5", "https://raw.githubusercontent.com/zloi-user/hideip.me/main/socks5.txt"), + + # Additional GitHub sources + GitHubScraper("http", "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt"), + GitHubScraper("socks4", "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"), + GitHubScraper("socks5", "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt"), + GitHubScraper("http", "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt"), + GitHubScraper("https", "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/https.txt"), + GitHubScraper("socks4", "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt"), + GitHubScraper("socks5", "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt"), + GitHubScraper("http", "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-http.txt"), + GitHubScraper("https", "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-https.txt"), + GitHubScraper("socks4", "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt"), + GitHubScraper("socks5", "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt"), + GitHubScraper("http", "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"), + GitHubScraper("http", "https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt"), + GitHubScraper("http", "https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt"), + GitHubScraper("socks4", "https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt"), + GitHubScraper("socks5", "https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt"), + GitHubScraper("http", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt"), + GitHubScraper("https", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/https.txt"), + GitHubScraper("socks4", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt"), + GitHubScraper("socks5", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt"), + + # Plain text sources + PlainTextScraper("http", "https://www.proxyscan.io/download?type=http"), + PlainTextScraper("socks4", "https://www.proxyscan.io/download?type=socks4"), + PlainTextScraper("socks5", "https://www.proxyscan.io/download?type=socks5"), + PlainTextScraper("http", "https://raw.githubusercontent.com/almroot/proxylist/master/list.txt"), + PlainTextScraper("http", "https://raw.githubusercontent.com/aslisk/proxyhttps/main/https.txt"), + PlainTextScraper("http", "https://raw.githubusercontent.com/proxy4parsing/proxy-list/main/http.txt"), + + # Additional table 
scrapers + GeneralTableScraper("http", "https://proxyspace.pro/http.txt"), + GeneralTableScraper("socks4", "https://proxyspace.pro/socks4.txt"), + GeneralTableScraper("socks5", "https://proxyspace.pro/socks5.txt"), + + # API-based scrapers + ProxyListApiScraper("http", "https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=http"), + ProxyListApiScraper("socks5", "https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=socks5"), ] -def verbose_print(verbose, message): + +def verbose_print(verbose: bool, message: str) -> None: + """Print message if verbose mode is enabled.""" if verbose: print(message) -async def scrape(method, output, verbose): - now = time.time() + +def _determine_scraping_methods(method: str) -> List[str]: + """Determine which methods to scrape based on input.""" methods = [method] if method == "socks": - methods += ["socks4", "socks5"] + methods.extend(["socks4", "socks5"]) + return methods + +def _get_scrapers_for_methods(methods: List[str]) -> List: + """Get scrapers that match the specified methods.""" proxy_scrapers = [s for s in scrapers if s.method in methods] if not proxy_scrapers: - raise ValueError("Method not supported") - verbose_print(verbose, "Scraping proxies...") - proxies = [] - - tasks = [] - client = httpx.AsyncClient(follow_redirects=True) + raise ValueError(f"Methods '{methods}' not supported") + return proxy_scrapers + +def _create_http_client_config() -> Dict: + """Create HTTP client configuration.""" + return { + "follow_redirects": True, + "timeout": 30.0, + "limits": httpx.Limits(max_keepalive_connections=20, max_connections=100), + "headers": { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + }, + } + +def _print_source_statistics(verbose: bool, source_stats: Dict) -> None: + """Print source statistics if verbose mode is enabled.""" + if not verbose: + return + + print("\n๐Ÿ“Š Source Statistics:") + print("-" * 50) + total_bad_filtered = 0 + total_invalid_filtered = 0 + for source, stats in source_stats.items(): + print(f"{source}: {stats['valid']} valid, {stats['filtered_bad']} bad IPs, {stats['filtered_invalid']} invalid") + total_bad_filtered += stats['filtered_bad'] + total_invalid_filtered += stats['filtered_invalid'] + print(f"\nTotal filtered: {total_bad_filtered} bad IPs (CDN/etc), {total_invalid_filtered} invalid format") + +async def scrape(method: str, output: str, verbose: bool) -> None: + """ + Main scraping function that coordinates all scrapers. 
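+
+    All sources are scraped concurrently with a shared httpx.AsyncClient and the
+    combined results are deduplicated before being written to the output file.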
+ + Args: + method: Proxy type to scrape (http, https, socks, socks4, socks5) + output: Output file path + verbose: Enable verbose logging + """ + start_time = time.time() + + # Setup scraping parameters + methods = _determine_scraping_methods(method) + proxy_scrapers = _get_scrapers_for_methods(methods) + client_config = _create_http_client_config() + + verbose_print(verbose, f"Scraping proxies using {len(proxy_scrapers)} sources...") + all_proxies: List[str] = [] + source_stats: Dict[str, Dict[str, int]] = {} - async def scrape_scraper(scraper): + async def scrape_source(scraper, client) -> None: + """Scrape from a single source.""" try: - verbose_print(verbose, f"Looking {scraper.get_url()}...") - proxies.extend(await scraper.scrape(client)) - except Exception: - pass - - for scraper in proxy_scrapers: - tasks.append(asyncio.ensure_future(scrape_scraper(scraper))) - - await asyncio.gather(*tasks) - await client.aclose() - - proxies = set(proxies) - verbose_print(verbose, f"Writing {len(proxies)} proxies to file...") - with open(output, "w") as f: - f.write("\n".join(proxies)) - verbose_print(verbose, "Done!") - verbose_print(verbose, f"Took {time.time() - now} seconds") - -def main(): - parser = argparse.ArgumentParser() + verbose_print(verbose, f"Scraping from {scraper.get_url()}...") + proxies, stats = await scraper.scrape(client) + all_proxies.extend(proxies) + source_stats[scraper.source_name] = stats + verbose_print(verbose, f"Found {len(proxies)} valid proxies from {scraper.source_name} ({stats['filtered_bad']} bad IPs filtered, {stats['filtered_invalid']} invalid filtered)") + except Exception as e: + logger.debug(f"Failed to scrape from {scraper.source_name}: {e}") + source_stats[scraper.source_name] = {"total": 0, "filtered_bad": 0, "filtered_invalid": 0, "valid": 0} + + # Execute all scrapers concurrently + async with httpx.AsyncClient(**client_config) as client: + tasks = [scrape_source(scraper, client) for scraper in proxy_scrapers] + await asyncio.gather(*tasks, return_exceptions=True) + + # Process results + unique_proxies: Set[str] = set(all_proxies) + _print_source_statistics(verbose, source_stats) + + # Write results to file + verbose_print(verbose, f"Writing {len(unique_proxies)} unique proxies to {output}...") + try: + with open(output, "w", encoding="utf-8") as f: + f.write("\n".join(sorted(unique_proxies)) + "\n") + except IOError as e: + logger.error(f"Failed to write to output file {output}: {e}") + raise + + elapsed_time = time.time() - start_time + verbose_print(verbose, f"Scraping completed in {elapsed_time:.2f} seconds") + verbose_print(verbose, f"Found {len(unique_proxies)} unique valid proxies") + +def _setup_argument_parser(): + """Set up and return the argument parser.""" + parser = argparse.ArgumentParser( + description="Scrape proxies from multiple sources", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s -p http -v # Scrape HTTP proxies with verbose output + %(prog)s -p socks -o socks.txt # Scrape SOCKS proxies to custom file + %(prog)s -p https --verbose # Scrape HTTPS proxies with verbose output + """, + ) + + supported_methods = sorted(set(s.method for s in scrapers)) + parser.add_argument( - "-p", - "--proxy", - help="Supported proxy type: " + ", ".join(sorted(set([s.method for s in scrapers]))), + "-p", "--proxy", required=True, + choices=supported_methods, + help=f"Proxy type to scrape. 
Supported types: {', '.join(supported_methods)}", ) parser.add_argument( - "-o", - "--output", - help="Output file name to save .txt file", + "-o", "--output", default="output.txt", + help="Output file name to save proxies (default: %(default)s)", ) parser.add_argument( - "-v", - "--verbose", - help="Increase output verbosity", + "-v", "--verbose", action="store_true", + help="Enable verbose output", ) - args = parser.parse_args() - - if sys.version_info >= (3, 7) and platform.system() == 'Windows': - loop = asyncio.get_event_loop() - loop.run_until_complete(scrape(args.proxy, args.output, args.verbose)) - loop.close() - elif sys.version_info >= (3, 7): + parser.add_argument( + "--debug", + action="store_true", + help="Enable debug logging", + ) + + return parser + +def _configure_logging(args): + """Configure logging based on command line arguments.""" + if args.debug: + logging.getLogger().setLevel(logging.DEBUG) + elif args.verbose: + logging.getLogger().setLevel(logging.INFO) + else: + logging.getLogger().setLevel(logging.WARNING) + +def _run_scraping(args): + """Run the scraping process with appropriate event loop handling.""" + if sys.version_info >= (3, 7): + if platform.system() == 'Windows': + # Windows-specific asyncio policy for better compatibility + asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy()) asyncio.run(scrape(args.proxy, args.output, args.verbose)) else: + # Fallback for Python < 3.7 loop = asyncio.get_event_loop() - loop.run_until_complete(scrape(args.proxy, args.output, args.verbose)) - loop.close() + try: + loop.run_until_complete(scrape(args.proxy, args.output, args.verbose)) + finally: + loop.close() + +def main() -> None: + """Main entry point for the proxy scraper.""" + parser = _setup_argument_parser() + args = parser.parse_args() + + _configure_logging(args) + + try: + _run_scraping(args) + except KeyboardInterrupt: + print("\nScraping interrupted by user") + sys.exit(1) + except Exception as e: + logger.error(f"Scraping failed: {e}") + if args.debug: + import traceback + traceback.print_exc() + sys.exit(1) + if __name__ == "__main__": main() diff --git a/requirements.txt b/requirements.txt index 97b770a..c6a4ddd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ -beautifulsoup4==4.11.1 -requests==2.27.1 -colorama==0.4.4 -urllib3==1.26.9 -httpx -socks -PySocks \ No newline at end of file +beautifulsoup4>=4.11.1,<5.0.0 +requests>=2.27.1,<3.0.0 +colorama>=0.4.4,<1.0.0 +urllib3>=1.26.9,<3.0.0 +httpx>=0.23.0,<1.0.0 +PySocks>=1.7.1,<2.0.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 575d218..8842a4b 100644 --- a/setup.py +++ b/setup.py @@ -2,12 +2,12 @@ setup( name='proxyz', - version='0.2.0', + version='0.4.0', py_modules=['proxyScraper', 'proxyChecker'], install_requires=[ - 'httpx', - 'beautifulsoup4', - 'pysocks', + 'httpx>=0.23.0,<1.0.0', + 'beautifulsoup4>=4.11.1,<5.0.0', + 'pysocks>=1.7.1,<2.0.0', ], entry_points={ 'console_scripts': [ @@ -21,14 +21,18 @@ }, author='Nima Akbarzadeh', author_email='iw4p@protonmail.com', - description='scrape proxies from more than 5 different sources and check which ones are still alive', + description='scrape proxies from more than 12 different sources and check which ones are still alive', long_description=open('README.md').read(), long_description_content_type='text/markdown', url='https://github.com/iw4p/proxy-scraper', classifiers=[ 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 
'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', ], - python_requires='>=3.7', + python_requires='>=3.9', ) diff --git a/user_agents.txt b/user_agents.txt index ae82bd5..b9e20a0 100644 --- a/user_agents.txt +++ b/user_agents.txt @@ -1,3 +1,40 @@ +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:132.0) Gecko/20100101 Firefox/132.0 +Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0 +Mozilla/5.0 (X11; Linux x86_64; rv:132.0) Gecko/20100101 Firefox/132.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/117.0.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 OPR/116.0.0.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/117.0.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 18_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Mobile/15E148 Safari/604.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 18_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Mobile/15E148 Safari/604.1 +Mozilla/5.0 (iPad; CPU OS 18_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Mobile/15E148 Safari/604.1 +Mozilla/5.0 (iPad; CPU OS 18_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Mobile/15E148 Safari/604.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 18_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/131.0.6778.73 Mobile/15E148 Safari/604.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 18_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/130.0.6723.90 Mobile/15E148 Safari/604.1 +Mozilla/5.0 (Android 14; Mobile; rv:133.0) Gecko/133.0 Firefox/133.0 +Mozilla/5.0 (Android 13; Mobile; rv:132.0) Gecko/132.0 Firefox/132.0 +Mozilla/5.0 (Linux; Android 14; SM-G998B) AppleWebKit/537.36 (KHTML, 
like Gecko) Chrome/131.0.0.0 Mobile Safari/537.36 +Mozilla/5.0 (Linux; Android 13; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Mobile Safari/537.36 +Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Mobile Safari/537.36 +Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Mobile Safari/537.36 +Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 +Mozilla/5.0 (Windows NT 11.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0 +Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0 +Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0 +Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:132.0) Gecko/20100101 Firefox/132.0 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/37.0.2062.94 Chrome/37.0.2062.94 Safari/537.36 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36 Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko From 0c1b4db03e07ded28fb92cdf0356b30d626c9f9a Mon Sep 17 00:00:00 2001 From: Dikky Hardian <30888372+FosterG4@users.noreply.github.com> Date: Sat, 26 Jul 2025 01:34:17 +0700 Subject: [PATCH 2/5] Update setup.py version mismatch --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8842a4b..4f12769 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='proxyz', - version='0.4.0', + version='0.3.0', py_modules=['proxyScraper', 'proxyChecker'], install_requires=[ 'httpx>=0.23.0,<1.0.0', From 364f155f18e3709b95494233c85b8650ff39fb8a Mon Sep 17 00:00:00 2001 From: FosterG4 Date: Sat, 26 Jul 2025 01:45:49 +0700 Subject: [PATCH 3/5] Fix Unicode encoding issues and update CI workflow - Replace emoji characters with ASCII equivalents in all Python files - Prevents UnicodeEncodeError in Windows CI environment - Update CI workflow to use Python 3.8-3.12 (3.7 no longer available) - Update GitHub Actions to latest versions (checkout@v4, setup-python@v4) - Ensures cross-platform compatibility for all CI environments --- .github/workflows/tests.yml | 12 ++++++------ proxyChecker.py | 28 ++++++++++++++-------------- proxyScraper.py | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3004d06..0b962d4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,20 +15,20 @@ jobs: os: - ubuntu-latest python-version: - - '3.7' - '3.8' - '3.9' - '3.10' - - 'pypy-3.8' + - '3.11' + - '3.12' include: - os: windows-latest - python-version: '3.10' + python-version: '3.11' - os: macos-latest - python-version: '3.10' + python-version: '3.11' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/proxyChecker.py b/proxyChecker.py index 6e3ee80..c1fa9a8 100644 --- a/proxyChecker.py +++ b/proxyChecker.py @@ -17,7 +17,7 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) -#fallback user agents (will be extended from user_agents.txt if available) +# Fallback user agents (will be 
extended from user_agents.txt if available) user_agents = [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0", @@ -153,7 +153,7 @@ def _check_socks_proxy(self, site: str, timeout: int, verbose: bool, start_time: end_time = time() time_taken = end_time - start_time - verbose_print(verbose, f"โœ“ Proxy {self.proxy} ({self.method.upper()}) is valid, time: {time_taken:.2f}s") + verbose_print(verbose, f"[+] Proxy {self.proxy} ({self.method.upper()}) is valid, time: {time_taken:.2f}s") return True, time_taken, None finally: @@ -162,7 +162,7 @@ def _check_socks_proxy(self, site: str, timeout: int, verbose: bool, start_time: except Exception as e: socket.socket = original_socket # Ensure cleanup even on error - verbose_print(verbose, f"โœ— Proxy {self.proxy} ({self.method.upper()}) failed: {e}") + verbose_print(verbose, f"[-] Proxy {self.proxy} ({self.method.upper()}) failed: {e}") return False, 0.0, e def _check_http_proxy(self, site: str, timeout: int, user_agent: str, verbose: bool, start_time: float) -> Tuple[bool, float, Optional[Exception]]: @@ -190,11 +190,11 @@ def _check_http_proxy(self, site: str, timeout: int, user_agent: str, verbose: b end_time = time() time_taken = end_time - start_time - verbose_print(verbose, f"โœ“ Proxy {self.proxy} ({self.method.upper()}) is valid, time: {time_taken:.2f}s") + verbose_print(verbose, f"[+] Proxy {self.proxy} ({self.method.upper()}) is valid, time: {time_taken:.2f}s") return True, time_taken, None except Exception as e: - verbose_print(verbose, f"โœ— Proxy {self.proxy} ({self.method.upper()}) failed: {e}") + verbose_print(verbose, f"[-] Proxy {self.proxy} ({self.method.upper()}) failed: {e}") return False, 0.0, e def __str__(self) -> str: @@ -406,7 +406,7 @@ def check(file: str, timeout: int, method: str, site: str, verbose: bool, random print(f"Average time per proxy: {elapsed_time/len(proxies):.2f}s") if len(valid_proxies) == 0: - print("โš ๏ธ No working proxies found. Consider:") + print("WARNING: No working proxies found. 
Consider:") print(" - Increasing timeout value") print(" - Trying a different target site") print(" - Using fresh proxy list") @@ -519,13 +519,13 @@ def main() -> None: site = _configure_logging_and_validate_args(args) # Display startup information - print("๐Ÿ” Proxy Checker v2.0") - print(f"๐Ÿ“ Proxy file: {args.list}") - print(f"๐ŸŽฏ Target site: {site}") - print(f"โฑ๏ธ Timeout: {args.timeout}s") - print(f"๐Ÿ”ง Method: {args.proxy.upper()}") - print(f"๐Ÿงต Max threads: {args.max_threads}") - print(f"๐Ÿ‘ค User agents: {len(user_agents)} available") + print("*** Proxy Checker v2.0 ***") + print(f"Proxy file: {args.list}") + print(f"Target site: {site}") + print(f"Timeout: {args.timeout}s") + print(f"Method: {args.proxy.upper()}") + print(f"Max threads: {args.max_threads}") + print(f"User agents: {len(user_agents)} available") print("=" * 60) try: @@ -539,7 +539,7 @@ def main() -> None: ) except KeyboardInterrupt: - print("\nโš ๏ธ Operation interrupted by user") + print("\nWARNING: Operation interrupted by user") sys.exit(1) except Exception as e: logger.error(f"Proxy checking failed: {e}") diff --git a/proxyScraper.py b/proxyScraper.py index c774462..ec5c56f 100644 --- a/proxyScraper.py +++ b/proxyScraper.py @@ -505,7 +505,7 @@ def _print_source_statistics(verbose: bool, source_stats: Dict) -> None: if not verbose: return - print("\n๐Ÿ“Š Source Statistics:") + print("\n*** Source Statistics ***") print("-" * 50) total_bad_filtered = 0 total_invalid_filtered = 0 From 86d0f7833b9fce7b78523ce59ca5d5bac82b2a27 Mon Sep 17 00:00:00 2001 From: FosterG4 Date: Sat, 26 Jul 2025 23:21:54 +0700 Subject: [PATCH 4/5] Fix complexity issues and update ProxyScrape API to v4 --- .flake8 | 4 +- .github/workflows/tests.yml | 2 +- README.md | 99 ++++++++-- proxyChecker.py | 84 ++++++--- proxyScraper.py | 367 +++++++++++++++++++++++------------- 5 files changed, 372 insertions(+), 184 deletions(-) diff --git a/.flake8 b/.flake8 index 71dc6e8..fb8bfa7 100644 --- a/.flake8 +++ b/.flake8 @@ -1,8 +1,8 @@ [flake8] exclude = .git,__pycache__,env,venv,.eggs,.tox,.nox,build,dist -max-line-lenght = 120 +max-line-length = 120 max-complexity = 8 ignore = W,BLK, - E24,E121,E123,E125,E126,E221,E226,E266,E704, + E24,E121,E123,E126,E221,E226,E266,E704, E265,E722,E501,E731,E306,E401,E302,E222,E303, E402,E305,E261,E262,E203,N816 \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0b962d4..44e36d8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -41,6 +41,6 @@ jobs: - name: Run proxyScraper run: python3 proxyScraper.py -p http - name: Run proxyChecker - run: python3 proxyChecker.py -t 20 -s google.com -l output.txt + run: python3 proxyChecker.py -t 20 -s google.com -l output.txt --limit 10 - name: Run proxyGeolocation run: python3 proxyGeolocation.py -i 8.8.8.8 \ No newline at end of file diff --git a/README.md b/README.md index cef17f5..f3b69b9 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Tests](https://github.com/iw4p/proxy-scraper/actions/workflows/tests.yml/badge.svg)](https://github.com/iw4p/proxy-scraper/actions/workflows/tests.yml) [![Downloads](https://static.pepy.tech/badge/proxyz)](https://pepy.tech/project/proxyz) -**Fast, reliable proxy scraper that collects 30K+ HTTP/HTTPS/SOCKS proxies from 24+ sources in seconds.** +**Fast, reliable proxy scraper that collects 30K+ HTTP/HTTPS/SOCKS proxies from 5+ sources in seconds.** โœจ **Features:** - โšก **Fast scraping** - All sources scraped concurrently @@ -56,18 +56,49 @@ 
python proxyChecker.py --help # Get HTTP proxies (basic) proxy_scraper -p http -# Get SOCKS5 proxies with detailed output -proxy_scraper -p socks5 -v +# Get HTTPS proxies +proxy_scraper -p https -# Save to custom file -proxy_scraper -p http -o my_proxies.txt -v +# Get SOCKS4 proxies +proxy_scraper -p socks4 + +# Get SOCKS5 proxies +proxy_scraper -p socks5 + +# Get all SOCKS proxies (SOCKS4 + SOCKS5) +proxy_scraper -p socks + +# Save to custom file (example: HTTP) +proxy_scraper -p http -o output.txt -v + +# Save HTTPS proxies with verbose output +proxy_scraper -p https -v -o output.txt + +# Save SOCKS4 proxies +proxy_scraper -p socks4 -o output.txt + +# Save SOCKS5 proxies +proxy_scraper -p socks5 -o output.txt ``` + ### Step 2: Check Proxy Quality ```bash -# Test scraped proxies (basic) +# Test scraped HTTP proxies (basic) proxy_checker -l output.txt -t 10 +# Test HTTP proxies +proxy_checker -p http -l output.txt -t 10 + +# Test HTTPS proxies +proxy_checker -p https -l output.txt -t 10 + +# Test SOCKS4 proxies +proxy_checker -p socks4 -l output.txt -t 10 + +# Test SOCKS5 proxies +proxy_checker -p socks5 -l output.txt -t 10 + # Test against specific site with verbose output proxy_checker -l output.txt -s https://google.com -v @@ -78,12 +109,30 @@ proxy_checker -l output.txt -r -v ### Step 3: Complete Workflow Example ```bash # 1. Scrape HTTP proxies -proxy_scraper -p http -v -o fresh_proxies.txt +proxy_scraper -p http -v -o output.txt + +# 2. Scrape HTTPS proxies +proxy_scraper -p https -v -o output.txt + +# 3. Scrape SOCKS4 proxies +proxy_scraper -p socks4 -v -o output.txt + +# 4. Scrape SOCKS5 proxies +proxy_scraper -p socks5 -v -o output.txt + +# 5. Check HTTP proxies +proxy_checker -l output.txt -t 15 -v -# 2. Check their quality -proxy_checker -l fresh_proxies.txt -t 15 -v +# 6. Check HTTPS proxies +proxy_checker -l output.txt -t 15 -v -# 3. Result: output.txt contains only working proxies +# 7. Check SOCKS4 proxies +proxy_checker -l output.txt -t 15 -v + +# 8. Check SOCKS5 proxies +proxy_checker -l output.txt -t 15 -v + +# 9. 
Result: output.txt contains only working proxies (for each type) ``` ## Supported Proxy Types @@ -100,14 +149,14 @@ We collect proxies from **24 sources**: - spys.me, free-proxy-list.net, proxyscrape.com, geonode.com - sslproxies.org, us-proxy.org, socks-proxy.net - proxy-list.download, proxyscan.io, proxyspace.pro -- freeproxy.lunaproxy.com +- freeproxy.lunaproxy.com, more **๐Ÿ“ฆ GitHub Repositories (13 sources)** - proxifly/free-proxy-list, monosans/proxy-list, TheSpeedX/PROXY-List - jetkai/proxy-list, roosterkid/openproxylist, mmpx12/proxy-list - ShiftyTR/Proxy-List, clarketm/proxy-list, sunny9577/proxy-scraper - zloi-user/hideip.me, almroot/proxylist, aslisk/proxyhttps -- proxy4parsing/proxy-list +- proxy4parsing/proxy-list, more ## Advanced Usage @@ -121,6 +170,8 @@ Options: -p, --proxy Proxy type: http, https, socks, socks4, socks5 -o, --output Output file (default: output.txt) -v, --verbose Show detailed statistics + -l, --list Input proxy file (default: output.txt) + -h, --help Show this help message ``` **Checking:** @@ -129,10 +180,13 @@ proxy_checker [-l input.txt] [-t timeout] [-s site] [-v] Options: -l, --list Input proxy file (default: output.txt) + -p, --proxy Proxy type: http, https, socks, socks4, socks5 + -o, --output Output file (default: output.txt) -t, --timeout Timeout in seconds (default: 20) -s, --site Test site (default: https://google.com) -r, --random_agent Use random user agents -v, --verbose Show detailed progress + --max-threads Maximum concurrent threads (default: 10) ``` ### From Source Code @@ -160,15 +214,20 @@ python proxyChecker.py -l output.txt -v ## Example Output ```bash -Scraping proxies using 24 sources... -๐Ÿ“Š Source Statistics: +*** Source Statistics *** -------------------------------------------------- -ProxyScrapeScraper: 18769 valid, 16408 bad IPs filtered -PlainTextScraper: 13516 valid, 5515 bad IPs filtered -GitHubScraper: 1767 valid, 739 bad IPs filtered -... -Total filtered: 22177 bad IPs (CDN/etc), 1 invalid format -Found 30938 unique valid proxies +PlainTextScraper: 0 valid, 0 bad IPs, 0 invalid +GeneralTableScraper: 0 valid, 0 bad IPs, 0 invalid +ProxyScrapeScraper: 1666 valid, 334 bad IPs, 0 invalid +GitHubScraper: 0 valid, 0 bad IPs, 0 invalid +ProxyListApiScraper: 261 valid, 0 bad IPs, 0 invalid +GeneralDivScraper: 0 valid, 0 bad IPs, 0 invalid +SpysMeScraper: 400 valid, 0 bad IPs, 0 invalid + +Total filtered: 334 bad IPs (CDN/etc), 0 invalid format +Writing 37030 unique proxies to output.txt... +Scraping completed in 13.13 seconds +Found 37030 unique valid proxies ``` ## ๐ŸŒ Proxy Geolocation & Analysis diff --git a/proxyChecker.py b/proxyChecker.py index c1fa9a8..f0398b3 100644 --- a/proxyChecker.py +++ b/proxyChecker.py @@ -238,13 +238,14 @@ def _read_proxy_file(file_path: str) -> List[str]: sys.exit(1) -def load_proxies_from_file(file_path: str, method: str) -> List[Proxy]: +def load_proxies_from_file(file_path: str, method: str, limit: Optional[int] = None) -> List[Proxy]: """ Load proxies from file and create Proxy objects. 
Args: file_path: Path to proxy list file method: Proxy method to use + limit: Maximum number of proxies to load (None for all) Returns: List of valid Proxy objects @@ -255,19 +256,22 @@ def load_proxies_from_file(file_path: str, method: str) -> List[Proxy]: lines = _read_proxy_file(file_path) for line_num, line in enumerate(lines, 1): + # Check if we've reached the limit + if limit is not None and len(proxies) >= limit: + logger.info(f"Reached limit of {limit} proxies, stopping load") + break + proxy = _process_proxy_line(line, line_num, method) if proxy is not None: proxies.append(proxy) else: if line.strip() and not line.strip().startswith('#'): invalid_count += 1 - + if invalid_count > 0: logger.warning(f"Skipped {invalid_count} invalid proxy entries") return proxies - - def save_valid_proxies(file_path: str, valid_proxies: List[Proxy]) -> None: """ Save valid proxies back to file. @@ -291,10 +295,10 @@ def save_valid_proxies(file_path: str, valid_proxies: List[Proxy]) -> None: raise -def _prepare_checking_environment(file: str, method: str, site: str, timeout: int, random_user_agent: bool) -> Tuple[List[Proxy], str, int]: +def _prepare_checking_environment(file: str, method: str, site: str, timeout: int, random_user_agent: bool, limit: Optional[int] = None) -> Tuple[List[Proxy], str, int]: """Prepare the environment for proxy checking.""" print(f"Loading proxies from {file}...") - proxies = load_proxies_from_file(file, method) + proxies = load_proxies_from_file(file, method, limit) print(f"Loaded {len(proxies)} valid proxies for checking") if not proxies: @@ -346,7 +350,7 @@ def check_single_proxy(proxy: Proxy) -> None: return check_single_proxy -def check(file: str, timeout: int, method: str, site: str, verbose: bool, random_user_agent: bool) -> None: +def check(file: str, timeout: int, method: str, site: str, verbose: bool, random_user_agent: bool, limit: Optional[int] = None) -> None: """ Main proxy checking function. 
@@ -357,12 +361,13 @@ def check(file: str, timeout: int, method: str, site: str, verbose: bool, random site: Target website for testing verbose: Enable verbose output random_user_agent: Use random user agent per proxy + limit: Maximum number of proxies to check """ start_time = time() # Prepare checking environment proxies, base_user_agent, max_threads = _prepare_checking_environment( - file, method, site, timeout, random_user_agent, + file, method, site, timeout, random_user_agent, limit, ) if not proxies: @@ -379,24 +384,12 @@ def check(file: str, timeout: int, method: str, site: str, verbose: bool, random random_user_agent, base_user_agent, len(proxies), verbose, ) - # Execute checking with thread pool - with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor: - futures = [executor.submit(check_single_proxy, proxy) for proxy in proxies] - - try: - concurrent.futures.wait(futures, timeout=None) - except KeyboardInterrupt: - print("\nChecking interrupted by user") - executor.shutdown(wait=False) - return - - # Save results - save_valid_proxies(file, valid_proxies) - - # Final statistics + _run_proxy_check_threadpool( + check_single_proxy, proxies, valid_proxies, checked_count_ref, file, start_time, + ) elapsed_time = time() - start_time + # Final statistics success_rate = (len(valid_proxies) / len(proxies)) * 100 if proxies else 0 - print("-" * 60) print("Proxy checking completed!") print(f"Total checked: {len(proxies)}") @@ -404,7 +397,6 @@ def check(file: str, timeout: int, method: str, site: str, verbose: bool, random print(f"Success rate: {success_rate:.1f}%") print(f"Time taken: {elapsed_time:.2f} seconds") print(f"Average time per proxy: {elapsed_time/len(proxies):.2f}s") - if len(valid_proxies) == 0: print("WARNING: No working proxies found. Consider:") print(" - Increasing timeout value") @@ -412,6 +404,32 @@ def check(file: str, timeout: int, method: str, site: str, verbose: bool, random print(" - Using fresh proxy list") +def _run_proxy_check_threadpool(check_single_proxy, proxies, valid_proxies, checked_count_ref, file, start_time): + """Helper to run proxy checking in a thread pool, handles KeyboardInterrupt and saving.""" + executor = None + try: + executor = concurrent.futures.ThreadPoolExecutor(max_workers=min(len(proxies), 100)) + futures = [executor.submit(check_single_proxy, proxy) for proxy in proxies] + for _ in concurrent.futures.as_completed(futures): + pass + except KeyboardInterrupt: + print("\n[!] Proxy checking cancelled by user. Stopping threads and saving progress...") + if executor is not None: + try: + executor.shutdown(wait=False, cancel_futures=True) + except Exception: + pass + save_valid_proxies(file, valid_proxies) + elapsed_time = time() - start_time + print("-" * 60) + print(f"Check cancelled. 
{len(valid_proxies)} valid proxies saved to {file}.") + print(f"Checked: {checked_count_ref[0]} / {len(proxies)} | Time: {elapsed_time:.2f}s") + sys.exit(130) + if executor is not None: + executor.shutdown(wait=True) + save_valid_proxies(file, valid_proxies) + + def _setup_argument_parser() -> argparse.ArgumentParser: """Set up and configure the argument parser.""" parser = argparse.ArgumentParser( @@ -422,11 +440,15 @@ def _setup_argument_parser() -> argparse.ArgumentParser: %(prog)s -p http -t 10 -v # Check HTTP proxies with 10s timeout %(prog)s -p socks4 -l socks.txt -r # Check SOCKS4 with random user agents %(prog)s -p https -s httpbin.org/ip --debug # Check HTTPS proxies against custom site - + %(prog)s -p http --limit 50 -v # Check only the first 50 HTTP proxies + %(prog)s -p socks5 -l proxies.txt -t 30 --max-threads 20 # Check SOCKS5 proxies with 30s timeout and 20 threads Notes: - Dead proxies are automatically removed from the list file - Use --debug for detailed error information - Higher timeout values may find more working proxies but take longer + - Use --limit for quick testing or when you don't want to check all proxies + - Random user agents can help avoid detection by target sites + - Use --max-threads to control concurrency, default is 10 """, ) @@ -470,9 +492,14 @@ def _setup_argument_parser() -> argparse.ArgumentParser: parser.add_argument( "--max-threads", type=int, - default=100, + default=10, help="Maximum number of concurrent threads (default: %(default)s)", ) + parser.add_argument( + "--limit", + type=int, + help="Maximum number of proxies to check (default: check all)", + ) return parser @@ -525,6 +552,8 @@ def main() -> None: print(f"Timeout: {args.timeout}s") print(f"Method: {args.proxy.upper()}") print(f"Max threads: {args.max_threads}") + if args.limit: + print(f"Limit: {args.limit} proxies") print(f"User agents: {len(user_agents)} available") print("=" * 60) @@ -536,6 +565,7 @@ def main() -> None: site=site, verbose=args.verbose, random_user_agent=args.random_agent, + limit=args.limit, ) except KeyboardInterrupt: diff --git a/proxyScraper.py b/proxyScraper.py index ec5c56f..24a2062 100644 --- a/proxyScraper.py +++ b/proxyScraper.py @@ -7,6 +7,7 @@ import sys import time from typing import Dict, List, Optional, Set, Tuple +from urllib.parse import urlparse import httpx from bs4 import BeautifulSoup @@ -15,6 +16,44 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) +# --- Module-level helpers for source statistics --- +def _extract_domain(url): + """Extract domain from URL for statistics.""" + try: + domain = urlparse(url).netloc or urlparse('//' + url).netloc + if not domain: + domain = url + except Exception: + domain = url + return domain + +def _aggregate_domain_stats(source_stats): + """Aggregate statistics by domain.""" + total_bad_filtered = 0 + total_invalid_filtered = 0 + domain_valid = {} + skipped = 0 + for source, stats in source_stats.items(): + url = source.split(": ", 1)[-1] + domain = _extract_domain(url) + if stats['valid'] > 0: + domain_valid[domain] = domain_valid.get(domain, 0) + stats['valid'] + else: + skipped += 1 + total_bad_filtered += stats['filtered_bad'] + total_invalid_filtered += stats['filtered_invalid'] + return domain_valid, skipped, total_bad_filtered, total_invalid_filtered + +def _print_summary(domain_valid, skipped, total_bad_filtered, total_invalid_filtered): + """Print formatted statistics summary.""" + print("\n*** Source Statistics ***") + 
print("-" * 50) + for domain, valid_count in sorted(domain_valid.items(), key=lambda x: -x[1]): + print(f"{valid_count} valid from {domain}") + if skipped: + print(f"...{skipped} sources returned 0 valid proxies and are hidden...") + print(f"\nTotal filtered: {total_bad_filtered} bad IPs (CDN/etc), {total_invalid_filtered} invalid format") + # Known bad IP ranges to filter out (Cloudflare, major CDNs, etc.) BAD_IP_RANGES = [ # Cloudflare @@ -100,41 +139,41 @@ def filter_proxies(self, proxy_text: str) -> Tuple[Set[str], Dict[str, int]]: line = line.strip() if not line: continue - + stats["total"] += 1 - + # Basic format validation if ':' not in line: stats["filtered_invalid"] += 1 continue - + try: ip, port = line.split(':', 1) ip = ip.strip() port = port.strip() - + # Validate IP format ipaddress.ip_address(ip) - + # Validate port port_num = int(port) if not (1 <= port_num <= 65535): stats["filtered_invalid"] += 1 continue - + # Check if it's a bad IP (CDN, etc.) if is_bad_ip(ip): stats["filtered_bad"] += 1 logger.debug(f"Filtered bad IP from {self.source_name}: {ip}:{port}") continue - + proxies.add(f"{ip}:{port}") stats["valid"] += 1 - + except (ValueError, ipaddress.AddressValueError): stats["filtered_invalid"] += 1 continue - + return proxies, stats async def scrape(self, client: httpx.AsyncClient) -> Tuple[List[str], Dict[str, int]]: @@ -171,48 +210,56 @@ def get_url(self, **kwargs) -> str: raise NotImplementedError(f"Method {self.method} not supported by SpysMeScraper") return super().get_url(mode=mode, **kwargs) + async def handle(self, response: httpx.Response) -> str: + """Parse spys.me format to extract only IP:port.""" + try: + lines = response.text.strip().split('\n') + proxies: Set[str] = set() + + for line in lines: + line = line.strip() + if not line: + continue + + # Skip header lines and comments + if (line.startswith('Proxy list') or + line.startswith('Socks proxy=') or + line.startswith('Support by') or + line.startswith('BTC ') or + line.startswith('IP address:Port') or + line.startswith('#')): + continue + + # Extract IP:port from lines like "89.58.55.193:80 DE-A + " + # The format is: IP:PORT COUNTRY-ANONYMITY-SSL GOOGLE_PASSED + parts = line.split() + if parts and ':' in parts[0]: + proxy = parts[0].strip() + # Validate IP:port format + if re.match(r"\d{1,3}(?:\.\d{1,3}){3}:\d{1,5}", proxy): + proxies.add(proxy) + + return "\n".join(proxies) + except Exception as e: + logger.debug(f"Error parsing spys.me format: {e}") + return "" + # From proxyscrape.com class ProxyScrapeScraper(Scraper): - """Scraper for proxyscrape.com API.""" + """Scraper for proxyscrape.com v4 API.""" - def __init__(self, method: str, timeout: int = 1000, country: str = "All"): - self.api_timeout = timeout # Renamed to avoid confusion with HTTP timeout + def __init__(self, method: str, country: str = "all"): self.country = country super().__init__(method, - "https://api.proxyscrape.com/?request=getproxies" - "&proxytype={method}" - "&timeout={api_timeout}" - "&country={country}", - timeout=20) # HTTP timeout + "https://api.proxyscrape.com/v4/free-proxy-list/get?" 
+ "request=display_proxies&proxy_format=ipport&format=text" + "&protocol={method}&country={country}", + timeout=20) def get_url(self, **kwargs) -> str: """Get URL with API parameters.""" - return super().get_url(api_timeout=self.api_timeout, country=self.country, **kwargs) - -# From geonode.com - A little dirty, grab http(s) and socks but use just for socks -class GeoNodeScraper(Scraper): - """Scraper for geonode.com proxy API.""" - - def __init__(self, method: str, limit: str = "500", page: str = "1", - sort_by: str = "lastChecked", sort_type: str = "desc"): - self.limit = limit - self.page = page - self.sort_by = sort_by - self.sort_type = sort_type - super().__init__(method, - "https://proxylist.geonode.com/api/proxy-list?" - "&limit={limit}" - "&page={page}" - "&sort_by={sort_by}" - "&sort_type={sort_type}", - timeout=15) - - def get_url(self, **kwargs) -> str: - """Get URL with API parameters.""" - return super().get_url(limit=self.limit, page=self.page, - sort_by=self.sort_by, sort_type=self.sort_type, **kwargs) - + return super().get_url(country=self.country, **kwargs) # From proxy-list.download class ProxyListDownloadScraper(Scraper): @@ -321,29 +368,27 @@ class ProxyListApiScraper(Scraper): """Scraper for APIs that return JSON proxy lists.""" def _extract_proxy_from_item(self, item: dict) -> Optional[str]: - """Extract proxy string from a single item.""" + """Extract proxy string from a single item for new www.proxy-list.download format.""" if not isinstance(item, dict): return None - - ip = item.get('ip') - port = item.get('port') + # Support both old and new keys + ip = item.get('ip') or item.get('IP') + port = item.get('port') or item.get('PORT') if ip and port: return f"{ip}:{port}" return None - - def _process_list_data(self, data: list) -> Set[str]: - """Process list-type JSON data.""" - proxies = set() - for item in data: - proxy = self._extract_proxy_from_item(item) - if proxy: - proxies.add(proxy) - return proxies - + def _process_dict_data(self, data: dict) -> Set[str]: - """Process dict-type JSON data.""" + """Process dict-type JSON data for new www.proxy-list.download format.""" proxies = set() - if 'data' in data and isinstance(data['data'], list): + # New format: proxies are in 'LISTA' key + if 'LISTA' in data and isinstance(data['LISTA'], list): + for item in data['LISTA']: + proxy = self._extract_proxy_from_item(item) + if proxy: + proxies.add(proxy) + # Fallback for old format + elif 'data' in data and isinstance(data['data'], list): for item in data['data']: proxy = self._extract_proxy_from_item(item) if proxy: @@ -351,22 +396,41 @@ def _process_dict_data(self, data: dict) -> Set[str]: return proxies async def handle(self, response: httpx.Response) -> str: - """Parse JSON API response for proxies.""" + """Parse JSON API response for proxies (new and old format).""" try: data = response.json() proxies: Set[str] = set() - - # Handle different JSON structures - if isinstance(data, list): - proxies = self._process_list_data(data) - elif isinstance(data, dict): + if isinstance(data, dict): proxies = self._process_dict_data(data) - return "\n".join(proxies) except Exception as e: logger.debug(f"Error parsing JSON API response: {e}") return "" +# Helper functions for PlainTextScraper +def _is_protocol_match(protocol: str, method: str) -> bool: + """Check if protocol matches the scraper method.""" + return (protocol.lower() == method.lower() or + (method == "socks" and protocol.lower() in ["socks4", "socks5"])) + +def _is_valid_proxy_format(address: str) -> bool: + 
"""Validate IP:port format.""" + return bool(re.match(r"\d{1,3}(?:\.\d{1,3}){3}:\d{1,5}", address)) + +def _process_protocol_line(line: str, method: str) -> Optional[str]: + """Process a line with protocol://ip:port format.""" + protocol, address = line.split("://", 1) + if _is_protocol_match(protocol, method): + if _is_valid_proxy_format(address): + return address + return None + +def _process_plain_line(line: str) -> Optional[str]: + """Process a plain IP:port line.""" + if _is_valid_proxy_format(line): + return line + return None + # For scraping from plain text sources class PlainTextScraper(Scraper): """Scraper for plain text proxy lists.""" @@ -381,91 +445,129 @@ async def handle(self, response: httpx.Response) -> str: line = line.strip() if not line or line.startswith('#'): continue - - # Look for IP:port pattern - if re.match(r"\d{1,3}(?:\.\d{1,3}){3}:\d{1,5}", line): - proxies.add(line) - + + # Handle protocol://ip:port format (ProxyScrape v4 API) + if "://" in line: + proxy = _process_protocol_line(line, self.method) + if proxy: + proxies.add(proxy) + else: + # Look for plain IP:port pattern (legacy format) + proxy = _process_plain_line(line) + if proxy: + proxies.add(proxy) + return "\n".join(proxies) except Exception as e: logger.debug(f"Error parsing plain text proxy list: {e}") return "" -# Improved scrapers list with better organization +# Latest and most frequently updated proxy sources (2025) scrapers = [ - # Direct API scrapers + # Primary API scrapers (most reliable) SpysMeScraper("http"), SpysMeScraper("socks"), ProxyScrapeScraper("http"), ProxyScrapeScraper("socks4"), ProxyScrapeScraper("socks5"), - GeoNodeScraper("socks"), - # Download API scrapers - ProxyListDownloadScraper("https", "elite"), - ProxyListDownloadScraper("http", "elite"), - ProxyListDownloadScraper("http", "transparent"), - ProxyListDownloadScraper("http", "anonymous"), - - # HTML table scrapers - GeneralTableScraper("https", "http://sslproxies.org"), - GeneralTableScraper("http", "http://free-proxy-list.net"), - GeneralTableScraper("http", "http://us-proxy.org"), - GeneralTableScraper("socks", "http://socks-proxy.net"), - - # HTML div scrapers - GeneralDivScraper("http", "https://freeproxy.lunaproxy.com/"), - - # GitHub raw list scrapers (established sources) - GitHubScraper("http", "https://raw.githubusercontent.com/proxifly/free-proxy-list/main/proxies/all/data.txt"), - GitHubScraper("socks4", "https://raw.githubusercontent.com/proxifly/free-proxy-list/main/proxies/all/data.txt"), - GitHubScraper("socks5", "https://raw.githubusercontent.com/proxifly/free-proxy-list/main/proxies/all/data.txt"), - GitHubScraper("http", "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/all.txt"), - GitHubScraper("socks", "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/all.txt"), - GitHubScraper("https", "https://raw.githubusercontent.com/zloi-user/hideip.me/main/https.txt"), - GitHubScraper("http", "https://raw.githubusercontent.com/zloi-user/hideip.me/main/http.txt"), - GitHubScraper("socks4", "https://raw.githubusercontent.com/zloi-user/hideip.me/main/socks4.txt"), - GitHubScraper("socks5", "https://raw.githubusercontent.com/zloi-user/hideip.me/main/socks5.txt"), - - # Additional GitHub sources + # TheSpeedX/PROXY-List (updated daily) GitHubScraper("http", "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt"), GitHubScraper("socks4", "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt"), GitHubScraper("socks5", 
"https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt"), - GitHubScraper("http", "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt"), - GitHubScraper("https", "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/https.txt"), - GitHubScraper("socks4", "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt"), - GitHubScraper("socks5", "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt"), + + # jetkai/proxy-list (hourly updates, geolocation) GitHubScraper("http", "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-http.txt"), GitHubScraper("https", "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-https.txt"), GitHubScraper("socks4", "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt"), GitHubScraper("socks5", "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt"), - GitHubScraper("http", "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"), - GitHubScraper("http", "https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt"), + + # prxchk/proxy-list (10 min updates, deduplicated) + GitHubScraper("http", "https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt"), + GitHubScraper("socks4", "https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt"), + GitHubScraper("socks5", "https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt"), + + # roosterkid/openproxylist (hourly updates) GitHubScraper("http", "https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS_RAW.txt"), GitHubScraper("socks4", "https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt"), GitHubScraper("socks5", "https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt"), + + # mmpx12/proxy-list (hourly updates) GitHubScraper("http", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt"), GitHubScraper("https", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/https.txt"), GitHubScraper("socks4", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt"), GitHubScraper("socks5", "https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt"), - - # Plain text sources + + + + # ProxyScrape API v4 (live, no key needed) + PlainTextScraper("http", "https://api.proxyscrape.com/v4/free-proxy-list/get?request=display_proxies&protocol=http&proxy_format=protocolipport&format=text&timeout=20000"), + PlainTextScraper("socks4", "https://api.proxyscrape.com/v4/free-proxy-list/get?request=display_proxies&protocol=socks4&proxy_format=protocolipport&format=text&timeout=20000"), + PlainTextScraper("socks5", "https://api.proxyscrape.com/v4/free-proxy-list/get?request=display_proxies&protocol=socks5&proxy_format=protocolipport&format=text&timeout=20000"), + + # OpenProxyList API (10 min updates) + PlainTextScraper("http", "https://api.openproxylist.xyz/http.txt"), + PlainTextScraper("https", "https://api.openproxylist.xyz/https.txt"), + PlainTextScraper("socks4", "https://api.openproxylist.xyz/socks4.txt"), + PlainTextScraper("socks5", "https://api.openproxylist.xyz/socks5.txt"), PlainTextScraper("http", "https://www.proxyscan.io/download?type=http"), PlainTextScraper("socks4", "https://www.proxyscan.io/download?type=socks4"), - PlainTextScraper("socks5", "https://www.proxyscan.io/download?type=socks5"), - 
PlainTextScraper("http", "https://raw.githubusercontent.com/almroot/proxylist/master/list.txt"), - PlainTextScraper("http", "https://raw.githubusercontent.com/aslisk/proxyhttps/main/https.txt"), - PlainTextScraper("http", "https://raw.githubusercontent.com/proxy4parsing/proxy-list/main/http.txt"), + PlainTextScraper("socks5", "https://raw.githubusercontent.com/Surfboardv2ray/Proxy-sorter/main/socks5.txt"), + + # JSON APIs + ProxyListApiScraper("http", "https://www.proxy-list.download/api/v2/get?l=en&t=http"), + ProxyListApiScraper("https", "https://www.proxy-list.download/api/v2/get?l=en&t=https"), + ProxyListApiScraper("socks4", "https://www.proxy-list.download/api/v2/get?l=en&t=socks4"), + ProxyListApiScraper("socks5", "https://www.proxy-list.download/api/v2/get?l=en&t=socks5"), + + # Fresh community sources (updated daily) + GitHubScraper("http", "https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt"), + GitHubScraper("socks4", "https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt"), + GitHubScraper("socks5", "https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt"), + + # Ultra-fresh sources (updated every few hours) + PlainTextScraper("http", "https://api.openproxylist.xyz/http.txt"), + PlainTextScraper("socks4", "https://api.openproxylist.xyz/socks4.txt"), + PlainTextScraper("socks5", "https://api.openproxylist.xyz/socks5.txt"), - # Additional table scrapers - GeneralTableScraper("http", "https://proxyspace.pro/http.txt"), - GeneralTableScraper("socks4", "https://proxyspace.pro/socks4.txt"), - GeneralTableScraper("socks5", "https://proxyspace.pro/socks5.txt"), + # Elite proxy APIs + + + # New 2025 sources + GitHubScraper("http", "https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/http.txt"), + GitHubScraper("https", "https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/https.txt"), + GitHubScraper("socks4", "https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks4.txt"), + GitHubScraper("socks5", "https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks5.txt"), + + # Quality HTML scrapers (still active) + GeneralTableScraper("https", "http://sslproxies.org"), + GeneralTableScraper("http", "http://free-proxy-list.net"), + GeneralTableScraper("http", "http://us-proxy.org"), + GeneralTableScraper("socks", "http://socks-proxy.net"), - # API-based scrapers - ProxyListApiScraper("http", "https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=http"), - ProxyListApiScraper("socks5", "https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&protocols=socks5"), + + GeneralTableScraper("http", "https://premproxy.com/proxy-by-country/"), + GeneralTableScraper("https", "https://premproxy.com/socks-list/"), + GeneralTableScraper("http", "https://proxyservers.pro/proxy/list/protocol/http"), + GeneralTableScraper("https", "https://proxyservers.pro/proxy/list/protocol/https"), + + # Updated HTML div scrapers + GeneralDivScraper("http", "https://freeproxy.lunaproxy.com/"), + GeneralDivScraper("http", "https://www.freeproxylists.net/"), + GeneralDivScraper("socks4", "https://www.freeproxylists.net/socks4.html"), + GeneralDivScraper("socks5", "https://www.freeproxylists.net/socks5.html"), + + # Modern proxy sites with table format + GeneralTableScraper("http", "https://hidemy.name/en/proxy-list/?type=h"), + GeneralTableScraper("https", "https://hidemy.name/en/proxy-list/?type=s"), + 
GeneralTableScraper("socks4", "https://hidemy.name/en/proxy-list/?type=4"), + GeneralTableScraper("socks5", "https://hidemy.name/en/proxy-list/?type=5"), + + # Additional HTML sources + GeneralTableScraper("http", "https://www.proxynova.com/proxy-server-list/"), + GeneralTableScraper("http", "https://www.proxydocker.com/en/proxylist/"), + GeneralTableScraper("https", "https://www.proxydocker.com/en/proxylist/type/https"), ] @@ -504,16 +606,8 @@ def _print_source_statistics(verbose: bool, source_stats: Dict) -> None: """Print source statistics if verbose mode is enabled.""" if not verbose: return - - print("\n*** Source Statistics ***") - print("-" * 50) - total_bad_filtered = 0 - total_invalid_filtered = 0 - for source, stats in source_stats.items(): - print(f"{source}: {stats['valid']} valid, {stats['filtered_bad']} bad IPs, {stats['filtered_invalid']} invalid") - total_bad_filtered += stats['filtered_bad'] - total_invalid_filtered += stats['filtered_invalid'] - print(f"\nTotal filtered: {total_bad_filtered} bad IPs (CDN/etc), {total_invalid_filtered} invalid format") + domain_valid, skipped, total_bad_filtered, total_invalid_filtered = _aggregate_domain_stats(source_stats) + _print_summary(domain_valid, skipped, total_bad_filtered, total_invalid_filtered) async def scrape(method: str, output: str, verbose: bool) -> None: """ @@ -538,14 +632,16 @@ async def scrape(method: str, output: str, verbose: bool) -> None: async def scrape_source(scraper, client) -> None: """Scrape from a single source.""" try: + source_id = f"{scraper.source_name}: {scraper.get_url()}" verbose_print(verbose, f"Scraping from {scraper.get_url()}...") proxies, stats = await scraper.scrape(client) all_proxies.extend(proxies) - source_stats[scraper.source_name] = stats - verbose_print(verbose, f"Found {len(proxies)} valid proxies from {scraper.source_name} ({stats['filtered_bad']} bad IPs filtered, {stats['filtered_invalid']} invalid filtered)") + source_stats[source_id] = stats + verbose_print(verbose, f"Found {len(proxies)} valid proxies from {source_id} ({stats['filtered_bad']} bad IPs filtered, {stats['filtered_invalid']} invalid filtered)") except Exception as e: - logger.debug(f"Failed to scrape from {scraper.source_name}: {e}") - source_stats[scraper.source_name] = {"total": 0, "filtered_bad": 0, "filtered_invalid": 0, "valid": 0} + source_id = f"{scraper.source_name}: {scraper.get_url()}" + logger.debug(f"Failed to scrape from {source_id}: {e}") + source_stats[source_id] = {"total": 0, "filtered_bad": 0, "filtered_invalid": 0, "valid": 0} # Execute all scrapers concurrently async with httpx.AsyncClient(**client_config) as client: @@ -578,7 +674,10 @@ def _setup_argument_parser(): Examples: %(prog)s -p http -v # Scrape HTTP proxies with verbose output %(prog)s -p socks -o socks.txt # Scrape SOCKS proxies to custom file - %(prog)s -p https --verbose # Scrape HTTPS proxies with verbose output + %(prog)s -p https --verbose # Scrape HTTPS proxies with verbose output + %(prog)s -p socks4 --debug # Scrape SOCKS4 proxies with debug logging + %(prog)s -p socks5 -o output.txt -v # Scrape SOCKS5 proxies to output.txt with verbose logging + %(prog)s -p http -o proxies.txt --debug # Scrape HTTP proxies to proxies.txt with debug logging """, ) From ebcfef7e4cbad270624bc3cef4e9bd3e64961eea Mon Sep 17 00:00:00 2001 From: Dikky Hardian <30888372+FosterG4@users.noreply.github.com> Date: Sat, 26 Jul 2025 23:26:54 +0700 Subject: [PATCH 5/5] Update .flake8 --- .flake8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/.flake8 b/.flake8 index fb8bfa7..2af47eb 100644 --- a/.flake8 +++ b/.flake8 @@ -3,6 +3,6 @@ exclude = .git,__pycache__,env,venv,.eggs,.tox,.nox,build,dist max-line-length = 120 max-complexity = 8 ignore = W,BLK, - E24,E121,E123,E126,E221,E226,E266,E704, + E24,E121,E123,E125,E126,E221,E226,E266,E704, E265,E722,E501,E731,E306,E401,E302,E222,E303, - E402,E305,E261,E262,E203,N816 \ No newline at end of file + E402,E305,E261,E262,E203,N816
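
Note (illustrative, not part of the patch): a minimal standalone sketch of how the new PlainTextScraper parsing helpers are expected to treat ProxyScrape v4 "protocol://ip:port" lines versus legacy "ip:port" lines. The regex and helper names mirror the diff above; _is_protocol_match is a simplified stand-in for the helper assumed elsewhere in proxyScraper.py, and the addresses are TEST-NET examples.

import re
from typing import Optional

def _is_valid_proxy_format(address: str) -> bool:
    # Same pattern used in the patch: an IP:port at the start of the string.
    return bool(re.match(r"\d{1,3}(?:\.\d{1,3}){3}:\d{1,5}", address))

def _is_protocol_match(protocol: str, method: str) -> bool:
    # Stand-in (assumption): treat a generic "socks" request as covering socks4/socks5.
    if method == "socks":
        return protocol in ("socks4", "socks5")
    return protocol == method

def _process_protocol_line(line: str, method: str) -> Optional[str]:
    # "socks5://1.2.3.4:1080" -> "1.2.3.4:1080" when the scheme matches the requested method.
    protocol, address = line.split("://", 1)
    if _is_protocol_match(protocol, method) and _is_valid_proxy_format(address):
        return address
    return None

def _process_plain_line(line: str) -> Optional[str]:
    # Legacy format: keep the line only if it already looks like IP:port.
    return line if _is_valid_proxy_format(line) else None

if __name__ == "__main__":
    print(_process_protocol_line("socks5://203.0.113.7:1080", "socks5"))  # 203.0.113.7:1080
    print(_process_protocol_line("http://203.0.113.7:8080", "socks5"))    # None (protocol mismatch)
    print(_process_plain_line("198.51.100.23:3128"))                      # 198.51.100.23:3128
    print(_process_plain_line("not-a-proxy"))                             # None (invalid format)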