OSINT-TECHNOLOGIES
diff --git a/‎README.md‎
Lines changed: 13 additions & 11 deletions b/‎README.md‎
Lines changed: 13 additions & 11 deletions
diff --git a/‎apis/api_hudsonrock.py‎
Lines changed: 2 additions & 2 deletions b/‎apis/api_hudsonrock.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎apis/api_securitytrails.py‎
Lines changed: 0 additions & 1 deletion b/‎apis/api_securitytrails.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎apis/api_virustotal.py‎
Lines changed: 0 additions & 1 deletion b/‎apis/api_virustotal.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎datagather_modules/data_assembler.py‎
Lines changed: 15 additions & 27 deletions b/‎datagather_modules/data_assembler.py‎
Lines changed: 15 additions & 27 deletions
diff --git a/‎docs/dpulse-docs/docs/getting_started.md‎
Lines changed: 26 additions & 20 deletions b/‎docs/dpulse-docs/docs/getting_started.md‎
Lines changed: 26 additions & 20 deletions
@@ -88,17 +88,7 @@ Since DPULSE repository is using Poetry* to manage dependencies, it is higly rec
 
 _* Poetry is a tool for dependency management and packaging in Python. It can be installed anywhere with the `pip install poetry` command; more detailed instructions are available on the [Poetry official documentation page](https://python-poetry.org/docs/#ci-recommendations)_
 
-### <ins>First way (the simplest way)</ins>
-
-Just download DPULSE using fast-access links at the top of the README:
-
-![изображение](https://github.com/user-attachments/assets/bd1d9627-950b-40d4-91c4-6751476d7b65)
-
-Then just unpack downloaded archive, open terminal in DPULSE root folder and use `pip install -r requirements.txt` command to install requirements. Then type `python dpulse.py` in terminal, and that's where program starts.
-
-If `pip install -r requirements.txt` doesn't work, then just use `poetry install` command. After that, start DPULSE with `poetry run python dpulse.py`
-
-### <ins>Second way (the most correct way)</ins>
+### <ins>First way (recommended way, using Poetry)</ins>
 
 Use this set of commands to install DPULSE stable versions:
 
@@ -118,6 +108,18 @@ Use this set of commands to install DPULSE rolling versions:
 
 After installation, you simply start DPULSE using `poetry run python dpulse.py`
 
+### <ins>Second way (recommended way, without using Poetry)</ins>
+
+Just download DPULSE using fast-access links at the top of the README:
+
+![image](https://github.com/user-attachments/assets/9ec2d2d7-706f-4385-9594-54e0cc72c695)
+
+Decide which version you want to download and use based on your expectations, such as stability, support, and functionality.
+
+Then unpack the downloaded archive, open a terminal in the DPULSE root folder and use the `pip install -r requirements.txt` command to install the requirements. Then type `python dpulse.py` in the terminal to start the program.
+
+If `pip install -r requirements.txt` doesn't work, use the `poetry install` command instead. After that, start DPULSE with `poetry run python dpulse.py`.
+
 ## _Other ways_
 
 ### <ins>Third way (using pip manager)</ins>
 
@@ -110,7 +110,7 @@ def format_section(title, data):
                     formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}"
                     formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n"
             else:
-                formatted_output += f"{Fore.RED}No employee URLs available.{Style.RESET_ALL}\n"
+                formatted_output += f"{Fore.RED}No employee URLs available{Style.RESET_ALL}\n"
             formatted_output += f"\n{Fore.GREEN}Sample Client URLs:{Style.RESET_ALL}\n"
             clients = data.get('data', {}).get('clients_urls', [])
             if clients:
@@ -119,7 +119,7 @@ def format_section(title, data):
                     formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}"
                     formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n"
             else:
-                formatted_output += f"{Fore.LIGHTCYAN_EX}No client URLs available.{Style.RESET_ALL}\n"
+                formatted_output += f"{Fore.RED}No client URLs available{Style.RESET_ALL}\n"
 
         elif title == 'IP Data':
             formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n"
 
@@ -110,7 +110,6 @@ def check_domain_securitytrails(domain, api_key):
     else:
         formatted_output += (f"{Fore.RED}Error while gathering subdomains: {subdomains_response.status_code}{Style.RESET_ALL}\n")
 
-    formatted_output += Fore.LIGHTBLUE_EX + "\n=== END OF SECURITYTRAILS API REPORT ===\n" + Style.RESET_ALL
     return formatted_output
 
 
 
@@ -55,7 +55,6 @@ def check_domain(domain, api_key):
                 formatted_output += f"\n{Fore.GREEN}{category.title()} ({len(engines)}):{Style.RESET_ALL}\n"
                 for engine in sorted(engines):
                     formatted_output += f"{Fore.LIGHTCYAN_EX}- {engine}{Style.RESET_ALL}\n"
-        formatted_output += f"\n{Fore.LIGHTBLUE_EX}=== END OF VIRUSTOTAL API REPORT ==={Style.RESET_ALL}\n"
         print(formatted_output)
         return formatted_output
     except Exception as e:
 
@@ -7,11 +7,11 @@
 import crawl_processor as cp
 import dorking_handler as dp
 import networking_processor as np
-from pagesearch_main import normal_search
+from pagesearch_parsers import subdomains_parser
 from logs_processing import logging
 from api_virustotal import api_virustotal_check
 from api_securitytrails import api_securitytrails_check
-from api_hudsonrock import api_hudsonrock_check, api_hudsonrock_get
+from api_hudsonrock import api_hudsonrock_check
 from db_creator import get_dorking_query
 from screen_snapshotting import take_screenshot
 from config_processing import read_config
@@ -20,10 +20,8 @@
 try:
     import requests
     from datetime import datetime
-    import jinja2
     import os
     from colorama import Fore, Style
-    import webbrowser
     import sqlite3
     import configparser
 except ImportError as e:
@@ -78,31 +76,31 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
         casename, db_casename, db_creation_date, robots_filepath, sitemap_filepath, sitemap_links_filepath, report_file_type, report_folder, ctime, report_ctime = self.report_preprocessing(short_domain, report_file_type)
         logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} STARTS HERE')
         print(Fore.GREEN + "Started scanning domain" + Style.RESET_ALL)
-        print(Fore.GREEN + "Getting domain IP address" + Style.RESET_ALL)
+        print(Fore.GREEN + "[1/11] Getting domain IP address" + Style.RESET_ALL)
         ip = cp.ip_gather(short_domain)
-        print(Fore.GREEN + 'Gathering WHOIS information' + Style.RESET_ALL)
+        print(Fore.GREEN + '[2/11] Gathering WHOIS information' + Style.RESET_ALL)
         res = cp.whois_gather(short_domain)
-        print(Fore.GREEN + 'Processing e-mails gathering' + Style.RESET_ALL)
+        print(Fore.GREEN + '[3/11] Processing e-mails gathering' + Style.RESET_ALL)
         mails = cp.contact_mail_gather(url)
-        print(Fore.GREEN + 'Processing subdomain gathering' + Style.RESET_ALL)
+        print(Fore.GREEN + '[4/11] Processing subdomain gathering' + Style.RESET_ALL)
         subdomains, subdomains_amount = cp.subdomains_gather(url, short_domain)
-        print(Fore.GREEN + 'Processing social medias gathering' + Style.RESET_ALL)
+        print(Fore.GREEN + '[5/11] Processing social medias gathering' + Style.RESET_ALL)
         try:
             social_medias = cp.sm_gather(url)
         except:
             print(Fore.RED + "Social medias were not gathered because of error" + Style.RESET_ALL)
             social_medias = ['Social medias were not extracted because of error']
             pass
-        print(Fore.GREEN + 'Processing subdomain analysis' + Style.RESET_ALL)
+        print(Fore.GREEN + '[6/11] Processing subdomain analysis' + Style.RESET_ALL)
         if report_file_type == 'xlsx':
             subdomain_urls, subdomain_mails, subdomain_ip, sd_socials = cp.domains_reverse_research(subdomains, report_file_type)
         elif report_file_type == 'html':
             subdomain_mails, sd_socials, subdomain_ip = cp.domains_reverse_research(subdomains, report_file_type)
-        print(Fore.GREEN + 'Processing SSL certificate gathering' + Style.RESET_ALL)
+        print(Fore.GREEN + '[7/11] Processing SSL certificate gathering' + Style.RESET_ALL)
         issuer, subject, notBefore, notAfter, commonName, serialNumber = np.get_ssl_certificate(short_domain)
-        print(Fore.GREEN + 'Processing DNS records gathering' + Style.RESET_ALL)
+        print(Fore.GREEN + '[8/11] Processing DNS records gathering' + Style.RESET_ALL)
         mx_records = np.get_dns_info(short_domain, report_file_type)
-        print(Fore.GREEN + 'Extracting robots.txt and sitemap.xml' + Style.RESET_ALL)
+        print(Fore.GREEN + '[9/11] Extracting robots.txt and sitemap.xml' + Style.RESET_ALL)
         robots_txt_result = np.get_robots_txt(short_domain, robots_filepath)
         sitemap_xml_result = np.get_sitemap_xml(short_domain, sitemap_filepath)
         if report_file_type == 'html':
@@ -114,9 +112,9 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
                 sitemap_links_status = 'Sitemap links were not parsed'
                 pass
 
-        print(Fore.GREEN + 'Gathering info about website technologies' + Style.RESET_ALL)
+        print(Fore.GREEN + '[10/11] Gathering info about website technologies' + Style.RESET_ALL)
         web_servers, cms, programming_languages, web_frameworks, analytics, javascript_frameworks = np.get_technologies(url)
-        print(Fore.GREEN + 'Processing Shodan InternetDB search' + Style.RESET_ALL)
+        print(Fore.GREEN + '[11/11] Processing Shodan InternetDB search' + Style.RESET_ALL)
         ports, hostnames, cpes, tags, vulns = np.query_internetdb(ip, report_file_type)
         common_socials = {key: social_medias.get(key, []) + sd_socials.get(key, []) for key in set(social_medias) | set(sd_socials)}
         for key in common_socials:
@@ -128,19 +126,14 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
                 if subdomains[0] != 'No subdomains were found':
                     to_search_array = [subdomains, social_medias, sd_socials]
                     print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN START: PAGESEARCH]\n" + Style.RESET_ALL)
-                    ps_emails_return, accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter, keywords_messages_list = normal_search(to_search_array, report_folder, keywords, keywords_flag)
+                    ps_emails_return, accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter, keywords_messages_list = subdomains_parser(to_search_array[0], report_folder, keywords, keywords_flag)
                     total_links_counter = accessed_links_counter = "No results because PageSearch does not gather these categories"
                     print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN END: PAGESEARCH]\n" + Style.RESET_ALL)
                 else:
                     print(Fore.RED + "Cant start PageSearch because no subdomains were detected")
                     accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = 'No results because no subdomains were found'
                     ps_emails_return = ""
                     pass
-            #elif pagesearch_flag.lower() == 'si':
-                #print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN START: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL)
-                #ps_emails_return, total_links_counter, accessed_links_counter, emails_amount = sitemap_inspection_search(report_folder)
-                #accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = "No results because Sitemap Inspection mode does not gather these categories"
-                #print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN END: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL)
             elif pagesearch_flag.lower() == 'n':
                 ps_emails_return = ""
                 accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = "No results because user did not selected PageSearch for this scan"
@@ -213,7 +206,7 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
                 if subdomains[0] != 'No subdomains were found':
                     to_search_array = [subdomains, social_medias, sd_socials]
                     print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN START: PAGESEARCH]\n" + Style.RESET_ALL)
-                    ps_emails_return, accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter, keywords_messages_list = normal_search(to_search_array, report_folder, keywords, keywords_flag)
+                    ps_emails_return, accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter, keywords_messages_list = subdomains_parser(to_search_array[0], report_folder, keywords, keywords_flag)
                     total_links_counter = accessed_links_counter = "No results because PageSearch does not gather these categories"
                     if len(keywords_messages_list) == 0:
                         keywords_messages_list = ['No keywords were found']
@@ -224,11 +217,6 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k
                     accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = 'No results because no subdomains were found'
                     keywords_messages_list = ['No data was gathered because no subdomains were found']
                     pass
-            #elif pagesearch_flag.lower() == 'si':
-                #print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN START: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL)
-                #ps_emails_return, total_links_counter, accessed_links_counter, emails_amount = sitemap_inspection_search(report_folder)
-                #accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = keywords_messages_list = "No results because Sitemap Inspection mode does not gather these categories"
-                #print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN END: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL)
             elif pagesearch_flag.lower() == 'n':
                 accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = keywords_messages_list = "No results because user did not selected PageSearch for this scan"
                 ps_emails_return = ""
 
@@ -1,38 +1,44 @@
+## System requirements
+
+DPULSE is a Python-based tool, so its basic requirement is an installed Python interpreter. The most stable and tested versions are Python 3.10, 3.11 and 3.12. Other versions may decrease stability or even cause problems starting DPULSE because of the dependencies it uses. A fast and stable internet connection is also strongly recommended for functions such as Dorking scan and PageSearch, which actively scrape web resources. You also need to install some Python packages using the requirements.txt file or the Poetry dependency manager.
+
 ## Installing DPULSE
 
 You can install DPULSE in several ways; use the one you like the most. However, since the DPULSE repository uses Poetry to manage dependencies, it is highly recommended to install and start DPULSE using Poetry, especially on Linux systems where many of the Python packages that DPULSE requires are preinstalled. You can find more information about Poetry on the [Poetry official documentation page](https://python-poetry.org/docs/#ci-recommendations).
 
-### Install and start DPULSE. Way №1
+### Install and start DPULSE. Way №1 (using Poetry)
 
-Just download DPULSE using fast-access links at the top of the README:
+Use this set of commands to install DPULSE stable versions:
 
-![image1](https://github.com/user-attachments/assets/bd1d9627-950b-40d4-91c4-6751476d7b65)
+  ```
+  git clone https://github.com/OSINT-TECHNOLOGIES/dpulse
+  cd dpulse
+  poetry install
+  ```
 
-Then just unpack downloaded archive, open terminal in DPULSE root folder and use `pip install -r requirements.txt` command to install requirements. Then type `python dpulse.py` in terminal, and that's where program starts.
+Use this set of commands to install DPULSE rolling versions:
 
-If `pip install -r requirements.txt` doesn't work, then just use `poetry install` command. After that, start DPULSE with `poetry run python dpulse.py`
+  ```
+  git clone --branch rolling --single-branch https://github.com/OSINT-TECHNOLOGIES/dpulse.git
+  cd dpulse
+  poetry install
+  ```
 
-### Install and start DPULSE. Way №2
+After installation, you simply start DPULSE using `poetry run python dpulse.py`
 
-Use this set of commands to install DPULSE stable versions:
+### Install and start DPULSE. Way №2 (without using Poetry)
 
-```
-git clone https://github.com/OSINT-TECHNOLOGIES/dpulse
-cd dpulse
-poetry install
-```
+Just download DPULSE using fast-access links at the top of the README:
 
-Use this set of commands to install DPULSE rolling versions:
+![image](https://github.com/user-attachments/assets/9ec2d2d7-706f-4385-9594-54e0cc72c695)
 
-```
-git clone --branch rolling --single-branch https://github.com/OSINT-TECHNOLOGIES/dpulse.git
-cd dpulse
-poetry install
-```
+Decide which version you want to download and use based on your expectations, such as stability, support, and functionality.
 
-After installation, you simply start DPULSE using `poetry run python dpulse.py`
+Then unpack the downloaded archive, open a terminal in the DPULSE root folder and use the `pip install -r requirements.txt` command to install the requirements. Then type `python dpulse.py` in the terminal to start the program.
+
+If `pip install -r requirements.txt` doesn't work, use the `poetry install` command instead. After that, start DPULSE with `poetry run python dpulse.py`.
 
-### Install and start DPULSE. Way №3
+### Install and start DPULSE. Way №3 (using pip manager)
 
 You can also install DPULSE using the pip package manager. It installs DPULSE and the necessary dependencies in one command: `pip install dpulse`. Then locate the DPULSE root folder and type `python dpulse.py` to start the program.