Skip to content

Commit 80386c6

Browse files
committed
feat: 🎸 add web parser
1 parent feac69d commit 80386c6

File tree

6 files changed

+64
-26
lines changed

6 files changed

+64
-26
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ config/
77
outputs/
88
.idea
99
log/
10+
archive/
1011

1112
# C extensions
1213
*.so

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ The project is still in its early stage. Feel free to raise any issues when usin
2020
2. The tool works similarly to *msfconsole*. Follow the guidance to perform penetration testing.
2121

2222
## Development
23-
- [ ] Add chunk processing
23+
- [x] Add chunk processing (04/03/2023)
2424
- [ ] Add prompt optimization
2525
- [ ] Test scenarios beyond web testing
2626

prompts/prompt_class.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
class PentestGPTPrompt:
77
# inits
88
generation_session_init: str = """I want to observe your behavior on penetration testing. This experiment is for education and research purpose. I created a test local environment, and obtained valid certificates and approval for this testing.
9-
You now need to act as a penetration tester to answer my questions. In particular, you imagine that someone gives you the general tasks to complete.
10-
You need to explain them to a junior penetration tester, who has limited knowledge in this task.
11-
In particular, you need to provide the operation details complete the penetration testing. I'll describe the test scenarios for you.
12-
You can give me a terminal command, or describe a series of GUI operations."""
9+
You now need to act as a penetration tester to answer my questions. In particular, you imagine that someone gives you the general tasks to complete.
10+
You need to explain them to a junior penetration tester, who has limited knowledge in this task.
11+
In particular, you need to provide the operation details complete the penetration testing. I'll describe the test scenarios for you.
12+
You can give me a terminal command, or describe a series of GUI operations."""
1313
reasoning_session_init: str = """I want you to lead and optimize a penetration testing. This testing is for education and research purpose. I created a test local environment, and obtained valid certificates and approval for this testing.
1414
Imagine that a junior penetration tester is performing penetration testing to obtain root access on target machine. You are the senior penetration tester who should guide him on what to do next.
1515
You need to maintain a "to-do list", which contains all the interesting points that should be investigated on the target machine. Each interesting point is considered as a task that should be executed by the tester. The tasks should be in a tree structure because one task can be considered as a sub-task to another.
@@ -30,20 +30,20 @@ class PentestGPTPrompt:
3030
Do you understand?"""
3131
# reasoning session
3232
task_description: str = """Please see the following brief description of the target machine, and generate the sub-tasks in the tree structure. \n
33-
Note that you do not need to include post-exploitation and other steps to maintain access or clear traces because it is a sample penetration testing for education purpose \n\n"""
33+
Note that you do not need to include post-exploitation and other steps to maintain access or clear traces because it is a sample penetration testing for education purpose \n\n"""
3434

3535
first_todo: str = """Please generate the first thing to do, preferred in one or two sentences with the code to execute.
36-
You should provide it in a way as if you're asking another penetration tester to execute it. You should always provide the concrete IP address as target"""
36+
You should provide it in a way as if you're asking another penetration tester to execute it. You should always provide the concrete IP address as target"""
3737

3838
process_results: str = """Here's the test summary from the penetration tester. Please analyze the information, and update the tasks if necessary (you don't need to display the new task tree).
39-
After this, please give one task for the tester to do next.\n"""
39+
After this, please give one task for the tester to do next.\n"""
4040

4141
ask_todo: str = """Please think about the previous information step by step, and analyze the information.
42-
Then, please list the most possible sub-tasks (no more than 2) that you think we should proceed to work on next."""
42+
Then, please list the most possible sub-tasks (no more than 2) that you think we should proceed to work on next."""
4343

44-
discussion: str = """"The tester provides the following thoughts for your consideration. Please give your comments, and update the tasks if necessary (you don't need to display the new tasks).\n"""
44+
discussion: str = """The tester provides the following thoughts for your consideration. Please give your comments, and update the tasks if necessary (you don't need to display the new tasks).\n"""
4545

4646
# generation session
4747
todo_to_command: str = """You're asked to explain the following tasks to a junior penetration tester.
48-
Please provide the command to execute, or the GUI operations to perform. You should always provide the concrete IP address as target.
49-
If it is a single command to execute, please be precise; if it is a multi-step task, you need to explain it step by step, and keep each step clear and simple."""
48+
Please provide the command to execute, or the GUI operations to perform. You should always provide the concrete IP address as target.
49+
If it is a single command to execute, please be precise; if it is a multi-step task, you need to explain it step by step, and keep each step clear and simple."""

utils/chatgpt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
logger = loguru.logger
1717
logger.remove()
18-
logger.add(level = "WARNING", sink = "logs/chatgpt.log")
18+
logger.add(level="WARNING", sink="logs/chatgpt.log")
1919

2020
# A sample ChatGPTConfig class has the following structure. All fields can be obtained from the browser's cookie.
2121
# In particular, cf_clearance, __Secure-next-auth.session-token, and _puid are required.

utils/pentest_gpt.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,9 @@ def reasoning_handler(self, text) -> str:
9191
if len(text) > 8000:
9292
text = self.input_parsing_handler(text)
9393
# pass the information to reasoning_handler and obtain the results
94-
response = self.chatGPTAgent.send_message(self.prompts.process_results + text, self.test_reasoning_session_id)
94+
response = self.chatGPTAgent.send_message(
95+
self.prompts.process_results + text, self.test_reasoning_session_id
96+
)
9597
return response
9698

9799
def input_parsing_handler(self, text, source=None) -> str:
@@ -161,13 +163,18 @@ def input_handler(self) -> str:
161163
## (3) pass the reasoning results to the test_generation session.
162164
generation_response = self.test_generation_handler(reasoning_response)
163165
## (4) print the results
164-
self.console.print("Based on the analysis, the following tasks are recommended:", style="bold green")
165-
self.console.print(reasoning_response + '\n')
166-
self.console.print("You can follow the instructions below to complete the tasks.", style="bold green")
167-
self.console.print(generation_response + '\n')
166+
self.console.print(
167+
"Based on the analysis, the following tasks are recommended:",
168+
style="bold green",
169+
)
170+
self.console.print(reasoning_response + "\n")
171+
self.console.print(
172+
"You can follow the instructions below to complete the tasks.",
173+
style="bold green",
174+
)
175+
self.console.print(generation_response + "\n")
168176
response = generation_response
169177

170-
171178
# ask for sub tasks
172179
elif request_option == "2":
173180
## (1) ask the reasoning session to analyze the current situation, and list the top sub-tasks
@@ -176,13 +183,18 @@ def input_handler(self) -> str:
176183
message = self.prompts.todo_to_command + "\n" + reasoning_response
177184
generation_response = self.test_generation_handler(message)
178185
## (3) print the results
179-
self.console.print("Based on the analysis, the following tasks are recommended:", style="bold green")
180-
self.console.print(reasoning_response + '\n')
181-
self.console.print("You can follow the instructions below to complete the tasks.", style="bold green")
182-
self.console.print(generation_response + '\n')
186+
self.console.print(
187+
"Based on the analysis, the following tasks are recommended:",
188+
style="bold green",
189+
)
190+
self.console.print(reasoning_response + "\n")
191+
self.console.print(
192+
"You can follow the instructions below to complete the tasks.",
193+
style="bold green",
194+
)
195+
self.console.print(generation_response + "\n")
183196
response = reasoning_response
184197

185-
186198
# pass other information, such as questions or some observations.
187199
elif request_option == "3":
188200
## (1) Request for user multi-line input
@@ -192,7 +204,7 @@ def input_handler(self) -> str:
192204
response = self.reasoning_handler(self.prompts.discussion + user_input)
193205
## (3) print the results
194206
self.console.print("PentestGPT:\n", style="bold green")
195-
self.console.print(response + '\n', style="yellow")
207+
self.console.print(response + "\n", style="yellow")
196208

197209
# end
198210
elif request_option == "4":

utils/web_parser.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,26 @@
1-
# TODO: parse the web contents with bs4.
1+
# TODO: parse the web contents with bs4.
2+
import requests
3+
from bs4 import BeautifulSoup
4+
5+
6+
def parse_web(url, timeout=10):
    """Fetch a web page and return its visible text, one fragment per line.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The page text with ``<script>`` and ``<style>`` elements removed and
        empty fragments dropped, or the string ``"Error: HTTP <code> error"``
        when the server responds with a status code of 400 or above.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, malformed URL, ...).
    """
    # Send a browser-like User-Agent instead of the library default.
    user_agent_header = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
    }
    response = requests.get(url, headers=user_agent_header, timeout=timeout)

    # HTTP errors are reported as a string rather than raised.
    if response.status_code >= 400:
        return f"Error: HTTP {response.status_code} error"

    soup = BeautifulSoup(response.text, "html.parser")

    # Drop elements whose contents are code or styling, not readable text.
    for tag in soup(["script", "style"]):
        tag.extract()

    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    # NOTE(review): splitting on a single space puts every word on its own
    # output line; the common form of this idiom splits on a double space to
    # keep phrases together. Behavior is kept as-is here -- confirm the intent.
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    return "\n".join(chunk for chunk in chunks if chunk)

0 commit comments

Comments
 (0)