From 009e19a06daff32c6c529255a8d94ba25fc74a05 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:47:02 +0000 Subject: [PATCH 1/2] Fix Python syntax errors in documentation examples - Replace unquoted dictionary keys with double-quoted strings - Remove JavaScript-style semicolons from Python code blocks - Convert JavaScript boolean values to Python (true -> True) - Apply fixes to all v2 snippet files and advanced-scraping-guide.mdx across all language variants Fixes affect: - Core snippet: snippets/v2/scrape/actions/python.mdx - Language variants: snippets/{zh,es,fr,ja,pt-BR}/v2/scrape/actions/python.mdx - Advanced guides: advanced-scraping-guide.mdx and all language variants Linear ticket: ENG-3648 Co-Authored-By: Micah Stairs --- advanced-scraping-guide.mdx | 14 +++++++------- es/advanced-scraping-guide.mdx | 16 ++++++++-------- fr/advanced-scraping-guide.mdx | 16 ++++++++-------- ja/advanced-scraping-guide.mdx | 16 ++++++++-------- pt-BR/advanced-scraping-guide.mdx | 16 ++++++++-------- snippets/es/v2/scrape/actions/python.mdx | 20 ++++++++++---------- snippets/fr/v2/scrape/actions/python.mdx | 20 ++++++++++---------- snippets/ja/v2/scrape/actions/python.mdx | 20 ++++++++++---------- snippets/pt-BR/v2/scrape/actions/python.mdx | 20 ++++++++++---------- snippets/v2/scrape/actions/python.mdx | 20 ++++++++++---------- snippets/zh/v2/scrape/actions/python.mdx | 20 ++++++++++---------- zh/advanced-scraping-guide.mdx | 16 ++++++++-------- 12 files changed, 107 insertions(+), 107 deletions(-) diff --git a/advanced-scraping-guide.mdx b/advanced-scraping-guide.mdx index 1062fcdf..e5b21613 100644 --- a/advanced-scraping-guide.mdx +++ b/advanced-scraping-guide.mdx @@ -129,14 +129,14 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key='fc-YOUR-API-KEY') doc = firecrawl.scrape('https://example.com', { - actions: [ - { type: 'wait', milliseconds: 1000 }, - { type: 'click', selector: 
'#accept' }, - { type: 'scroll', direction: 'down' }, - { type: 'write', selector: '#q', text: 'firecrawl' }, - { type: 'press', key: 'Enter' } + "actions": [ + {"type": "wait", "milliseconds": 1000}, + {"type": "click", "selector": "#accept"}, + {"type": "scroll", "direction": "down"}, + {"type": "write", "selector": "#q", "text": "firecrawl"}, + {"type": "press", "key": "Enter"} ], - formats: ['markdown'] + "formats": ['markdown'] }) print(doc.markdown) diff --git a/es/advanced-scraping-guide.mdx b/es/advanced-scraping-guide.mdx index e3c21b22..acd15883 100644 --- a/es/advanced-scraping-guide.mdx +++ b/es/advanced-scraping-guide.mdx @@ -153,14 +153,14 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key='fc-YOUR-API-KEY') doc = firecrawl.scrape('https://example.com', { - actions: [ - { type: 'wait', milliseconds: 1000 }, - { type: 'click', selector: '#accept' }, - { type: 'scroll', direction: 'down' }, - { type: 'write', selector: '#q', text: 'firecrawl' }, - { type: 'press', key: 'Enter' } + "actions": [ + {"type": "wait", "milliseconds": 1000}, + {"type": "click", "selector": "#accept"}, + {"type": "scroll", "direction": "down"}, + {"type": "write", "selector": "#q", "text": "firecrawl"}, + {"type": "press", "key": "Enter"} ], - formats: ['markdown'] + "formats": ['markdown'] }) print(doc.markdown) @@ -528,4 +528,4 @@ curl -X POST https://api.firecrawl.dev/v2/map \ Consulta la referencia de la API: [Documentación del endpoint /map](https://docs.firecrawl.dev/api-reference/endpoint/map) -¡Gracias por leer! \ No newline at end of file +¡Gracias por leer! 
diff --git a/fr/advanced-scraping-guide.mdx b/fr/advanced-scraping-guide.mdx index 54a7c517..1bc7e347 100644 --- a/fr/advanced-scraping-guide.mdx +++ b/fr/advanced-scraping-guide.mdx @@ -153,14 +153,14 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key='fc-YOUR-API-KEY') doc = firecrawl.scrape('https://example.com', { - actions: [ - { type: 'wait', milliseconds: 1000 }, - { type: 'click', selector: '#accept' }, - { type: 'scroll', direction: 'down' }, - { type: 'write', selector: '#q', text: 'firecrawl' }, - { type: 'press', key: 'Enter' } + "actions": [ + {"type": "wait", "milliseconds": 1000}, + {"type": "click", "selector": "#accept"}, + {"type": "scroll", "direction": "down"}, + {"type": "write", "selector": "#q", "text": "firecrawl"}, + {"type": "press", "key": "Enter"} ], - formats: ['markdown'] + "formats": ['markdown'] }) print(doc.markdown) @@ -526,4 +526,4 @@ curl -X POST https://api.firecrawl.dev/v2/map \ Voici la référence de l’API correspondante : [Documentation du point de terminaison /map](https://docs.firecrawl.dev/api-reference/endpoint/map) -Merci de votre lecture ! \ No newline at end of file +Merci de votre lecture ! 
diff --git a/ja/advanced-scraping-guide.mdx b/ja/advanced-scraping-guide.mdx index 82f7b244..6317e51a 100644 --- a/ja/advanced-scraping-guide.mdx +++ b/ja/advanced-scraping-guide.mdx @@ -153,14 +153,14 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key='fc-YOUR-API-KEY') doc = firecrawl.scrape('https://example.com', { - actions: [ - { type: 'wait', milliseconds: 1000 }, - { type: 'click', selector: '#accept' }, - { type: 'scroll', direction: 'down' }, - { type: 'write', selector: '#q', text: 'firecrawl' }, - { type: 'press', key: 'Enter' } + "actions": [ + {"type": "wait", "milliseconds": 1000}, + {"type": "click", "selector": "#accept"}, + {"type": "scroll", "direction": "down"}, + {"type": "write", "selector": "#q", "text": "firecrawl"}, + {"type": "press", "key": "Enter"} ], - formats: ['markdown'] + "formats": ['markdown'] }) print(doc.markdown) @@ -526,4 +526,4 @@ curl -X POST https://api.firecrawl.dev/v2/map \ 該当するAPIリファレンスはこちら: [Map Endpoint Documentation](https://docs.firecrawl.dev/api-reference/endpoint/map) -お読みいただきありがとうございました。 \ No newline at end of file +お読みいただきありがとうございました。 diff --git a/pt-BR/advanced-scraping-guide.mdx b/pt-BR/advanced-scraping-guide.mdx index 1d35ea41..35a6aa67 100644 --- a/pt-BR/advanced-scraping-guide.mdx +++ b/pt-BR/advanced-scraping-guide.mdx @@ -153,14 +153,14 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key='fc-YOUR-API-KEY') doc = firecrawl.scrape('https://example.com', { - actions: [ - { type: 'wait', milliseconds: 1000 }, - { type: 'click', selector: '#accept' }, - { type: 'scroll', direction: 'down' }, - { type: 'write', selector: '#q', text: 'firecrawl' }, - { type: 'press', key: 'Enter' } + "actions": [ + {"type": "wait", "milliseconds": 1000}, + {"type": "click", "selector": "#accept"}, + {"type": "scroll", "direction": "down"}, + {"type": "write", "selector": "#q", "text": "firecrawl"}, + {"type": "press", "key": "Enter"} ], - formats: ['markdown'] + "formats": ['markdown'] }) 
print(doc.markdown) @@ -526,4 +526,4 @@ curl -X POST https://api.firecrawl.dev/v2/map \ Aqui está a referência da API correspondente: [Documentação do endpoint /map](https://docs.firecrawl.dev/api-reference/endpoint/map) -Obrigado pela leitura! \ No newline at end of file +Obrigado pela leitura! diff --git a/snippets/es/v2/scrape/actions/python.mdx b/snippets/es/v2/scrape/actions/python.mdx index 7db4ee0e..12615958 100644 --- a/snippets/es/v2/scrape/actions/python.mdx +++ b/snippets/es/v2/scrape/actions/python.mdx @@ -4,16 +4,16 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") doc = firecrawl.scrape('https://example.com/login', { - formats=['markdown'], - actions=[ - { type: 'write', text: 'john@example.com' }, - { type: 'press', key: 'Tab' }, - { type: 'write', text: 'secret' }, - { type: 'click', selector: 'button[type="submit"]' }, - { type: 'wait', milliseconds: 1500 }, - { type: 'screenshot', fullPage: true }, + "formats": ['markdown'], + "actions": [ + {"type": "write", "text": "john@example.com"}, + {"type": "press", "key": "Tab"}, + {"type": "write", "text": "secret"}, + {"type": "click", "selector": "button[type=\"submit\"]"}, + {"type": "wait", "milliseconds": 1500}, + {"type": "screenshot", "fullPage": True}, ], -}); +}) -print(doc.markdown, doc.screenshot); +print(doc.markdown, doc.screenshot) ``` diff --git a/snippets/fr/v2/scrape/actions/python.mdx b/snippets/fr/v2/scrape/actions/python.mdx index 7db4ee0e..12615958 100644 --- a/snippets/fr/v2/scrape/actions/python.mdx +++ b/snippets/fr/v2/scrape/actions/python.mdx @@ -4,16 +4,16 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") doc = firecrawl.scrape('https://example.com/login', { - formats=['markdown'], - actions=[ - { type: 'write', text: 'john@example.com' }, - { type: 'press', key: 'Tab' }, - { type: 'write', text: 'secret' }, - { type: 'click', selector: 'button[type="submit"]' }, - { type: 'wait', milliseconds: 1500 }, - { 
type: 'screenshot', fullPage: true }, + "formats": ['markdown'], + "actions": [ + {"type": "write", "text": "john@example.com"}, + {"type": "press", "key": "Tab"}, + {"type": "write", "text": "secret"}, + {"type": "click", "selector": "button[type=\"submit\"]"}, + {"type": "wait", "milliseconds": 1500}, + {"type": "screenshot", "fullPage": True}, ], -}); +}) -print(doc.markdown, doc.screenshot); +print(doc.markdown, doc.screenshot) ``` diff --git a/snippets/ja/v2/scrape/actions/python.mdx b/snippets/ja/v2/scrape/actions/python.mdx index 7db4ee0e..12615958 100644 --- a/snippets/ja/v2/scrape/actions/python.mdx +++ b/snippets/ja/v2/scrape/actions/python.mdx @@ -4,16 +4,16 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") doc = firecrawl.scrape('https://example.com/login', { - formats=['markdown'], - actions=[ - { type: 'write', text: 'john@example.com' }, - { type: 'press', key: 'Tab' }, - { type: 'write', text: 'secret' }, - { type: 'click', selector: 'button[type="submit"]' }, - { type: 'wait', milliseconds: 1500 }, - { type: 'screenshot', fullPage: true }, + "formats": ['markdown'], + "actions": [ + {"type": "write", "text": "john@example.com"}, + {"type": "press", "key": "Tab"}, + {"type": "write", "text": "secret"}, + {"type": "click", "selector": "button[type=\"submit\"]"}, + {"type": "wait", "milliseconds": 1500}, + {"type": "screenshot", "fullPage": True}, ], -}); +}) -print(doc.markdown, doc.screenshot); +print(doc.markdown, doc.screenshot) ``` diff --git a/snippets/pt-BR/v2/scrape/actions/python.mdx b/snippets/pt-BR/v2/scrape/actions/python.mdx index 7db4ee0e..12615958 100644 --- a/snippets/pt-BR/v2/scrape/actions/python.mdx +++ b/snippets/pt-BR/v2/scrape/actions/python.mdx @@ -4,16 +4,16 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") doc = firecrawl.scrape('https://example.com/login', { - formats=['markdown'], - actions=[ - { type: 'write', text: 'john@example.com' }, - { type: 'press', 
key: 'Tab' }, - { type: 'write', text: 'secret' }, - { type: 'click', selector: 'button[type="submit"]' }, - { type: 'wait', milliseconds: 1500 }, - { type: 'screenshot', fullPage: true }, + "formats": ['markdown'], + "actions": [ + {"type": "write", "text": "john@example.com"}, + {"type": "press", "key": "Tab"}, + {"type": "write", "text": "secret"}, + {"type": "click", "selector": "button[type=\"submit\"]"}, + {"type": "wait", "milliseconds": 1500}, + {"type": "screenshot", "fullPage": True}, ], -}); +}) -print(doc.markdown, doc.screenshot); +print(doc.markdown, doc.screenshot) ``` diff --git a/snippets/v2/scrape/actions/python.mdx b/snippets/v2/scrape/actions/python.mdx index bf4ec1e8..bff015f6 100644 --- a/snippets/v2/scrape/actions/python.mdx +++ b/snippets/v2/scrape/actions/python.mdx @@ -4,18 +4,18 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") doc = firecrawl.scrape('https://example.com/login', { - formats=['markdown'], - actions=[ - { type: 'write', text: 'john@example.com' }, - { type: 'press', key: 'Tab' }, - { type: 'write', text: 'secret' }, - { type: 'click', selector: 'button[type="submit"]' }, - { type: 'wait', milliseconds: 1500 }, - { type: 'screenshot', fullPage: true }, + "formats": ['markdown'], + "actions": [ + {"type": "write", "text": "john@example.com"}, + {"type": "press", "key": "Tab"}, + {"type": "write", "text": "secret"}, + {"type": "click", "selector": "button[type=\"submit\"]"}, + {"type": "wait", "milliseconds": 1500}, + {"type": "screenshot", "fullPage": True}, ], -}); +}) -print(doc.markdown, doc.screenshot); +print(doc.markdown, doc.screenshot) ``` diff --git a/snippets/zh/v2/scrape/actions/python.mdx b/snippets/zh/v2/scrape/actions/python.mdx index 7db4ee0e..12615958 100644 --- a/snippets/zh/v2/scrape/actions/python.mdx +++ b/snippets/zh/v2/scrape/actions/python.mdx @@ -4,16 +4,16 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY") doc = 
firecrawl.scrape('https://example.com/login', { - formats=['markdown'], - actions=[ - { type: 'write', text: 'john@example.com' }, - { type: 'press', key: 'Tab' }, - { type: 'write', text: 'secret' }, - { type: 'click', selector: 'button[type="submit"]' }, - { type: 'wait', milliseconds: 1500 }, - { type: 'screenshot', fullPage: true }, + "formats": ['markdown'], + "actions": [ + {"type": "write", "text": "john@example.com"}, + {"type": "press", "key": "Tab"}, + {"type": "write", "text": "secret"}, + {"type": "click", "selector": "button[type=\"submit\"]"}, + {"type": "wait", "milliseconds": 1500}, + {"type": "screenshot", "fullPage": True}, ], -}); +}) -print(doc.markdown, doc.screenshot); +print(doc.markdown, doc.screenshot) ``` diff --git a/zh/advanced-scraping-guide.mdx b/zh/advanced-scraping-guide.mdx index 4ade6ab0..7c95d336 100644 --- a/zh/advanced-scraping-guide.mdx +++ b/zh/advanced-scraping-guide.mdx @@ -153,14 +153,14 @@ from firecrawl import Firecrawl firecrawl = Firecrawl(api_key='fc-YOUR-API-KEY') doc = firecrawl.scrape('https://example.com', { - actions: [ - { type: 'wait', milliseconds: 1000 }, - { type: 'click', selector: '#accept' }, - { type: 'scroll', direction: 'down' }, - { type: 'write', selector: '#q', text: 'firecrawl' }, - { type: 'press', key: 'Enter' } + "actions": [ + {"type": "wait", "milliseconds": 1000}, + {"type": "click", "selector": "#accept"}, + {"type": "scroll", "direction": "down"}, + {"type": "write", "selector": "#q", "text": "firecrawl"}, + {"type": "press", "key": "Enter"} ], - formats: ['markdown'] + "formats": ['markdown'] }) print(doc.markdown) @@ -526,4 +526,4 @@ curl -X POST https://api.firecrawl.dev/v2/map \ 相关 API 参考:[/map 端点文档](https://docs.firecrawl.dev/api-reference/endpoint/map) -感谢阅读! \ No newline at end of file +感谢阅读! 
From 89d097db7736807a78bf33fc1888e8d88296c821 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:51:44 +0000 Subject: [PATCH 2/2] Add Python syntax validation script and documentation - Add scripts/validate_python_syntax.py to validate Python code blocks in docs - Update README with validation instructions for contributors - Script checks all MDX/markdown files for syntactically valid Python examples - Helps prevent future syntax errors in documentation Linear ticket: ENG-3648 Co-Authored-By: Micah Stairs --- README.md | 21 ++++++ scripts/validate_python_syntax.py | 114 ++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100755 scripts/validate_python_syntax.py diff --git a/README.md b/README.md index 05e2993a..387336ca 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,27 @@ When contributing, please follow these guidelines: - Keep the documentation clear, concise, and easy to understand. - Use proper formatting and adhere to the existing document structure. - Test your changes locally before submitting a pull request. +- **Validate Python code examples** before submitting to ensure they use proper syntax. 
+ +### Validating Python Examples + +Before submitting changes to Python code examples, run the validation script to ensure all Python code blocks are syntactically valid: + +```bash +# Validate all Python examples in the repository +python3 scripts/validate_python_syntax.py + +# Validate a specific file +python3 scripts/validate_python_syntax.py advanced-scraping-guide.mdx + +# Validate a specific directory +python3 scripts/validate_python_syntax.py snippets/ +``` + +This helps catch common syntax errors like: +- Unquoted dictionary keys (use `{"key": "value"}` not `{ key: 'value' }`) +- JavaScript-style semicolons in Python code +- Invalid Python syntax that would confuse users ## Contact diff --git a/scripts/validate_python_syntax.py b/scripts/validate_python_syntax.py new file mode 100755 index 00000000..933be1a4 --- /dev/null +++ b/scripts/validate_python_syntax.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +Validate Python syntax in markdown documentation files. + +This script extracts Python code blocks from MDX/markdown files and verifies +they are syntactically valid. Run this before submitting PRs to catch Python +syntax errors in documentation examples. 
+
+Usage:
+    python3 scripts/validate_python_syntax.py [file_or_directory]
+
+Examples:
+    python3 scripts/validate_python_syntax.py
+
+    python3 scripts/validate_python_syntax.py advanced-scraping-guide.mdx
+
+    python3 scripts/validate_python_syntax.py snippets/
+"""
+import ast
+import re
+import sys
+import textwrap
+from pathlib import Path
+from typing import List, Optional, Tuple
+
+def extract_python_blocks(content: str) -> List[str]:
+    """Return the body of every ```python fenced block in content."""
+    return re.findall(r'```python.*?\n(.*?)```', content, re.DOTALL)
+
+def validate_python_code(code: str) -> Tuple[bool, str]:
+    """Return (True, '') if code parses, otherwise (False, error text)."""
+    try:
+        # A fence nested in an MDX component or list is indented as a whole;
+        # drop the common margin so valid code is not reported as an error.
+        ast.parse(textwrap.dedent(code))
+        return True, ""
+    except SyntaxError as e:
+        return False, str(e)
+
+def find_python_files(path: Path) -> List[Path]:
+    """Return [path] for a file, else every .mdx/.md file below path, sorted."""
+    if path.is_file():
+        return [path]
+
+    patterns = ['**/*.mdx', '**/*.md']
+    return sorted({found for pattern in patterns for found in path.glob(pattern)})
+
+def main(target_path: Optional[str] = None) -> int:
+    """Validate Python blocks under target_path (default: cwd); return exit code."""
+    if target_path:
+        base_path = Path(target_path)
+        if not base_path.exists():
+            print(f"❌ Path not found: {target_path}")
+            return 1
+    else:
+        base_path = Path.cwd()
+
+    files = find_python_files(base_path)
+
+    if not files:
+        print(f"⚠️ No markdown/MDX files found in {base_path}")
+        return 1
+
+    all_valid = True
+    total_blocks = 0
+    files_with_python = 0
+
+    for filepath in files:
+        try:
+            # Docs ship ja/zh/es/fr/pt-BR text: never rely on the locale encoding.
+            content = filepath.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"❌ Error reading {filepath}: {e}")
+            all_valid = False  # an unchecked file must not yield a passing run
+            continue
+
+        blocks = extract_python_blocks(content)
+
+        if not blocks:
+            continue
+
+        files_with_python += 1
+        print(f"\n📄 {filepath.relative_to(base_path if base_path.is_dir() else base_path.parent)}")
+
+        for i, block in enumerate(blocks, 1):
+            total_blocks += 1
+            is_valid, error = validate_python_code(block)
+            if is_valid:
+                print(f" ✅ Block {i}: Valid Python syntax")
+            else:
+                print(f" ❌ Block {i}: Invalid Python syntax")
+                print(f" Error: {error}")
+                print(f" Code preview:\n{block[:200]}...")
+                all_valid = False
+
+    print(f"\n{'='*60}")
+    print(f"Files checked: {len(files)}")
+    print(f"Files with Python blocks: {files_with_python}")
+    print(f"Total Python blocks validated: {total_blocks}")
+
+    if total_blocks == 0:
+        print("⚠️ No Python code blocks found")
+        return 0 if all_valid else 1
+
+    if all_valid:
+        print("✅ All Python code blocks are syntactically valid!")
+        return 0
+    else:
+        print("❌ Some files were unreadable or have Python syntax errors!")
+        return 1
+
+if __name__ == "__main__":
+    target = sys.argv[1] if len(sys.argv) > 1 else None
+    sys.exit(main(target))