
Commit e425742

Beta (#11)
### [version 6.0.0]
- **Breaking Change** New command-line interface using the `Python Fire` library.
- Implemented type checks and path normalising in `config.setup_paths`.
- Added new dynamic `pywebcopy.__all__` attribute generation.
- `WebPage` class no longer takes any arguments **(breaking change)**.
- `WebPage` class has new methods `WebPage.get` and `WebPage.set_source`.
- Queuing of downloads is replaced with a barrier to manage active threads.
1 parent: 9c6b7e1

31 files changed: +2238 −1299 lines
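The changelog entry about type checks and path normalising in `config.setup_paths` suggests the general pattern sketched below. This is a hypothetical illustration (function body and error messages are assumptions), not pywebcopy's actual implementation:

```python
import os
import os.path

def setup_paths(project_folder, project_name):
    """Hypothetical sketch of type checking plus path normalisation."""
    if not isinstance(project_folder, str):
        raise TypeError("project_folder must be a string")
    if not isinstance(project_name, str):
        raise TypeError("project_name must be a string")
    # abspath anchors relative input; normpath collapses '..' segments
    # and mixed separators into a canonical form.
    normalised = os.path.normpath(os.path.abspath(project_folder))
    return os.path.join(normalised, project_name)
```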

.gitignore

Lines changed: 0 additions & 1 deletion
````diff
@@ -9,4 +9,3 @@
 Pipfile
 Pipfile.lock
 /testing/
-
````

README.md

Lines changed: 84 additions & 12 deletions
````diff
@@ -22,11 +22,11 @@ Why it's great? because it -
 - custom html tags handler support
 - lots of configuration for many custom needs
 - provides several scraping packages in one objects for scraping under one class
-    - beautifulsoup4
     - lxml
     - requests
-    - requests_html
+    - beautifulsoup4
     - pyquery
+    - requests_html
 
 Email me at `rajatomar788@gmail.com` of any query :)
````

````diff
@@ -94,9 +94,59 @@ Just run this command from root directory of pywebcopy package.
 
 
 ```shell
-$ python -m unittest pywebcopy.tests
+$ python -m pywebcopy run-tests
+```
+
+### 1.4 Command Line Interface
+`pywebcopy` have a very easy to use command-line interface which
+can help you do task without having to worrying about the inner
+long way.
+- #### Getting list of commands
+```shell
+$ python -m pywebcopy -- --help
+```
+- #### Using apis
+```shell
+$ python -m pywebcopy save_webpage http://google.com E://store// --bypass_robots=True
+or
+$ python -m pywebcopy save_website http://google.com E://store// --bypass_robots
+```
+- #### Running tests
+```shell
+$ python -m pywebcopy run_tests
+```
+
+
+### 1.5 Authentication and Cookies
+Most of the time authentication is needed to access a certain page.
+Its real easy to authenticate with `pywebcopy` because it usage an
+`requests.Session` object for base http activity which can be accessed
+through `pywebcopy.SESSION` attribute. And as you know there
+are ton of tutorials on setting up authentication with `requests.Session`.
+
+Here is a basic example of simple http auth -
+```python
+import pywebcopy
+
+# Update the headers with suitable data
+
+pywebcopy.SESSION.headers.update({
+    'auth': {'username': 'password'},
+    'form': {'key1': 'value1'},
+})
+
+# Rest of the code is as usual
+kwargs = {
+    'url': 'http://localhost:5000',
+    'project_folder': 'e://saved_pages//',
+    'project_name': 'my_site'
+}
+pywebcopy.config.setup_config(**kwargs)
+pywebcopy.save_webpage(**kwargs)
+
 ```
 
+
 ### 2.1 `WebPage` class
 
 `WebPage` class, the engine of this saving actions.
````
````diff
@@ -227,7 +277,7 @@ through any method described above
 Multiple scraping packages are wrapped up in one object
 which you can use to unlock the best of all those libraries
 at one go without having to go through the hassle of
-instanciating each one of those libraries
+instantiating each one of those libraries
 
 > To use all the methods and properties documented below
 > just create a object once as described
````
````diff
@@ -303,9 +353,28 @@ wp = MultiParser(html, encoding)
 >>> [<Element 'a' href='http://kennethreitz.com/pages'>, ...]
 ```
 
-## `Crawler` class in `pywebcopy`
-Class on which website cloning depends upon.
+## `Crawler` object
+This is a subclass of `WebPage` class and can be used to mirror any website.
+
+```python
+>>> from pywebcopy import Crawler, config
+>>> url = 'http://some-url.com/some-page.html'
+>>> project_folder = '/home/desktop/'
+>>> project_name = 'my_project'
+>>> kwargs = {'bypass_robots': True}
+# You should always start with setting up the config or use apis
+>>> config.setup_config(url, project_folder, project_name, **kwargs)
 
+# Create a instance of the webpage object
+>>> wp = Crawler()
+
+# If you want to you can use `requests` to fetch the pages
+>>> wp.get(url, **{'auth': ('username', 'password')})
+
+# Then you can access several methods like
+>>> wp.crawl()
+
+```
 
 
 ## Common Settings and Errors
````
````diff
@@ -384,7 +453,7 @@ This use case is slightly more powerful as it can provide every functionallity o
 >>> config.setup_config(url, project_folder, project_name, **kwargs)
 
 # Create a instance of the webpage object
->>> wp = Webpage()
+>>> wp = WebPage()
 
 # If you want to use `requests` to fetch the page then
 >>> wp.get(url)
````
````diff
@@ -450,9 +519,10 @@ By creating a Crawler() object which provides several other functions as well.
 ```python
 >>> from pywebcopy import Crawler, config
 
->>> config.setup_config(project_url='http://localhost:5000/', project_folder='e://tests/', project_name='LocalHost')
+>>> config.setup_config(project_url='http://localhost:5000/',
+                        project_folder='e://tests/', project_name='LocalHost')
 
->>> crawler = Crawler('http://localhost:5000/')
+>>> crawler = Crawler()
 >>> crawler.crawl()
 
 ```
````
````diff
@@ -601,8 +671,10 @@ then you can always create and pull request or email me.
 ## 6.1 Changelog
 
 ### [version 6.0.0]
-
-- `WebPage` class now doesn't take any argument **(breaking change)**
+- **Breaking Change** New command-line interface using `Python Fire` library.
+- Implemented type checks and path normalising in the `config.setup_paths`.
+- added new dynamic `pywebcopy.__all__` attr generation.
+- `WebPage` class now doesnt take any argument **(breaking change)**
 - `WebPage` class has new methods `WebPage.get` and `WebPage.set_source`
 - Queuing of downloads is replaced with a barrier to manage active threads
 
````
````diff
@@ -614,7 +686,7 @@ then you can always create and pull request or email me.
 
 ### [version 4.x]
 
-- *A complete rewrite and restructing of core functionality.*
+- *A complete rewrite and restructuring of core functionality.*
 
 ### [version 2.0.0]
 
````
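The changelog line about replacing download queuing with a barrier on active threads can be sketched with a counting semaphore. This is an illustration of the technique, not pywebcopy's actual code; names like `MAX_ACTIVE` and `download` are assumptions:

```python
import threading

MAX_ACTIVE = 4                       # cap on simultaneously active downloads
barrier = threading.Semaphore(MAX_ACTIVE)
results = []
lock = threading.Lock()

def download(url):
    with barrier:                    # at most MAX_ACTIVE threads pass at once
        with lock:                   # protect the shared result list
            results.append(url)      # stand-in for the real fetch-and-save

threads = [threading.Thread(target=download, args=('page-%d' % i,))
           for i in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```

Unlike a work queue, the semaphore lets callers start as many threads as they like while still bounding how many do real work concurrently.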

docs/index.md

Lines changed: 51 additions & 1 deletion
````diff
@@ -93,6 +93,56 @@ Just run this command from root directory of pywebcopy package.
 
 
 ```shell
-$ python -m unittest pywebcopy.tests
+$ python -m unittest tests
 ```
 
+
+
+### Command Line Interface
+`pywebcopy` have a very easy to use command-line interface which
+can help you do task without having to worrying about the inner
+long way.
+- #### Getting list of commands
+```shell
+$ python -m pywebcopy -- --help
+```
+- #### Using apis
+```shell
+$ python -m pywebcopy save_webpage http://google.com E://store// --bypass_robots=True
+or
+$ python -m pywebcopy save_website http://google.com E://store// --bypass_robots
+```
+- #### Running tests
+```shell
+$ python -m pywebcopy run_tests
+```
+
+
+### Authentication and Cookies
+Most of the time authentication is needed to access a certain page.
+Its real easy to authenticate with `pywebcopy` because it usage an
+`requests.Session` object for base http activity which can be accessed
+through `pywebcopy.SESSION` attribute. And as you know there
+are ton of tutorials on setting up authentication with `requests.Session`.
+
+Here is a basic example of simple http auth -
+```python
+import pywebcopy
+
+# Update the headers with suitable data
+
+pywebcopy.SESSION.headers.update({
+    'auth': {'username': 'password'},
+    'form': {'key1': 'value1'},
+})
+
+# Rest of the code is as usual
+kwargs = {
+    'url': 'http://localhost:5000',
+    'project_folder': 'e://saved_pages//',
+    'project_name': 'my_site'
+}
+pywebcopy.config.setup_config(**kwargs)
+pywebcopy.save_webpage(**kwargs)
+
+```
````
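For reference, with a plain `requests.Session` (which the docs above say backs `pywebcopy.SESSION`), basic auth, default headers, and cookies are usually attached to the session itself. This is a generic `requests` sketch, independent of pywebcopy; the credentials and cookie values are placeholders:

```python
import requests

session = requests.Session()
session.auth = ('username', 'password')   # HTTP basic auth on every request
session.headers.update({'User-Agent': 'pywebcopy-demo'})
session.cookies.set('sessionid', 'abc123')

# Session-level settings are merged into every request prepared through it.
prepared = session.prepare_request(
    requests.Request('GET', 'http://localhost:5000/'))
```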

examples.py

Lines changed: 1 addition & 0 deletions
````diff
@@ -35,6 +35,7 @@
 # page_url = 'https://www.w3schools.com/'
 # page_url = 'https://test-domain.com/'
 page_url = 'http://localhost:5000'
+
 handle = open(os.path.join(os.getcwd(), 'tests', 'test.html'), 'rb')
 # page_url = 'https://getbootstrap.com/'
 
````

mkdocs.yml

Lines changed: 1 addition & 0 deletions
````diff
@@ -1,5 +1,6 @@
 site_name: PyWebcopy
 theme: readthedocs
+
 nav:
 - Home: index.md
 - How-To: how-tos.md
````

pywebcopy/logger.py renamed to obsolute/_logging.py

Lines changed: 4 additions & 4 deletions
````diff
@@ -8,11 +8,11 @@
 - HTMLLogger instance with name, level, title, mode, version etc.
 - call log, debug, info etc. on the instance
 """
-
+from __future__ import absolute_import
 import time
 import logging
 
-from .globals import VERSION
+from . import __version__
 
 
 #: HTML header starts the document
@@ -189,7 +189,7 @@ def action(self, message, *args, **kws):
 logging.Logger.action = action
 
 
-def new_html_logger(title="PywebCopy Log", version=VERSION, filename='log.html', mode='w'):
+def new_html_logger(title="PywebCopy Log", version=__version__, filename='log.html', mode='w'):
     """Creates a new html file logging handler for use in logger.
 
     :rtype: HTMLFileHandler
@@ -214,7 +214,7 @@ def new_console_logger(level=logging.WARNING):
     """
     c_logger = logging.StreamHandler()
     c_logger.setLevel(level)
-    c_logger.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
+    c_logger.setFormatter(logging.Formatter("%(levelname)-8s - %(message)s"))
     return c_logger
 
 
````
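The `HTMLFileHandler` this module returns can be approximated with a small `logging.FileHandler` subclass that wraps each record in markup. This is a minimal sketch only (class name and markup are assumptions); the handler in `_logging.py` is more elaborate:

```python
import logging
import os
import tempfile

class SimpleHTMLFileHandler(logging.FileHandler):
    """Write each log record as an HTML paragraph."""
    def emit(self, record):
        # Wrap the fully rendered message, then clear args so the
        # base handler does not re-apply %-formatting.
        record.msg = '<p class="%s">%s</p>' % (
            record.levelname.lower(), record.getMessage())
        record.args = ()
        logging.FileHandler.emit(self, record)

path = os.path.join(tempfile.gettempdir(), 'pywebcopy_demo_log.html')
logger = logging.getLogger('html_demo')
logger.addHandler(SimpleHTMLFileHandler(path, mode='w'))
logger.warning('disk %s is full', 'C:')
```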

obsolute/core.py

Whitespace-only changes.

obsolute/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99

1010
import os
1111
import re
12-
12+
import logging
1313
from six.moves.urllib.parse import urljoin, urlsplit, urlparse
1414
from six.moves.urllib.request import pathname2url, url2pathname
1515

16-
from pywebcopy import LOGGER
1716
from configs import config
1817

18+
LOGGER = logging.getLogger('utils')
1919
DEBUG = config['DEBUG']
2020

2121

pywebcopy/__init__.py

Lines changed: 36 additions & 38 deletions
````diff
@@ -1,59 +1,57 @@
 # -*- coding: utf-8 -*-
+#
+# Copyright 2019 Raja Tomar
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 """
+
 pywebcopy
 ~~~~~~~~~
 
-Python library to clone complete webpages and websites.
-
-
-Copyright 2019 Raja Tomar
+Python library to clone web-pages and websites with all its peripheral files.
 
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
+.. version changed :: 6.0.0
+1. **Breaking Change** New command-line interface using `Python Fire` library.
+2. Implemented type checks and path normalising in the `config.setup_paths`.
 
 """
-
-
 __author__ = 'Raja Tomar'
 __email__ = 'rajatomar788@gmail.com'
 __license__ = 'Apache License 2.0'
-__version__ = (6, 0, 0, 'rc', 1)
+__version__ = '6.0.0'
 
+import logging
 
-from .globals import *
-from .logger import LOGGER  # Global Logger instance
 from .configs import config, SESSION
-from .urls import URLTransformer, filename_present
-from .elements import LinkTag, ScriptTag, ImgTag, AnchorTag, TagBase
+from .parsers import Parser, MultiParser
 from .webpage import WebPage
-from .parsers import MultiParser
-from .core import get, new_file
 from .crawler import Crawler
 from .api import save_website, save_webpage
 
-
 __all__ = [
-    'save_webpage', 'save_website',  #: apis
-    'config',  #: configuration
-    'WebPage', 'Crawler', 'MultiParser',  #: Classes
-    'SESSION',  #: Http Session
-    'URLTransformer', 'filename_present',  #: Url manipulation
-    'TagBase', 'LinkTag', 'ScriptTag', 'ImgTag', 'AnchorTag',  #: Customisable tag handling
-    'get', 'new_file',  #: some goodies
+    'WebPage', 'Crawler',
+    'save_webpage', 'save_website',
+    'config', 'SESSION',
+    'Parser', 'MultiParser',
 ]
 
-#: alias
-Webpage = WebPage
-
-
-def __dir__():
-    return __all__ + (__version__, __author__, __email__, __license__, Webpage)
-
-
+#: optimisations
+logging.logThreads = 0
+logging.logProcesses = 0
+logging._srcfile = None
+c_handler = logging.StreamHandler()
+logging.basicConfig(
+    level=logging.DEBUG,
+    handlers=[c_handler],
+    format='%(name)-10s - %(levelname)-8s - %(message)s'
+)
+c_handler.setLevel(logging.INFO)
````
