2,539 changes: 2,539 additions & 0 deletions examples/2021-01-11-backtest-multicore.ipynb

Large diffs are not rendered by default.

13 changes: 6 additions & 7 deletions python/fastquant/backtest/backtest.py
```diff
@@ -69,6 +69,7 @@ def backtest(
     return_plot=False,
     channel="",
     symbol="",
+    max_cpus=1,
     allow_short=False,
     short_max=1.5,
     figsize=(30, 15),
@@ -109,6 +110,8 @@ def backtest(
     channel : str
         Channel to be used for notifications - e.g. "slack" (default=None)
     symbol : str
        Symbol to be referenced in the channel notification if not None (default=None)
+    max_cpus : int
+        Number of CPU cores to use; a value of None means all available cores (default=1)
     allow_short : bool
         Whether to allow short selling, with max set as `short_max` times the portfolio value (default=False)
     short_max : float
@@ -125,7 +128,7 @@ def backtest(
     """
     # Setting initial support for 1 cpu
     # Return the full strategy object to get all run information
-    cerebro = bt.Cerebro(stdstats=False, maxcpus=1, optreturn=False)
+    cerebro = bt.Cerebro(stdstats=False, maxcpus=max_cpus, optreturn=False)
     cerebro.addobserver(bt.observers.Broker)
     cerebro.addobserver(bt.observers.Trades)
     cerebro.addobserver(bt.observers.BuySell)
@@ -164,9 +167,7 @@ def backtest(
     # Allow instance of BaseStrategy or from the predefined mapping
     if not isinstance(strategy, str) and issubclass(strategy, bt.Strategy):
         strat_name = (
-            strategy.__name__
-            if hasattr(strategy, "__name__")
-            else str(strategy)
+            strategy.__name__ if hasattr(strategy, "__name__") else str(strategy)
         )
     else:
         strat_name = strategy
@@ -255,9 +256,7 @@ def backtest(
             **optim_params,
         )
     else:
-        fig = plot_results(
-            cerebro, data_format_dict, figsize, **plot_kwargs
-        )
+        fig = plot_results(cerebro, data_format_dict, figsize, **plot_kwargs)
 
     if return_history and return_plot:
         return sorted_combined_df, history_dict, fig
```
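The new `max_cpus` knob is passed straight through to backtrader's `Cerebro(maxcpus=...)`, which backtrader uses to parallelize runs over a parameter grid. A minimal usage sketch, assuming the top-level `fastquant` imports; the symbol and parameter grids are illustrative:

```python
from fastquant import backtest, get_stock_data

df = get_stock_data("JFC", "2019-01-01", "2020-01-01")

# A 3x3 grid of SMAC runs; with max_cpus=None backtrader may spread them
# across all available cores, while the default max_cpus=1 preserves the
# old single-core behavior.
results = backtest(
    "smac",
    df,
    fast_period=[10, 20, 30],
    slow_period=[40, 50, 60],
    max_cpus=None,
)
```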
8 changes: 2 additions & 6 deletions python/fastquant/backtest/backtest_indicators.py
```diff
@@ -106,9 +106,7 @@ def get_indicators_as_dict(strat_run, multi_line_indicators):
     indicators_dict = dict()
     for i, ind in enumerate(indicators):
         indicator_name = (
-            ind.plotlabel()
-            if hasattr(ind, "plotlabel")
-            else "indicator{}".format(i)
+            ind.plotlabel() if hasattr(ind, "plotlabel") else "indicator{}".format(i)
        )
 
         # Check if indicator contains multiple lines
@@ -138,7 +136,5 @@ def rename_indicator(name, line_name=None):
     # Changes the name to <indicator>_<line>_<param1>_<param2>
     tokens = indicator_regex.findall(name)
     if line_name:
-        tokens = [tokens[0], line_name] + (
-            tokens[1:] if len(tokens) > 1 else []
-        )
+        tokens = [tokens[0], line_name] + (tokens[1:] if len(tokens) > 1 else [])
     return "_".join(tokens)
```
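For context on `rename_indicator`, here is a self-contained sketch of the renaming scheme. The regex below is a hypothetical stand-in; the real `indicator_regex` is defined elsewhere in this module and may differ:

```python
import re

# Hypothetical stand-in for the module-level indicator_regex.
indicator_regex = re.compile(r"\w+")

def rename_indicator(name, line_name=None):
    # Changes the name to <indicator>_<line>_<param1>_<param2>
    tokens = indicator_regex.findall(name)
    if line_name:
        tokens = [tokens[0], line_name] + (tokens[1:] if len(tokens) > 1 else [])
    return "_".join(tokens)

print(rename_indicator("SMA(30)"))                 # SMA_30
print(rename_indicator("MACD(12, 26)", "signal"))  # MACD_signal_12_26
```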
4 changes: 1 addition & 3 deletions python/fastquant/backtest/data_prep.py
```diff
@@ -107,9 +107,7 @@ def include_sentiment_score(data, sentiments):
     senti_series = pd.Series(sentiments, name="sentiment_score", dtype=float)
 
     # join and reset the index for dt to become the first column
-    data = data.merge(
-        senti_series, left_index=True, right_index=True, how="left"
-    )
+    data = data.merge(senti_series, left_index=True, right_index=True, how="left")
     data = data.reset_index()
 
     return data
```
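The reformatted merge keeps the same semantics: an index-aligned left join, so rows of `data` without a matching sentiment entry get NaN rather than being dropped. A small sketch with made-up values:

```python
import pandas as pd

data = pd.DataFrame(
    {"close": [1.0, 2.0, 3.0]},
    index=pd.to_datetime(["2021-01-11", "2021-01-12", "2021-01-13"]),
)
senti_series = pd.Series(
    {pd.Timestamp("2021-01-12"): 0.8}, name="sentiment_score", dtype=float
)

# Left join on the datetime index; unmatched dates get NaN sentiment.
data = data.merge(senti_series, left_index=True, right_index=True, how="left")
data = data.reset_index()  # the datetime index becomes the first column
print(data)
```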
41 changes: 10 additions & 31 deletions python/fastquant/backtest/post_backtest.py
```diff
@@ -49,12 +49,9 @@ def analyze_strategies(
         for i, strat in enumerate(stratrun):
             # Get indicator history
             st_dtime = [
-                bt.utils.date.num2date(num)
-                for num in strat.lines.datetime.plot()
+                bt.utils.date.num2date(num) for num in strat.lines.datetime.plot()
             ]
-            indicators_dict = get_indicators_as_dict(
-                strat, multi_line_indicators
-            )
+            indicators_dict = get_indicators_as_dict(strat, multi_line_indicators)
             indicators_df = pd.DataFrame(indicators_dict)
             indicators_df.insert(0, "dt", st_dtime)
 
@@ -106,9 +103,7 @@ def analyze_strategies(
                 order_history_dfs.append(order_history_df)
 
                 periodic_history_df = strat.periodic_history_df
-                periodic_history_df["dt"] = pd.to_datetime(
-                    periodic_history_df.dt
-                )
+                periodic_history_df["dt"] = pd.to_datetime(periodic_history_df.dt)
                 periodic_history_df.insert(0, "strat_name", history_key)
                 periodic_history_df.insert(0, "strat_id", strat_idx)
                 periodic_history_df[
@@ -143,18 +138,12 @@ def analyze_strategies(
                 total = np.nan
 
             if "won" in tradeanalyzer.keys():
-                win_rate = (
-                    tradeanalyzer["won"]["total"] / tradeanalyzer["total"]["total"]
-                )
+                win_rate = tradeanalyzer["won"]["total"] / tradeanalyzer["total"]["total"]
                 won = tradeanalyzer["won"]["total"]
                 won_avg = tradeanalyzer["won"]["pnl"]["average"]
-                won_avg_prcnt = (
-                    tradeanalyzer["won"]["pnl"]["average"] / init_cash * 100
-                )
+                won_avg_prcnt = tradeanalyzer["won"]["pnl"]["average"] / init_cash * 100
                 won_max = tradeanalyzer["won"]["pnl"]["max"]
-                won_max_prcnt = (
-                    tradeanalyzer["won"]["pnl"]["max"] / init_cash * 100
-                )
+                won_max_prcnt = tradeanalyzer["won"]["pnl"]["max"] / init_cash * 100
             else:
                 win_rate = np.nan
                 won = np.nan
@@ -166,13 +155,9 @@ def analyze_strategies(
             if "lost" in tradeanalyzer.keys():
                 lost = tradeanalyzer["lost"]["total"]
                 lost_avg = tradeanalyzer["lost"]["pnl"]["average"]
-                lost_avg_prcnt = (
-                    tradeanalyzer["lost"]["pnl"]["average"] / init_cash * 100
-                )
+                lost_avg_prcnt = tradeanalyzer["lost"]["pnl"]["average"] / init_cash * 100
                 lost_max = tradeanalyzer["lost"]["pnl"]["max"]
-                lost_max_prcnt = (
-                    tradeanalyzer["lost"]["pnl"]["max"] / init_cash * 100
-                )
+                lost_max_prcnt = tradeanalyzer["lost"]["pnl"]["max"] / init_cash * 100
             else:
                 lost = np.nan
                 lost_avg = np.nan
@@ -233,9 +218,7 @@ def analyze_strategies(
     return sorted_combined_df, optim_params, history_dict
 
 
-def sort_metrics_params_and_strats(
-    metrics_df, params_df, strat_ids, sort_by, verbose
-):
+def sort_metrics_params_and_strats(metrics_df, params_df, strat_ids, sort_by, verbose):
 
     # Get indices based on `sort_by` metric
     optim_idxs = np.argsort(metrics_df[sort_by].values)[::-1]
@@ -251,11 +234,7 @@ def sort_metrics_params_and_strats(
     )
     # drop extra columns #248
     if (
-        len(
-            set(["channel" "symbol"]).intersection(
-                sorted_combined_df.columns.values
-            )
-        )
+        len(set(["channel" "symbol"]).intersection(sorted_combined_df.columns.values))
         == 2
     ):
         sorted_combined_df.drop(["channel", "symbol"], axis=1, inplace=True)
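One thing the reformatting makes easier to spot: `set(["channel" "symbol"])` relies on implicit string-literal concatenation, so it builds `{"channelsymbol"}` and the `== 2` comparison can never be true. A hedged sketch of what the check presumably intends (this fix is not part of the PR):

```python
# ["channel" "symbol"] is really ["channelsymbol"] (adjacent string literals
# concatenate), so the intersection is always empty. Presumably the intended
# check is over two separate column names:
extra_cols = {"channel", "symbol"}
if extra_cols.issubset(sorted_combined_df.columns):
    sorted_combined_df.drop(list(extra_cols), axis=1, inplace=True)
```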
25 changes: 7 additions & 18 deletions python/fastquant/data/crypto/crypto.py
```diff
@@ -21,11 +21,7 @@ def unix_time_millis(date):
     # epoch = datetime.utcfromtimestamp(0)
 
     # value will only have : if the date passed is intraday
-    dt_format = (
-        DATETIME_FORMAT["intraday"]
-        if ":" in date
-        else DATETIME_FORMAT["daily"]
-    )
+    dt_format = DATETIME_FORMAT["intraday"] if ":" in date else DATETIME_FORMAT["daily"]
     dt = datetime.strptime(date, dt_format)
     # return int((dt - epoch).total_seconds() * 1000)
     return int(dt.timestamp() * 1000)
@@ -88,9 +84,9 @@ def get_crypto_data(
                 )
                 # Make sure we're at the start of that day
                 request_start_date_epoch = unix_time_millis(
-                    pd.to_datetime(
-                        request_start_date_epoch, unit="ms"
-                    ).strftime(dt_format)
+                    pd.to_datetime(request_start_date_epoch, unit="ms").strftime(
+                        dt_format
+                    )
                 )
                 previous_request_end_date_epoch = request_start_date_epoch - 1
                 continue
@@ -107,10 +103,7 @@ def get_crypto_data(
             # Get the last entry timestamp after we've retrieved (or attempted to) additional records
             current_request_end_date_epoch = int(ohlcv_df.dt.max())
 
-            if (
-                current_request_end_date_epoch
-                <= previous_request_end_date_epoch
-            ):
+            if current_request_end_date_epoch <= previous_request_end_date_epoch:
                 # We haven't gained any additional records, so there's no point in further requests
                 # Let's mark this for the data end date, mostly so both end_date and end_date_epoch will be
                 # in sync in case someone in future uses them in code futher down and to ensure the loop bails
@@ -124,9 +117,7 @@ def get_crypto_data(
             # The next request should start a millisecond after this one ended
             request_start_date_epoch = current_request_end_date_epoch + 1
             # This request's end date should now be set as current for the next loop
-            previous_request_end_date_epoch = (
-                current_request_end_date_epoch
-            )
+            previous_request_end_date_epoch = current_request_end_date_epoch
 
     if ohlcv_df is not None:
         # Convert the unix timestampe to datetime
@@ -142,8 +133,6 @@ def get_crypto_data(
         return ohlcv_df
     else:
         raise NotImplementedError(
-            "The exchange "
-            + exchange
-            + " is not yet supported. Available exchanges: "
+            "The exchange " + exchange + " is not yet supported. Available exchanges: "
             ", ".join(CRYPTO_EXCHANGES)
         )
```
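The pagination in `get_crypto_data` leans on `unix_time_millis` to seed and advance millisecond epochs. A standalone sketch of the same conversion, assuming `DATETIME_FORMAT` maps `"daily"` to `%Y-%m-%d` and `"intraday"` to `%Y-%m-%d %H:%M:%S` (the real values live in fastquant's config):

```python
from datetime import datetime

# Dates containing ":" are treated as intraday, as in unix_time_millis above.
def to_epoch_ms(date_str):
    fmt = "%Y-%m-%d %H:%M:%S" if ":" in date_str else "%Y-%m-%d"
    return int(datetime.strptime(date_str, fmt).timestamp() * 1000)

start = to_epoch_ms("2021-01-11")
# Seed the loop so the first comparison cannot bail out early: the
# "previous" request is taken to end one millisecond before the start.
previous_end = start - 1
print(start, previous_end)  # exact values depend on the local timezone
```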
9 changes: 2 additions & 7 deletions python/fastquant/data/stocks/phisix.py
```diff
@@ -83,9 +83,7 @@ def get_phisix_data_by_date(symbol, date):
     return None
 
 
-def get_phisix_data(
-    symbol, start_date, end_date, save=False, max_straight_nones=10
-):
+def get_phisix_data(symbol, start_date, end_date, save=False, max_straight_nones=10):
     """Returns pricing data for a PHISIX stock symbol.
 
     Parameters
@@ -103,10 +101,7 @@ def get_phisix_data(
         Stock data (in CV format) for the specified company and date range
     """
     date_range = (
-        pd.period_range(start_date, end_date, freq="D")
-        .to_series()
-        .astype(str)
-        .values
+        pd.period_range(start_date, end_date, freq="D").to_series().astype(str).values
     )
 
     max_straight_nones = min(max_straight_nones, len(date_range))
```
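A usage sketch for the reformatted `get_phisix_data` signature; the symbol and dates are illustrative:

```python
from fastquant.data.stocks.phisix import get_phisix_data

# Fetches daily prices one date at a time over the range; judging by the
# name, max_straight_nones caps how many consecutive empty days (weekends,
# holidays, delisted symbols) are tolerated before the loop gives up.
df = get_phisix_data("JFC", "2019-01-01", "2019-03-01", max_straight_nones=10)
print(df.head())
```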
64 changes: 15 additions & 49 deletions python/fastquant/data/stocks/pse.py
```diff
@@ -73,9 +73,7 @@ def get_stock_table(stock_table_fp=None):
         pd.concat(
             [
                 pd.read_html(r.text)[0],
-                pd.DataFrame(
-                    {"attr": table.xpath("//tr/td/a/@onclick")[::2]}
-                ),
+                pd.DataFrame({"attr": table.xpath("//tr/td/a/@onclick")[::2]}),
             ],
             axis=1,
         )
@@ -112,9 +110,7 @@ def get_pse_all_stocks():
     return df
 
 
-def get_pse_data_old(
-    symbol, start_date, end_date, stock_table_fp=None, verbose=True
-):
+def get_pse_data_old(symbol, start_date, end_date, stock_table_fp=None, verbose=True):
     """Returns pricing data for a specified stock.
 
     Parameters
@@ -145,26 +141,18 @@ def get_pse_data_old(
 
     data = {
         "cmpy_id": int(
-            stock_table["company_id"][
-                stock_table["Stock Symbol"] == symbol
-            ].values[0]
+            stock_table["company_id"][stock_table["Stock Symbol"] == symbol].values[0]
         ),
         "security_id": int(
-            stock_table["security_id"][
-                stock_table["Stock Symbol"] == symbol
-            ].values[0]
+            stock_table["security_id"][stock_table["Stock Symbol"] == symbol].values[0]
         ),
         "startDate": datetime.strptime(start_date, CALENDAR_FORMAT).strftime(
             "%m-%d-%Y"
         ),
-        "endDate": datetime.strptime(end_date, CALENDAR_FORMAT).strftime(
-            "%m-%d-%Y"
-        ),
+        "endDate": datetime.strptime(end_date, CALENDAR_FORMAT).strftime("%m-%d-%Y"),
     }
 
-    r = requests.post(
-        url="https://edge.pse.com.ph/common/DisclosureCht.ax", json=data
-    )
+    r = requests.post(url="https://edge.pse.com.ph/common/DisclosureCht.ax", json=data)
     df = pd.DataFrame(r.json()["chartData"])
     rename_dict = {
         "CHART_DATE": "dt",
@@ -181,9 +169,7 @@ def get_pse_data_old(
     return df
 
 
-def get_pse_data_cache(
-    symbol=None, cache_fp=None, update=False, verbose=False
-):
+def get_pse_data_cache(symbol=None, cache_fp=None, update=False, verbose=False):
     """
     Loads cached historical data
     Returns all if symbol is None
@@ -198,22 +184,14 @@ def get_pse_data_cache(
         df.index = pd.to_datetime(df.index)
         if verbose:
             print("Loaded: ", cache_fp)
-        return (
-            df
-            if symbol is None
-            else df[symbol]
-            if symbol in df.columns
-            else None
-        )
+        return df if symbol is None else df[symbol] if symbol in df.columns else None
     else:
         errmsg = "Cache does not exist! Try update=True"
         print(errmsg)
         return None
 
 
-def update_pse_data_cache(
-    start_date="2010-01-01", verbose=True, cache_fp=None
-):
+def update_pse_data_cache(start_date="2010-01-01", verbose=True, cache_fp=None):
     """
     Downloads DOHLC data of all PSE comapnies using get_pse_old
     and saves as .zip in /data to be used as cache
@@ -229,9 +207,7 @@ def update_pse_data_cache(
     data, unavailable = {}, []
     for symbol in tqdm(names["Stock Symbol"].values):
         try:
-            df = get_pse_data_old(
-                symbol, start_date, date_today, verbose=False
-            )
+            df = get_pse_data_old(symbol, start_date, date_today, verbose=False)
             data[symbol] = df
         except Exception as e:
             unavailable.append(symbol)
@@ -281,9 +257,7 @@ def get_pse_data(
     start = datestring_to_datetime(start_date)
     end = datestring_to_datetime(end_date)
 
-    fp = Path(
-        DATA_PATH, "{}_stock_{}_{}.csv".format(symbol, start_date, end_date)
-    )
+    fp = Path(DATA_PATH, "{}_stock_{}_{}.csv".format(symbol, start_date, end_date))
 
     if "v" in format:
         if fp.exists():
@@ -309,9 +283,7 @@ def get_pse_data(
             symbol, start_date, end_date, save=False, max_straight_nones=10
         )
         if not pse_data_df.empty:
-            pse_data_df = pd.concat(
-                [cache, pse_data_df], ignore_index=True
-            )
+            pse_data_df = pd.concat([cache, pse_data_df], ignore_index=True)
         else:
             pse_data_df = cache.copy()
 
@@ -339,22 +311,16 @@ def pse_data_to_csv(symbol, start_date, end_date, pse_dir=DATA_PATH):
     pse = get_pse_data(symbol, start_date, end_date)
     fp = Path(
         pse_dir,
-        "{}_{}_{}_OHLCV.csv".format(
-            symbol, start_date, end_date
-        ),
+        "{}_{}_{}_OHLCV.csv".format(symbol, start_date, end_date),
     )
     if isinstance(pse, pd.DataFrame):
         pse.to_csv(fp)
     else:
         pse[0].to_csv(fp)
         performance_dict = pse[1]
         performance_dict["D"].to_csv(
-            Path(
-                pse_dir, "{}_{}_{}_D.csv".format(symbol, start_date, end_date)
-            )
+            Path(pse_dir, "{}_{}_{}_D.csv".format(symbol, start_date, end_date))
        )
         performance_dict["E"].to_csv(
-            Path(
-                pse_dir, "{}_{}_{}_E.csv".format(symbol, start_date, end_date)
-            )
+            Path(pse_dir, "{}_{}_{}_E.csv".format(symbol, start_date, end_date))
         )
```
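Finally, a sketch of the cache round trip touched above, using only the signatures visible in this diff; note that refreshing the cache downloads data for every PSE company, so it is slow:

```python
from fastquant.data.stocks.pse import get_pse_data_cache, update_pse_data_cache

# Rebuild the local cache (downloads DOHLC data for all PSE companies via
# get_pse_data_old), then read one symbol back out of it.
update_pse_data_cache(start_date="2010-01-01")
df = get_pse_data_cache(symbol="JFC")
if df is None:
    # get_pse_data_cache prints an error and returns None on a cache miss
    print("JFC not found in cache")
```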