diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8b1378917..fc0f1c5ef 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1 +1,7 @@ - +name: CI +on: [push] +jobs: + build: + runs-on: ubuntu-latest + steps: + - run: 'true' diff --git a/README.md b/README.md index 924e97df9..53cb2e0cf 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ :warning: **Used at one's own risk** :warning: -v7.2.12 +v7.2.13 ## Overview diff --git a/configs/approved_coins_pumpdump.json b/configs/approved_coins_pumpdump.json new file mode 100644 index 000000000..fea952ef3 --- /dev/null +++ b/configs/approved_coins_pumpdump.json @@ -0,0 +1 @@ +["ACE", "ACT", "AEVO", "AI", "AI16Z", "ALT", "AXL", "BAN", "BLZ", "BOME", "BOND", "CAT", "CATI", "CHILLGUY", "CYBER", "DEGEN", "DOGS", "DYM", "FXS", "GOAT", "HFT", "HIPPO", "HOOK", "ILV", "KEY", "KOMA", "LINA", "LOOM", "MANTA", "MAVIA", "MEME", "METIS", "MOODENG", "MYRO", "NEIROETH", "NFP", "OMNI", "PIXEL", "PONKE", "POPCAT", "PORTAL", "RATS", "RDNT", "REZ", "SAGA", "SATS", "STRK", "THE", "TNSR", "UNFI", "USTC", "USUAL", "VANRY", "W", "XAI", "ZEREBRO", "ZETA"] diff --git a/configs/approved_coins_top20mcap.json b/configs/approved_coins_top20mcap.json new file mode 100644 index 000000000..3b3fd4c6b --- /dev/null +++ b/configs/approved_coins_top20mcap.json @@ -0,0 +1 @@ +["BTC", "ETH", "XRP", "SOL", "BNB", "DOGE", "ADA", "TRX", "LINK", "AVAX", "XLM", "TON", "SUI", "HBAR", "SHIB", "LTC", "DOT", "BGB", "BCH", "HYPE"] \ No newline at end of file diff --git a/configs/examples/high_risk_btc_trailing_long.json b/configs/examples/high_risk_btc_trailing_long.json deleted file mode 100644 index 40efdbecd..000000000 --- a/configs/examples/high_risk_btc_trailing_long.json +++ /dev/null @@ -1,150 +0,0 @@ -{"analysis": {"adg": 0.009813373940044771, - "drawdown_worst": 0.6945115960400235, - "equity_balance_diff_max": 0.6930804946290117, - "equity_balance_diff_mean": 0.017493566929694827, - "loss_profit_ratio": 0.032608860175119325, - "loss_profit_ratio_long": 0.032608860175119325, - "loss_profit_ratio_short": 1.0, - "mdg": 0.007380387901004964, - "pnl_ratio_long_short": 1.0, - "sharpe_ratio": 0.3991849326303949}, - "backtest": {"base_dir": "backtests", - "cache_dir": "caches/hlcvs_data/ecaebc4a596ef216", - "compress_cache": true, - "end_date": "2024-11-14", - "exchange": "bybit", - "start_date": "2024-01-01", - "starting_balance": 100000, - "symbols": ["BTCUSDT"]}, - "bot": {"long": {"close_grid_markup_range": 0.005233755971494632, - "close_grid_min_markup": 0.029443141648836362, - "close_grid_qty_pct": 0.376056914689064, - "close_trailing_grid_ratio": 1, - "close_trailing_qty_pct": 0.47567248653991856, - "close_trailing_retracement_pct": 0.00020058926244927774, - "close_trailing_threshold_pct": 0.00466337060146426, - "ema_span_0": 228.0710753978957, - "ema_span_1": 767.344612064711, - "entry_grid_double_down_factor": 2.5099193684500376, - "entry_grid_spacing_pct": 0.052156960093609234, - "entry_grid_spacing_weight": 0.932020524544718, - "entry_initial_ema_dist": -0.0038751972323778647, - "entry_initial_qty_pct": 0.09980217963520135, - "entry_trailing_grid_ratio": 1, - "entry_trailing_retracement_pct": 0.00013698138685686628, - "entry_trailing_threshold_pct": 0.06233741446341446, - "filter_relative_volume_clip_pct": 0.8847687493808436, - "filter_rolling_window": 55.068004720674026, - "n_positions": 1.000092506770869, - "total_wallet_exposure_limit": 9.72504928658771, - "unstuck_close_pct": 0.08905490275771208, - "unstuck_ema_dist": 
-0.0017324226989119255, - "unstuck_loss_allowance_pct": 0.004726553565561598, - "unstuck_threshold": 0.4007945818444598}, - "short": {"close_grid_markup_range": 0.005528531510502695, - "close_grid_min_markup": 0.026473279904984614, - "close_grid_qty_pct": 0.37573717710600746, - "close_trailing_grid_ratio": 1, - "close_trailing_qty_pct": 0.7731148472127198, - "close_trailing_retracement_pct": 0.033976642966983356, - "close_trailing_threshold_pct": 0.044452344775101224, - "ema_span_0": 814.96662344569, - "ema_span_1": 461.488766970942, - "entry_grid_double_down_factor": 1.4705087038845643, - "entry_grid_spacing_pct": 0.021714380568355285, - "entry_grid_spacing_weight": 0.2754086481261349, - "entry_initial_ema_dist": -0.03926216961844701, - "entry_initial_qty_pct": 0.00739160119302463, - "entry_trailing_grid_ratio": 1, - "entry_trailing_retracement_pct": 0.06586528763664759, - "entry_trailing_threshold_pct": -0.006360049589975893, - "filter_relative_volume_clip_pct": 0.39064954228610194, - "filter_rolling_window": 57.90400312877959, - "n_positions": 0.0, - "total_wallet_exposure_limit": 2.1969382299041125, - "unstuck_close_pct": 0.008459139558869443, - "unstuck_ema_dist": -0.03573366946883049, - "unstuck_loss_allowance_pct": 0.013646444492036598, - "unstuck_threshold": 0.8281823987224207}}, - "disable_plotting": false, - "live": {"approved_coins": {"long": ["BTC"], "short": ["BTC"]}, - "auto_gs": true, - "coin_flags": {}, - "empty_means_all_approved": false, - "execution_delay_seconds": 2, - "filter_by_min_effective_cost": true, - "forced_mode_long": "", - "forced_mode_short": "", - "ignored_coins": {"long": [], "short": []}, - "leverage": 10, - "max_n_cancellations_per_batch": 5, - "max_n_creations_per_batch": 3, - "max_n_restarts_per_day": 10, - "minimum_coin_age_days": 30, - "ohlcvs_1m_rolling_window_days": 4, - "ohlcvs_1m_update_after_minutes": 10, - "pnls_max_lookback_days": 30, - "price_distance_threshold": 0.002, - "time_in_force": "good_till_cancelled", - "user": "bybit_01"}, - "optimize": {"bounds": {"long_close_grid_markup_range": [0, 0.03], - "long_close_grid_min_markup": [0.001, 0.03], - "long_close_grid_qty_pct": [0.05, 1], - "long_close_trailing_grid_ratio": [1, 1], - "long_close_trailing_qty_pct": [0.05, 1], - "long_close_trailing_retracement_pct": [0, 0.1], - "long_close_trailing_threshold_pct": [-0.1, 0.1], - "long_ema_span_0": [200, 1440], - "long_ema_span_1": [200, 1440], - "long_entry_grid_double_down_factor": [0.1, 3], - "long_entry_grid_spacing_pct": [0.001, 0.12], - "long_entry_grid_spacing_weight": [0, 10], - "long_entry_initial_ema_dist": [-0.1, 0.003], - "long_entry_initial_qty_pct": [0.005, 0.1], - "long_entry_trailing_grid_ratio": [1, 1], - "long_entry_trailing_retracement_pct": [0, 0.1], - "long_entry_trailing_threshold_pct": [-0.1, 0.1], - "long_filter_relative_volume_clip_pct": [0, 1], - "long_filter_rolling_window": [10, 360], - "long_n_positions": [1, 20], - "long_total_wallet_exposure_limit": [1, 10], - "long_unstuck_close_pct": [0.001, 0.1], - "long_unstuck_ema_dist": [-0.1, 0.01], - "long_unstuck_loss_allowance_pct": [0, 0.05], - "long_unstuck_threshold": [0.4, 0.95], - "short_close_grid_markup_range": [0, 0.03], - "short_close_grid_min_markup": [0.001, 0.03], - "short_close_grid_qty_pct": [0.05, 1], - "short_close_trailing_grid_ratio": [1, 1], - "short_close_trailing_qty_pct": [0.05, 1], - "short_close_trailing_retracement_pct": [0, 0.1], - "short_close_trailing_threshold_pct": [-0.1, 0.1], - "short_ema_span_0": [200, 1440], - "short_ema_span_1": [200, 
1440], - "short_entry_grid_double_down_factor": [0.1, 3], - "short_entry_grid_spacing_pct": [0.001, 0.12], - "short_entry_grid_spacing_weight": [0, 10], - "short_entry_initial_ema_dist": [-0.1, 0.003], - "short_entry_initial_qty_pct": [0.005, 0.1], - "short_entry_trailing_grid_ratio": [1, 1], - "short_entry_trailing_retracement_pct": [0, 0.1], - "short_entry_trailing_threshold_pct": [-0.1, 0.1], - "short_filter_relative_volume_clip_pct": [0, 1], - "short_filter_rolling_window": [10, 360], - "short_n_positions": [1, 20], - "short_total_wallet_exposure_limit": [0, 10], - "short_unstuck_close_pct": [0.001, 0.1], - "short_unstuck_ema_dist": [-0.1, 0.01], - "short_unstuck_loss_allowance_pct": [0, 0.05], - "short_unstuck_threshold": [0.4, 0.95]}, - "compress_results_file": true, - "crossover_probability": 0.7, - "iters": 200000, - "limits": {"lower_bound_drawdown_worst": 0.5, - "lower_bound_equity_balance_diff_mean": 0.03, - "lower_bound_loss_profit_ratio": 0.6}, - "mutation_probability": 0.2, - "n_cpus": 10, - "population_size": 500, - "scoring": ["mdg", "sharpe_ratio"]}, - "results_filename": "optimize_results/2024-11-15T14_35_30_BTC_3225f17e_all_results.txt"} \ No newline at end of file diff --git a/configs/examples/pumpdump.json b/configs/examples/pumpdump.json new file mode 100644 index 000000000..0ea25d0b7 --- /dev/null +++ b/configs/examples/pumpdump.json @@ -0,0 +1,145 @@ +{"backtest": {"base_dir": "backtests", + "combine_ohlcvs": true, + "compress_cache": true, + "end_date": "now", + "exchanges": ["binance", "bybit", "gateio", "bitget"], + "gap_tolerance_ohlcvs_minutes": 120.0, + "start_date": "2021-04-01", + "starting_balance": 100000}, + "bot": {"long": {"close_grid_markup_range": 0.0095279, + "close_grid_min_markup": 0.0029382, + "close_grid_qty_pct": 0.43727, + "close_trailing_grid_ratio": -0.47276, + "close_trailing_qty_pct": 0.068105, + "close_trailing_retracement_pct": 0.048969, + "close_trailing_threshold_pct": 0.01036, + "ema_span_0": 676.33, + "ema_span_1": 1018.6, + "enforce_exposure_limit": true, + "entry_grid_double_down_factor": 1.4603, + "entry_grid_spacing_pct": 0.059984, + "entry_grid_spacing_weight": 1.2536, + "entry_initial_ema_dist": -0.0030698, + "entry_initial_qty_pct": 0.015053, + "entry_trailing_grid_ratio": -0.2859, + "entry_trailing_retracement_pct": 0.075419, + "entry_trailing_threshold_pct": 0.061579, + "filter_relative_volume_clip_pct": 0.38317, + "filter_rolling_window": 91.197, + "n_positions": 6.5431, + "total_wallet_exposure_limit": 1.9506, + "unstuck_close_pct": 0.031311, + "unstuck_ema_dist": -0.044175, + "unstuck_loss_allowance_pct": 0.015059, + "unstuck_threshold": 0.46849}, + "short": {"close_grid_markup_range": 0.0, + "close_grid_min_markup": 0.001, + "close_grid_qty_pct": 0.05, + "close_trailing_grid_ratio": 0.0, + "close_trailing_qty_pct": 0.05, + "close_trailing_retracement_pct": 0.0001, + "close_trailing_threshold_pct": 0.0, + "ema_span_0": 200.0, + "ema_span_1": 200.0, + "enforce_exposure_limit": true, + "entry_grid_double_down_factor": 0.1, + "entry_grid_spacing_pct": 0.001, + "entry_grid_spacing_weight": 0.0, + "entry_initial_ema_dist": 0.0, + "entry_initial_qty_pct": 0.004, + "entry_trailing_grid_ratio": 0.0, + "entry_trailing_retracement_pct": 0.0001, + "entry_trailing_threshold_pct": 0.0, + "filter_relative_volume_clip_pct": 0.0, + "filter_rolling_window": 10.0, + "n_positions": 0.0, + "total_wallet_exposure_limit": 0.0, + "unstuck_close_pct": 0.001, + "unstuck_ema_dist": 0.0, + "unstuck_loss_allowance_pct": 0.001, + "unstuck_threshold": 
0.4}}, + "live": {"approved_coins": "", + "auto_gs": true, + "coin_flags": {}, + "empty_means_all_approved": true, + "execution_delay_seconds": 2.0, + "filter_by_min_effective_cost": true, + "forced_mode_long": "", + "forced_mode_short": "", + "ignored_coins": {"long": [], "short": []}, + "leverage": 10.0, + "market_orders_allowed": true, + "max_n_cancellations_per_batch": 5, + "max_n_creations_per_batch": 3, + "max_n_restarts_per_day": 10, + "minimum_coin_age_days": 7.0, + "ohlcvs_1m_rolling_window_days": 4.0, + "ohlcvs_1m_update_after_minutes": 10.0, + "pnls_max_lookback_days": 30.0, + "price_distance_threshold": 0.002, + "time_in_force": "good_till_cancelled", + "user": "bybit_01"}, + "optimize": {"bounds": {"long_close_grid_markup_range": [0, 0.03], + "long_close_grid_min_markup": [0.001, 0.03], + "long_close_grid_qty_pct": [0.05, 1.0], + "long_close_trailing_grid_ratio": [-1, 1], + "long_close_trailing_qty_pct": [0.05, 1.0], + "long_close_trailing_retracement_pct": [0.0001, 0.1], + "long_close_trailing_threshold_pct": [-0.01, 0.1], + "long_ema_span_0": [200, 1440], + "long_ema_span_1": [200, 1440], + "long_entry_grid_double_down_factor": [0.1, 3], + "long_entry_grid_spacing_pct": [0.001, 0.06], + "long_entry_grid_spacing_weight": [0, 10], + "long_entry_initial_ema_dist": [-0.1, 0.003], + "long_entry_initial_qty_pct": [0.004, 0.02], + "long_entry_trailing_grid_ratio": [-1, 1], + "long_entry_trailing_retracement_pct": [0.0001, 0.1], + "long_entry_trailing_threshold_pct": [-0.01, 0.1], + "long_filter_relative_volume_clip_pct": [0, 1], + "long_filter_rolling_window": [10, 360], + "long_n_positions": [6.4, 12.0], + "long_total_wallet_exposure_limit": [0.0, 2.0], + "long_unstuck_close_pct": [0.001, 0.1], + "long_unstuck_ema_dist": [-0.1, 0.01], + "long_unstuck_loss_allowance_pct": [0.001, 0.05], + "long_unstuck_threshold": [0.4, 0.95], + "short_close_grid_markup_range": [0, 0.03], + "short_close_grid_min_markup": [0.001, 0.03], + "short_close_grid_qty_pct": [0.05, 1.0], + "short_close_trailing_grid_ratio": [-1, 1], + "short_close_trailing_qty_pct": [0.05, 1.0], + "short_close_trailing_retracement_pct": [0.0001, 0.1], + "short_close_trailing_threshold_pct": [-0.01, 0.1], + "short_ema_span_0": [200, 1440], + "short_ema_span_1": [200, 1440], + "short_entry_grid_double_down_factor": [0.1, 3], + "short_entry_grid_spacing_pct": [0.001, 0.06], + "short_entry_grid_spacing_weight": [0, 10], + "short_entry_initial_ema_dist": [-0.1, 0.003], + "short_entry_initial_qty_pct": [0.004, 0.02], + "short_entry_trailing_grid_ratio": [-1, 1], + "short_entry_trailing_retracement_pct": [0.0001, 0.1], + "short_entry_trailing_threshold_pct": [-0.01, 0.1], + "short_filter_relative_volume_clip_pct": [0, 1], + "short_filter_rolling_window": [10, 360], + "short_n_positions": [6.4, 12.0], + "short_total_wallet_exposure_limit": [0.0, 2.0], + "short_unstuck_close_pct": [0.001, 0.1], + "short_unstuck_ema_dist": [-0.1, 0.01], + "short_unstuck_loss_allowance_pct": [0.001, 0.05], + "short_unstuck_threshold": [0.4, 0.95]}, + "compress_results_file": true, + "crossover_probability": 0.7, + "iters": 300000, + "limits": {"lower_bound_drawdown_worst": 0.333, + "lower_bound_drawdown_worst_mean_1pct": 0.2, + "lower_bound_equity_balance_diff_neg_max": 0.4, + "lower_bound_equity_balance_diff_neg_mean": 0.01, + "lower_bound_equity_balance_diff_pos_max": 0.5, + "lower_bound_equity_balance_diff_pos_mean": 0.02, + "lower_bound_loss_profit_ratio": 0.5}, + "mutation_probability": 0.2, + "n_cpus": 5, + "population_size": 500, + "scoring": 
["mdg", "sterling_ratio"]}} diff --git a/configs/examples/top20mcap.json b/configs/examples/top20mcap.json new file mode 100644 index 000000000..63ba47492 --- /dev/null +++ b/configs/examples/top20mcap.json @@ -0,0 +1,145 @@ +{"backtest": {"base_dir": "backtests", + "combine_ohlcvs": true, + "compress_cache": true, + "end_date": "now", + "exchanges": ["binance", "bybit", "gateio", "bitget"], + "gap_tolerance_ohlcvs_minutes": 120.0, + "start_date": "2021-04-01", + "starting_balance": 100000}, + "bot": {"long": {"close_grid_markup_range": 0.0050591, + "close_grid_min_markup": 0.0050351, + "close_grid_qty_pct": 0.94386, + "close_trailing_grid_ratio": -0.048287, + "close_trailing_qty_pct": 0.66612, + "close_trailing_retracement_pct": 0.0090063, + "close_trailing_threshold_pct": 0.0070056, + "ema_span_0": 591.53, + "ema_span_1": 251.56, + "enforce_exposure_limit": 1.0, + "entry_grid_double_down_factor": 1.6344, + "entry_grid_spacing_pct": 0.029537, + "entry_grid_spacing_weight": 0.27803, + "entry_initial_ema_dist": -0.07151, + "entry_initial_qty_pct": 0.013548, + "entry_trailing_grid_ratio": -0.13036, + "entry_trailing_retracement_pct": 0.014867, + "entry_trailing_threshold_pct": 0.072569, + "filter_relative_volume_clip_pct": 0.035543, + "filter_rolling_window": 263.66, + "n_positions": 6.4463, + "total_wallet_exposure_limit": 1.6091, + "unstuck_close_pct": 0.066952, + "unstuck_ema_dist": -0.098354, + "unstuck_loss_allowance_pct": 0.012308, + "unstuck_threshold": 0.50078}, + "short": {"close_grid_markup_range": 0.027427, + "close_grid_min_markup": 0.0088639, + "close_grid_qty_pct": 0.38146, + "close_trailing_grid_ratio": -0.48905, + "close_trailing_qty_pct": 0.80056, + "close_trailing_retracement_pct": 0.002552, + "close_trailing_threshold_pct": -0.0075215, + "ema_span_0": 1242.6, + "ema_span_1": 717.7, + "enforce_exposure_limit": 1.0, + "entry_grid_double_down_factor": 0.36883, + "entry_grid_spacing_pct": 0.020096, + "entry_grid_spacing_weight": 5.7436, + "entry_initial_ema_dist": -0.082084, + "entry_initial_qty_pct": 0.017122, + "entry_trailing_grid_ratio": -0.90061, + "entry_trailing_retracement_pct": 0.023083, + "entry_trailing_threshold_pct": 0.078399, + "filter_relative_volume_clip_pct": 0.47361, + "filter_rolling_window": 251.65, + "n_positions": 7.2456, + "total_wallet_exposure_limit": 0.0, + "unstuck_close_pct": 0.08722, + "unstuck_ema_dist": -0.089717, + "unstuck_loss_allowance_pct": 0.028372, + "unstuck_threshold": 0.64427}}, + "live": {"approved_coins": "configs/approved_coins_top20mcap.json", + "auto_gs": true, + "coin_flags": {}, + "empty_means_all_approved": true, + "execution_delay_seconds": 2.0, + "filter_by_min_effective_cost": true, + "forced_mode_long": "", + "forced_mode_short": "", + "ignored_coins": {"long": [], "short": []}, + "leverage": 10.0, + "market_orders_allowed": true, + "max_n_cancellations_per_batch": 5, + "max_n_creations_per_batch": 3, + "max_n_restarts_per_day": 10, + "minimum_coin_age_days": 30.0, + "ohlcvs_1m_rolling_window_days": 4.0, + "ohlcvs_1m_update_after_minutes": 10.0, + "pnls_max_lookback_days": 30.0, + "price_distance_threshold": 0.002, + "time_in_force": "good_till_cancelled", + "user": "bybit_01"}, + "optimize": {"bounds": {"long_close_grid_markup_range": [0, 0.03], + "long_close_grid_min_markup": [0.001, 0.03], + "long_close_grid_qty_pct": [0.05, 1.0], + "long_close_trailing_grid_ratio": [-1, 1], + "long_close_trailing_qty_pct": [0.05, 1.0], + "long_close_trailing_retracement_pct": [0.0001, 0.1], + "long_close_trailing_threshold_pct": [-0.01, 
0.1], + "long_ema_span_0": [200, 1440], + "long_ema_span_1": [200, 1440], + "long_entry_grid_double_down_factor": [0.1, 3], + "long_entry_grid_spacing_pct": [0.001, 0.06], + "long_entry_grid_spacing_weight": [0, 10], + "long_entry_initial_ema_dist": [-0.1, 0.003], + "long_entry_initial_qty_pct": [0.004, 0.02], + "long_entry_trailing_grid_ratio": [-1, 1], + "long_entry_trailing_retracement_pct": [0.0001, 0.1], + "long_entry_trailing_threshold_pct": [-0.01, 0.1], + "long_filter_relative_volume_clip_pct": [0, 1], + "long_filter_rolling_window": [10, 360], + "long_n_positions": [6.4, 12.0], + "long_total_wallet_exposure_limit": [0.0, 2.0], + "long_unstuck_close_pct": [0.001, 0.1], + "long_unstuck_ema_dist": [-0.1, 0.01], + "long_unstuck_loss_allowance_pct": [0.001, 0.05], + "long_unstuck_threshold": [0.4, 0.95], + "short_close_grid_markup_range": [0, 0.03], + "short_close_grid_min_markup": [0.001, 0.03], + "short_close_grid_qty_pct": [0.05, 1.0], + "short_close_trailing_grid_ratio": [-1, 1], + "short_close_trailing_qty_pct": [0.05, 1.0], + "short_close_trailing_retracement_pct": [0.0001, 0.1], + "short_close_trailing_threshold_pct": [-0.01, 0.1], + "short_ema_span_0": [200, 1440], + "short_ema_span_1": [200, 1440], + "short_entry_grid_double_down_factor": [0.1, 3], + "short_entry_grid_spacing_pct": [0.001, 0.06], + "short_entry_grid_spacing_weight": [0, 10], + "short_entry_initial_ema_dist": [-0.1, 0.003], + "short_entry_initial_qty_pct": [0.004, 0.02], + "short_entry_trailing_grid_ratio": [-1, 1], + "short_entry_trailing_retracement_pct": [0.0001, 0.1], + "short_entry_trailing_threshold_pct": [-0.01, 0.1], + "short_filter_relative_volume_clip_pct": [0, 1], + "short_filter_rolling_window": [10, 360], + "short_n_positions": [6.4, 12.0], + "short_total_wallet_exposure_limit": [0.0, 2.0], + "short_unstuck_close_pct": [0.001, 0.1], + "short_unstuck_ema_dist": [-0.1, 0.01], + "short_unstuck_loss_allowance_pct": [0.001, 0.05], + "short_unstuck_threshold": [0.4, 0.95]}, + "compress_results_file": true, + "crossover_probability": 0.7, + "iters": 300000, + "limits": {"lower_bound_drawdown_worst": 0.333, + "lower_bound_drawdown_worst_mean_1pct": 0.2, + "lower_bound_equity_balance_diff_neg_max": 0.4, + "lower_bound_equity_balance_diff_neg_mean": 0.01, + "lower_bound_equity_balance_diff_pos_max": 0.5, + "lower_bound_equity_balance_diff_pos_mean": 0.02, + "lower_bound_loss_profit_ratio": 0.5}, + "mutation_probability": 0.2, + "n_cpus": 5, + "population_size": 500, + "scoring": ["mdg", "sterling_ratio"]}} diff --git a/configs/examples/top_50_marketcap.json b/configs/examples/top_50_marketcap.json deleted file mode 100644 index 03f7a23f2..000000000 --- a/configs/examples/top_50_marketcap.json +++ /dev/null @@ -1,252 +0,0 @@ -{"analysis": {"adg": 0.002328559961369625, - "calmar_ratio": 0.010460202947848876, - "drawdown_worst": 0.2226113559152779, - "drawdown_worst_mean_1pct": 0.12720349985444393, - "equity_balance_diff_max": 0.18938379708155595, - "equity_balance_diff_mean": 0.003068666663144841, - "expected_shortfall_1pct": 0.08845277992108921, - "loss_profit_ratio": 0.562903665049109, - "mdg": 0.0016724103250521948, - "n_iters": 14354, - "omega_ratio": 1.6153833174459502, - "sharpe_ratio": 0.12344666626423802, - "sortino_ratio": 0.11412350951114446, - "sterling_ratio": 0.016244605963347106, - "w_0": -0.0016724103250521948, - "w_1": -0.11412350951114446}, - "backtest": {"base_dir": "backtests", - "compress_cache": true, - "end_date": "2024-11-25", - "exchange": "bybit", - "start_date": "2020-01-01", - 
"starting_balance": 100000}, - "bot": {"long": {"close_grid_markup_range": 0.009564351890738378, - "close_grid_min_markup": 0.00687139681293316, - "close_grid_qty_pct": 0.8879593445518993, - "close_trailing_grid_ratio": -0.13341967504235655, - "close_trailing_qty_pct": 0.378723778684219, - "close_trailing_retracement_pct": 0.0402190852158004, - "close_trailing_threshold_pct": -0.0726881036533627, - "ema_span_0": 375.2858529039368, - "ema_span_1": 764.409883564145, - "entry_grid_double_down_factor": 1.9465785439609855, - "entry_grid_spacing_pct": 0.04900696359588633, - "entry_grid_spacing_weight": 0.003360183457489358, - "entry_initial_ema_dist": -0.046211460472595736, - "entry_initial_qty_pct": 0.09887703332490187, - "entry_trailing_grid_ratio": -0.1821691091474685, - "entry_trailing_retracement_pct": 0.060387960214631865, - "entry_trailing_threshold_pct": -0.06332766465413839, - "filter_relative_volume_clip_pct": 0.3433039320342348, - "filter_rolling_window": 277.6691834113598, - "n_positions": 2.009686600446345, - "total_wallet_exposure_limit": 0.566755970011352, - "unstuck_close_pct": 0.08288581309067541, - "unstuck_ema_dist": -0.061729746826005356, - "unstuck_loss_allowance_pct": 0.048923551558195895, - "unstuck_threshold": 0.5072903498257371}, - "short": {"close_grid_markup_range": 0.01760817520313264, - "close_grid_min_markup": 0.0018907051791421476, - "close_grid_qty_pct": 0.4708995033328263, - "close_trailing_grid_ratio": -0.106369184301367, - "close_trailing_qty_pct": 0.53808787763, - "close_trailing_retracement_pct": 0.061911763498634025, - "close_trailing_threshold_pct": 0.006910432639207003, - "ema_span_0": 851.5157986001198, - "ema_span_1": 282.9201563034067, - "entry_grid_double_down_factor": 1.5361565080949973, - "entry_grid_spacing_pct": 0.009340419830136127, - "entry_grid_spacing_weight": 3.5449502928566736, - "entry_initial_ema_dist": -0.08175113211284604, - "entry_initial_qty_pct": 0.027422099105608645, - "entry_trailing_grid_ratio": 0.6247781815450105, - "entry_trailing_retracement_pct": 0.08211270169452607, - "entry_trailing_threshold_pct": 0.09129150542392346, - "filter_relative_volume_clip_pct": 0.4806042288535364, - "filter_rolling_window": 198.92694486020415, - "n_positions": 13.09824693803686, - "total_wallet_exposure_limit": 0.06207207261031298, - "unstuck_close_pct": 0.06339776582451993, - "unstuck_ema_dist": -0.07387579471516302, - "unstuck_loss_allowance_pct": 0.0028095044058393595, - "unstuck_threshold": 0.6203230951952984}}, - "live": {"approved_coins": {"long": ["BTC", - "ETH", - "SOL", - "BNB", - "XRP", - "DOGE", - "ADA", - "AVAX", - "TRX", - "TON", - "XLM", - "SHIB", - "DOT", - "LINK", - "BCH", - "SUI", - "PEPE", - "NEAR", - "UNI", - "LTC", - "APT", - "ICP", - "HBAR", - "ETC", - "POL", - "RENDER", - "TAO", - "FET", - "KAS", - "ARB", - "TIA", - "VET", - "BONK", - "FIL", - "WIF", - "STX", - "OM", - "ATOM", - "XMR", - "OP", - "IMX", - "INJ", - "AAVE", - "FTM", - "SEI", - "GRT", - "FLOKI", - "ALGO", - "THETA", - "RUNE"], - "short": ["BTC", - "ETH", - "SOL", - "BNB", - "XRP", - "DOGE", - "ADA", - "AVAX", - "TRX", - "TON", - "XLM", - "SHIB", - "DOT", - "LINK", - "BCH", - "SUI", - "PEPE", - "NEAR", - "UNI", - "LTC", - "APT", - "ICP", - "HBAR", - "ETC", - "POL", - "RENDER", - "TAO", - "FET", - "KAS", - "ARB", - "TIA", - "VET", - "BONK", - "FIL", - "WIF", - "STX", - "OM", - "ATOM", - "XMR", - "OP", - "IMX", - "INJ", - "AAVE", - "FTM", - "SEI", - "GRT", - "FLOKI", - "ALGO", - "THETA", - "RUNE"]}, - "auto_gs": true, - "coin_flags": {}, - "empty_means_all_approved": 
false, - "execution_delay_seconds": 2, - "filter_by_min_effective_cost": true, - "forced_mode_long": "", - "forced_mode_short": "", - "ignored_coins": {"long": [], "short": []}, - "leverage": 10, - "max_n_cancellations_per_batch": 5, - "max_n_creations_per_batch": 3, - "max_n_restarts_per_day": 10, - "minimum_coin_age_days": 30, - "ohlcvs_1m_rolling_window_days": 4, - "ohlcvs_1m_update_after_minutes": 10, - "pnls_max_lookback_days": 30, - "price_distance_threshold": 0.002, - "time_in_force": "good_till_cancelled", - "user": "bybit_01"}, - "optimize": {"bounds": {"long_close_grid_markup_range": [0, 0.03], - "long_close_grid_min_markup": [0.001, 0.03], - "long_close_grid_qty_pct": [0.05, 1], - "long_close_trailing_grid_ratio": [-1, 1], - "long_close_trailing_qty_pct": [0.05, 1], - "long_close_trailing_retracement_pct": [0, 0.1], - "long_close_trailing_threshold_pct": [-0.1, 0.1], - "long_ema_span_0": [200, 1440], - "long_ema_span_1": [200, 1440], - "long_entry_grid_double_down_factor": [0.1, 3], - "long_entry_grid_spacing_pct": [0.001, 0.12], - "long_entry_grid_spacing_weight": [0, 10], - "long_entry_initial_ema_dist": [-0.1, 0.003], - "long_entry_initial_qty_pct": [0.005, 0.1], - "long_entry_trailing_grid_ratio": [-1, 1], - "long_entry_trailing_retracement_pct": [0, 0.1], - "long_entry_trailing_threshold_pct": [-0.1, 0.1], - "long_filter_relative_volume_clip_pct": [0, 1], - "long_filter_rolling_window": [10, 360], - "long_n_positions": [1, 20], - "long_total_wallet_exposure_limit": [0, 5], - "long_unstuck_close_pct": [0.001, 0.1], - "long_unstuck_ema_dist": [-0.1, 0.01], - "long_unstuck_loss_allowance_pct": [0, 0.05], - "long_unstuck_threshold": [0.4, 0.95], - "short_close_grid_markup_range": [0, 0.03], - "short_close_grid_min_markup": [0.001, 0.03], - "short_close_grid_qty_pct": [0.05, 1], - "short_close_trailing_grid_ratio": [-1, 1], - "short_close_trailing_qty_pct": [0.05, 1], - "short_close_trailing_retracement_pct": [0, 0.1], - "short_close_trailing_threshold_pct": [-0.1, 0.1], - "short_ema_span_0": [200, 1440], - "short_ema_span_1": [200, 1440], - "short_entry_grid_double_down_factor": [0.1, 3], - "short_entry_grid_spacing_pct": [0.001, 0.12], - "short_entry_grid_spacing_weight": [0, 10], - "short_entry_initial_ema_dist": [-0.1, 0.003], - "short_entry_initial_qty_pct": [0.005, 0.1], - "short_entry_trailing_grid_ratio": [-1, 1], - "short_entry_trailing_retracement_pct": [0, 0.1], - "short_entry_trailing_threshold_pct": [-0.1, 0.1], - "short_filter_relative_volume_clip_pct": [0, 1], - "short_filter_rolling_window": [10, 360], - "short_n_positions": [1, 20], - "short_total_wallet_exposure_limit": [0, 5], - "short_unstuck_close_pct": [0.001, 0.1], - "short_unstuck_ema_dist": [-0.1, 0.01], - "short_unstuck_loss_allowance_pct": [0, 0.05], - "short_unstuck_threshold": [0.4, 0.95]}, - "compress_results_file": true, - "crossover_probability": 0.7, - "iters": 30000, - "limits": {"lower_bound_drawdown_worst": 0.25, - "lower_bound_drawdown_worst_mean_1pct": 0.15, - "lower_bound_equity_balance_diff_mean": 0.02, - "lower_bound_loss_profit_ratio": 0.6}, - "mutation_probability": 0.2, - "n_cpus": 10, - "population_size": 500, - "scoring": ["mdg", "sortino_ratio"]}} diff --git a/configs/template.json b/configs/template.json index 84111ff59..63ba47492 100644 --- a/configs/template.json +++ b/configs/template.json @@ -1,60 +1,64 @@ {"backtest": {"base_dir": "backtests", + "combine_ohlcvs": true, "compress_cache": true, "end_date": "now", - "exchanges": ["binance", "bybit"], - "start_date": "2021-05-01", 
- "starting_balance": 100000.0}, - "bot": {"long": {"close_grid_markup_range": 0.0013425, - "close_grid_min_markup": 0.0047292, - "close_grid_qty_pct": 0.85073, - "close_trailing_grid_ratio": 0.037504, - "close_trailing_qty_pct": 0.54254, - "close_trailing_retracement_pct": 0.021623, - "close_trailing_threshold_pct": 0.065009, - "ema_span_0": 469.33, - "ema_span_1": 1120.5, - "entry_grid_double_down_factor": 2.2661, - "entry_grid_spacing_pct": 0.05224, - "entry_grid_spacing_weight": 0.070246, - "entry_initial_ema_dist": -0.015187, - "entry_initial_qty_pct": 0.032679, - "entry_trailing_grid_ratio": -0.29357, - "entry_trailing_retracement_pct": 0.002646, - "entry_trailing_threshold_pct": -0.043522, - "filter_relative_volume_clip_pct": 0.51429, - "filter_rolling_window": 330.17, - "n_positions": 5.2399, - "total_wallet_exposure_limit": 1.2788, - "unstuck_close_pct": 0.05968, - "unstuck_ema_dist": -0.027416, - "unstuck_loss_allowance_pct": 0.035915, - "unstuck_threshold": 0.45572}, - "short": {"close_grid_markup_range": 0.0020933, - "close_grid_min_markup": 0.016488, - "close_grid_qty_pct": 0.93256, - "close_trailing_grid_ratio": 0.035892, - "close_trailing_qty_pct": 0.98975, - "close_trailing_retracement_pct": 0.0042704, - "close_trailing_threshold_pct": -0.046918, - "ema_span_0": 1174.4, - "ema_span_1": 1217.3, - "entry_grid_double_down_factor": 2.0966, - "entry_grid_spacing_pct": 0.070355, - "entry_grid_spacing_weight": 1.5293, - "entry_initial_ema_dist": -0.090036, - "entry_initial_qty_pct": 0.07003, - "entry_trailing_grid_ratio": 0.075994, - "entry_trailing_retracement_pct": 0.023943, - "entry_trailing_threshold_pct": -0.079098, - "filter_relative_volume_clip_pct": 0.49361, - "filter_rolling_window": 57.016, - "n_positions": 1.1103, + "exchanges": ["binance", "bybit", "gateio", "bitget"], + "gap_tolerance_ohlcvs_minutes": 120.0, + "start_date": "2021-04-01", + "starting_balance": 100000}, + "bot": {"long": {"close_grid_markup_range": 0.0050591, + "close_grid_min_markup": 0.0050351, + "close_grid_qty_pct": 0.94386, + "close_trailing_grid_ratio": -0.048287, + "close_trailing_qty_pct": 0.66612, + "close_trailing_retracement_pct": 0.0090063, + "close_trailing_threshold_pct": 0.0070056, + "ema_span_0": 591.53, + "ema_span_1": 251.56, + "enforce_exposure_limit": 1.0, + "entry_grid_double_down_factor": 1.6344, + "entry_grid_spacing_pct": 0.029537, + "entry_grid_spacing_weight": 0.27803, + "entry_initial_ema_dist": -0.07151, + "entry_initial_qty_pct": 0.013548, + "entry_trailing_grid_ratio": -0.13036, + "entry_trailing_retracement_pct": 0.014867, + "entry_trailing_threshold_pct": 0.072569, + "filter_relative_volume_clip_pct": 0.035543, + "filter_rolling_window": 263.66, + "n_positions": 6.4463, + "total_wallet_exposure_limit": 1.6091, + "unstuck_close_pct": 0.066952, + "unstuck_ema_dist": -0.098354, + "unstuck_loss_allowance_pct": 0.012308, + "unstuck_threshold": 0.50078}, + "short": {"close_grid_markup_range": 0.027427, + "close_grid_min_markup": 0.0088639, + "close_grid_qty_pct": 0.38146, + "close_trailing_grid_ratio": -0.48905, + "close_trailing_qty_pct": 0.80056, + "close_trailing_retracement_pct": 0.002552, + "close_trailing_threshold_pct": -0.0075215, + "ema_span_0": 1242.6, + "ema_span_1": 717.7, + "enforce_exposure_limit": 1.0, + "entry_grid_double_down_factor": 0.36883, + "entry_grid_spacing_pct": 0.020096, + "entry_grid_spacing_weight": 5.7436, + "entry_initial_ema_dist": -0.082084, + "entry_initial_qty_pct": 0.017122, + "entry_trailing_grid_ratio": -0.90061, + 
"entry_trailing_retracement_pct": 0.023083, + "entry_trailing_threshold_pct": 0.078399, + "filter_relative_volume_clip_pct": 0.47361, + "filter_rolling_window": 251.65, + "n_positions": 7.2456, "total_wallet_exposure_limit": 0.0, - "unstuck_close_pct": 0.063395, - "unstuck_ema_dist": -0.025704, - "unstuck_loss_allowance_pct": 0.04867, - "unstuck_threshold": 0.58437}}, - "live": {"approved_coins": [], + "unstuck_close_pct": 0.08722, + "unstuck_ema_dist": -0.089717, + "unstuck_loss_allowance_pct": 0.028372, + "unstuck_threshold": 0.64427}}, + "live": {"approved_coins": "configs/approved_coins_top20mcap.json", "auto_gs": true, "coin_flags": {}, "empty_means_all_approved": true, @@ -62,8 +66,9 @@ "filter_by_min_effective_cost": true, "forced_mode_long": "", "forced_mode_short": "", - "ignored_coins": [], + "ignored_coins": {"long": [], "short": []}, "leverage": 10.0, + "market_orders_allowed": true, "max_n_cancellations_per_batch": 5, "max_n_creations_per_batch": 3, "max_n_restarts_per_day": 10, @@ -74,64 +79,67 @@ "price_distance_threshold": 0.002, "time_in_force": "good_till_cancelled", "user": "bybit_01"}, - "optimize": {"bounds": {"long_close_grid_markup_range": [0.0, 0.03], + "optimize": {"bounds": {"long_close_grid_markup_range": [0, 0.03], "long_close_grid_min_markup": [0.001, 0.03], "long_close_grid_qty_pct": [0.05, 1.0], - "long_close_trailing_grid_ratio": [-1.0, 1.0], + "long_close_trailing_grid_ratio": [-1, 1], "long_close_trailing_qty_pct": [0.05, 1.0], - "long_close_trailing_retracement_pct": [0.0, 0.1], - "long_close_trailing_threshold_pct": [-0.1, 0.1], - "long_ema_span_0": [200.0, 1440.0], - "long_ema_span_1": [200.0, 1440.0], - "long_entry_grid_double_down_factor": [0.1, 3.0], - "long_entry_grid_spacing_pct": [0.001, 0.12], - "long_entry_grid_spacing_weight": [0.0, 10.0], + "long_close_trailing_retracement_pct": [0.0001, 0.1], + "long_close_trailing_threshold_pct": [-0.01, 0.1], + "long_ema_span_0": [200, 1440], + "long_ema_span_1": [200, 1440], + "long_entry_grid_double_down_factor": [0.1, 3], + "long_entry_grid_spacing_pct": [0.001, 0.06], + "long_entry_grid_spacing_weight": [0, 10], "long_entry_initial_ema_dist": [-0.1, 0.003], - "long_entry_initial_qty_pct": [0.005, 0.1], - "long_entry_trailing_grid_ratio": [-1.0, 1.0], - "long_entry_trailing_retracement_pct": [0.0, 0.1], - "long_entry_trailing_threshold_pct": [-0.1, 0.1], - "long_filter_relative_volume_clip_pct": [0.0, 1.0], - "long_filter_rolling_window": [10.0, 360.0], - "long_n_positions": [1.0, 20.0], - "long_total_wallet_exposure_limit": [0.0, 5.0], + "long_entry_initial_qty_pct": [0.004, 0.02], + "long_entry_trailing_grid_ratio": [-1, 1], + "long_entry_trailing_retracement_pct": [0.0001, 0.1], + "long_entry_trailing_threshold_pct": [-0.01, 0.1], + "long_filter_relative_volume_clip_pct": [0, 1], + "long_filter_rolling_window": [10, 360], + "long_n_positions": [6.4, 12.0], + "long_total_wallet_exposure_limit": [0.0, 2.0], "long_unstuck_close_pct": [0.001, 0.1], "long_unstuck_ema_dist": [-0.1, 0.01], - "long_unstuck_loss_allowance_pct": [0.0, 0.05], + "long_unstuck_loss_allowance_pct": [0.001, 0.05], "long_unstuck_threshold": [0.4, 0.95], - "short_close_grid_markup_range": [0.0, 0.03], + "short_close_grid_markup_range": [0, 0.03], "short_close_grid_min_markup": [0.001, 0.03], "short_close_grid_qty_pct": [0.05, 1.0], - "short_close_trailing_grid_ratio": [-1.0, 1.0], + "short_close_trailing_grid_ratio": [-1, 1], "short_close_trailing_qty_pct": [0.05, 1.0], - "short_close_trailing_retracement_pct": [0.0, 0.1], - 
"short_close_trailing_threshold_pct": [-0.1, 0.1], - "short_ema_span_0": [200.0, 1440.0], - "short_ema_span_1": [200.0, 1440.0], - "short_entry_grid_double_down_factor": [0.1, 3.0], - "short_entry_grid_spacing_pct": [0.001, 0.12], - "short_entry_grid_spacing_weight": [0.0, 10.0], + "short_close_trailing_retracement_pct": [0.0001, 0.1], + "short_close_trailing_threshold_pct": [-0.01, 0.1], + "short_ema_span_0": [200, 1440], + "short_ema_span_1": [200, 1440], + "short_entry_grid_double_down_factor": [0.1, 3], + "short_entry_grid_spacing_pct": [0.001, 0.06], + "short_entry_grid_spacing_weight": [0, 10], "short_entry_initial_ema_dist": [-0.1, 0.003], - "short_entry_initial_qty_pct": [0.005, 0.1], - "short_entry_trailing_grid_ratio": [-1.0, 1.0], - "short_entry_trailing_retracement_pct": [0.0, 0.1], - "short_entry_trailing_threshold_pct": [-0.1, 0.1], - "short_filter_relative_volume_clip_pct": [0.0, 1.0], - "short_filter_rolling_window": [10.0, 360.0], - "short_n_positions": [1.0, 20.0], - "short_total_wallet_exposure_limit": [0.0, 5.0], + "short_entry_initial_qty_pct": [0.004, 0.02], + "short_entry_trailing_grid_ratio": [-1, 1], + "short_entry_trailing_retracement_pct": [0.0001, 0.1], + "short_entry_trailing_threshold_pct": [-0.01, 0.1], + "short_filter_relative_volume_clip_pct": [0, 1], + "short_filter_rolling_window": [10, 360], + "short_n_positions": [6.4, 12.0], + "short_total_wallet_exposure_limit": [0.0, 2.0], "short_unstuck_close_pct": [0.001, 0.1], "short_unstuck_ema_dist": [-0.1, 0.01], - "short_unstuck_loss_allowance_pct": [0.0, 0.05], + "short_unstuck_loss_allowance_pct": [0.001, 0.05], "short_unstuck_threshold": [0.4, 0.95]}, "compress_results_file": true, "crossover_probability": 0.7, - "iters": 30000, - "limits": {"lower_bound_drawdown_worst": 0.25, - "lower_bound_drawdown_worst_mean_1pct": 0.15, - "lower_bound_equity_balance_diff_mean": 0.02, - "lower_bound_loss_profit_ratio": 0.6}, + "iters": 300000, + "limits": {"lower_bound_drawdown_worst": 0.333, + "lower_bound_drawdown_worst_mean_1pct": 0.2, + "lower_bound_equity_balance_diff_neg_max": 0.4, + "lower_bound_equity_balance_diff_neg_mean": 0.01, + "lower_bound_equity_balance_diff_pos_max": 0.5, + "lower_bound_equity_balance_diff_pos_mean": 0.02, + "lower_bound_loss_profit_ratio": 0.5}, "mutation_probability": 0.2, "n_cpus": 5, "population_size": 500, - "scoring": ["mdg", "sortino_ratio"]}} \ No newline at end of file + "scoring": ["mdg", "sterling_ratio"]}} diff --git a/docs/backtesting.md b/docs/backtesting.md index 5e1a891a6..33173df0b 100644 --- a/docs/backtesting.md +++ b/docs/backtesting.md @@ -15,4 +15,15 @@ If no config is specified, it will default to `configs/template.json` ## Backtest Results -Metrics and plots are dumped to `backtests/{exchange}/`. \ No newline at end of file +Metrics and plots are dumped to `backtests/{exchange}/`. + +## Backtest CLI args + +- `-dp` to disable individual coin plotting. +- `-co` to combine the ohlcv data from multiple exchanges into a single array. Otherwise, backtest for each exchange individually. + +For a comprehensive list of CLI args: +```shell +python3 src/backtest.py -h +``` + diff --git a/docs/configuration.md b/docs/configuration.md index 30420c785..fd5be63a6 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -32,6 +32,9 @@ Here follows an overview of the parameters found in `config/template.json`. - For example, `total_wallet_exposure_limit = 1.6` means 160% of (unleveraged) wallet balance is used. 
- Each position is given equal share of total exposure limit, i.e., `wallet_exposure_limit = total_wallet_exposure_limit / n_positions`. - See more: `docs/risk_management.md`. +- `enforce_exposure_limit`: If true, will enforce exposure limits for each position. + - E.g. if for any reason a position's exposure exceeds the limit by more than 1%, reduce the position at market price back down to the exposure limit. + - Useful for risk management if, for example, the user withdraws balance or changes settings. ### Grid Entry Parameters @@ -146,7 +149,8 @@ Coins selected for trading are filtered by volume and noisiness. First, filter c - Normal mode: passivbot manages the position as normal. - Manual mode: passivbot ignores the position. - Graceful stop: if there is a position, passivbot will manage it; otherwise, passivbot will not make new positions. - - Take profit only: passivbot will only manage closing orders. + - Take profit only mode: passivbot will only manage closing orders. + - Panic mode: passivbot will close the position immediately. - `-lw` or `-sw`: Long or short wallet exposure limit. - `-lev`: Leverage. - `-lc`: Path to live config. Load all of another config's bot parameters except `[n_positions, total_wallet_exposure_limit, unstuck_loss_allowance_pct, unstuck_close_pct]`. @@ -164,6 +168,7 @@ Coins selected for trading are filtered by volume and noisiness. First, filter c - May be split into long and short by giving a json on the form: - `{"long": ["COIN1", "COIN2"], "short": ["COIN2", "COIN3"]}` - `leverage`: Leverage set on exchange. Default is 10. +- `market_orders_allowed`: If true, allow Passivbot to place market orders when order price is very close to current market price. If false, will only place limit orders. Default is true. - `max_n_cancellations_per_batch`: Will cancel n open orders per execution. - `max_n_creations_per_batch`: Will create n new orders per execution. - `max_n_restarts_per_day`: If the bot crashes for any reason, restart the bot up to n times per day before stopping completely. diff --git a/docs/optimizing.md b/docs/optimizing.md index d473c7a13..8431a9058 100644 --- a/docs/optimizing.md +++ b/docs/optimizing.md @@ -1,31 +1,52 @@ # Optimizing -Passivbot's config parameters may be automatically optimized by iterating many backtests and extracting the optimal config. +Passivbot's configuration can be automatically optimized through iterative backtesting to find optimal parameters. ## Usage ```shell -python3 src/optimize.py +python3 src/optimize.py [path/to/config.json] ``` -Or -```shell -python3 src/optimize.py path/to/config.json -``` -If no config is specified, it will default to `configs/template.json` +Defaults to `configs/template.json` if no config is specified. -## Optimizing Results +## Results Storage -All backtest results produced by the optimizer are stored in `optimize_results/`. The results file is generated during optimization with a filename constructed using the date, number of coins being optimized, and a unique identifier. -Each evaluation result is appended to the `.txt` file as a raw single line JSON string, including the analysis and the corresponding configuration. +Optimization results are stored in `optimize_results/` with filenames containing the date, exchanges, number of coins, and a unique identifier. Each result is appended as a single-line JSON string containing analysis and configuration.
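+For illustration only, a results file can also be inspected directly. The following is a minimal sketch (not part of Passivbot); it assumes nothing beyond the format described above, i.e. one raw JSON object per line, and the file path shown is a placeholder:
+
+```python
+import json
+
+# Placeholder path; substitute an actual file from optimize_results/.
+results_path = "optimize_results/example_all_results.txt"
+
+evaluations = []
+with open(results_path) as f:
+    for line in f:
+        line = line.strip()
+        if line:
+            # Each line holds one evaluation: its analysis metrics plus the config used.
+            evaluations.append(json.loads(line))
+
+print(f"loaded {len(evaluations)} evaluations")
+```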
-## Analyzing Results +## Analysis +The script automatically runs `src/tools/extract_best_config.py` after optimization to identify the best performing configuration, saving the best candidate and the pareto front to `optimize_results_analysis/`. -After optimization is complete, the script `src/tools/extract_best_config.py` will be run, analyzing all the backtest results and dumping the best one to `optimize_results_analysis/` -To manually analyze results, run: +Manual analysis: ```shell python3 src/tools/extract_best_config.py path/to/results_file.txt ``` -This script will extract the configuration that performed best according to the optimization criteria. +## Performance Metrics + +Based on daily equity changes: `daily_eqs = equity.groupby(day).pct_change()` + +### Key Metrics: + +- adg: Average daily gain (`daily_eqs.mean()`) +- mdg: Median daily gain +- gain: Final gain (`balance[-1] / balance[0]`) +- drawdown_worst: Maximum peak-to-trough equity decline +- drawdown_worst_mean_1pct: Mean of the 1% worst drawdowns on daily equity samples +- expected_shortfall_1pct: Average of worst 1% losses (CVaR) + +### Risk Ratios: + +- sharpe_ratio: Risk-adjusted return (`adg / daily_eqs.std()`) +- sortino_ratio: Downside risk-adjusted return (`adg / downside_eqs.std()`) +- calmar_ratio: Return to max drawdown ratio (`adg / drawdown_worst`) +- sterling_ratio: Return to average worst 1% drawdowns ratio (`adg / drawdown_worst_mean_1pct`) +- omega_ratio: Ratio of gains to losses +- loss_profit_ratio: Absolute loss sum to profit sum ratio +- equity_balance_diff_neg_max: greatest distance between balance and equity when equity is less than balance +- equity_balance_diff_neg_mean: mean distance between balance and equity when equity is less than balance +- equity_balance_diff_pos_max: greatest distance between balance and equity when equity is greater than balance +- equity_balance_diff_pos_mean: mean distance between balance and equity when equity is greater than balance + +Suffix `_w` indicates weighted mean across 10 temporal subsets (whole, last_half, last_third, ... last_tenth). diff --git a/docs/tools.md b/docs/tools.md new file mode 100644 index 000000000..b8b64b248 --- /dev/null +++ b/docs/tools.md @@ -0,0 +1,35 @@ +# Tools + +## Extract Pareto Frontier and best config from optimize output + +The pareto front and best config extracted will be dumped in `optimize_results_analysis/`. Results from an optimize session are usually dumped in `optimize_results/`. + +```shell +python3 src/tools/extract_best_config.py path/to/all_results.txt +``` + +## Copy ohlcv data from old location to new location + +In Passivbot v7.2.13 the location of ohlcv data changed. Run this script to copy data already downloaded in earlier versions. + +```shell +python3 src/tools/copy_ohlcvs_from_v7.2.12.py +``` + +## Generate list of approved coins based on market cap + +```shell +python3 src/tools/generate_mcap_list.py +``` + +Output from `python3 src/tools/generate_mcap_list.py -h`: +``` + --n_coins N_COINS, -n N_COINS + Maxiumum number of top market cap coins. Default=100 + --minimum_market_cap_dollars MINIMUM_MARKET_CAP_MILLIONS, -m MINIMUM_MARKET_CAP_MILLIONS + Minimum market cap in millions of USD. Default=300.0 + --exchange EXCHANGE, -e EXCHANGE + Optional: filter by coins available on exchange. Comma separated values. Default=None + --output OUTPUT, -o OUTPUT + Optional: Output path. 
Default=configs/approved_coins_{n_coins}_{min_mcap}.json +``` diff --git a/notebooks/notes_backtest.ipynb b/notebooks/notes_backtest.ipynb index 8c8301ceb..ff44cdaaf 100644 --- a/notebooks/notes_backtest.ipynb +++ b/notebooks/notes_backtest.ipynb @@ -44,9 +44,10 @@ "metadata": {}, "outputs": [], "source": [ - "#config['backtest']['start_date'] = '2023-01-01'\n", - "#config['backtest']['end_date'] = '2024-01-01'\n", - "exchange = config['backtest']['exchanges'][0]" + "config['backtest']['start_date'] = '2024-01-01'\n", + "#config['backtest']['end_date'] = '2025-01-25'\n", + "config['backtest']['exchanges'] = ['binance', 'bybit']\n", + "exchange = 'combined' if config['backtest']['combine_ohlcvs'] else config['backtest']['exchanges'][0]" ] }, { @@ -56,8 +57,8 @@ "metadata": {}, "outputs": [], "source": [ - "symbols, hlcvs, mss, results_path, cache_dir = await prepare_hlcvs_mss(config, exchange)\n", - "config['backtest']['symbols'][exchange] = symbols" + "coins, hlcvs, mss, results_path, cache_dir = await prepare_hlcvs_mss(config, exchange)\n", + "config['backtest']['coins'] = {exchange: coins}" ] }, { @@ -83,6 +84,24 @@ "fills, equities, analysis = run_backtest(hlcvs, mss, config, exchange)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8d185d1-1ef2-4b9f-973f-c6c13831ef9e", + "metadata": {}, + "outputs": [], + "source": [ + "analysis = expand_analysis(analysis, fills, config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "855f5f2d-df4b-4c09-9e18-06085981ae0f", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -95,7 +114,7 @@ "print(f'elapsed {utc_ms() - sts}')\n", "sts = utc_ms()\n", "equities = pd.Series(equities)\n", - "analysis_py, balance_and_equity = analyze_fills_forager(config['backtest']['symbols'], hlcvs, fdf, equities)\n", + "analysis_py, balance_and_equity = analyze_fills_forager(config['backtest']['coins'], hlcvs, fdf, equities)\n", "for k in analysis_py:\n", " if k not in analysis:\n", " analysis[k] = analysis_py[k]\n", @@ -111,14 +130,14 @@ "metadata": {}, "outputs": [], "source": [ - "syms_sorted_by_volume = fdf.groupby('symbol').fee_paid.sum().sort_values().index.to_list()\n", - "for i, symbol in enumerate(syms_sorted_by_volume[:5]):\n", - " print(f\"Plotting fills for {symbol}\")\n", - " hlcvs_df = pd.DataFrame(hlcvs[:, symbols.index(symbol), :], columns=[\"high\", \"low\", \"close\", \"volume\"])\n", - " fdfc = fdf[fdf.symbol == symbol]\n", + "coins_sorted_by_volume = fdf.groupby('coin').fee_paid.sum().sort_values().index.to_list()\n", + "for i, coin in enumerate(coins_sorted_by_volume[:5]):\n", + " print(f\"Plotting fills for {coin}\")\n", + " hlcvs_df = pd.DataFrame(hlcvs[:, coins.index(coin), :], columns=[\"high\", \"low\", \"close\", \"volume\"])\n", + " fdfc = fdf[fdf.coin == coin]\n", " plt.clf()\n", " plot_fills_forager(fdfc, hlcvs_df)\n", - " plt.title(f\"Fills {symbol}\")\n", + " plt.title(f\"Fills {coin}\")\n", " plt.xlabel = \"time\"\n", " plt.ylabel = \"price\"\n", " plt.show()\n" @@ -132,7 +151,7 @@ "outputs": [], "source": [ "# performers worst to best\n", - "for x in fdf.groupby('symbol').pnl.sum().sort_values().to_dict().items():\n", + "for x in fdf.groupby('coin').pnl.sum().sort_values().to_dict().items():\n", " print(x)" ] }, diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index 9cec257ed..b8d5be8bd 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -268,10 +268,8 @@ impl<'a> Backtest<'a> { 
// Rolling calculation let safe_start = (*prev_k).saturating_sub(window); for idx in 0..self.n_coins { - rolling_volume_sum[idx] -= self - .hlcvs - .slice(s![safe_start..start_k, idx, VOLUME]) - .sum(); + rolling_volume_sum[idx] -= + self.hlcvs.slice(s![safe_start..start_k, idx, VOLUME]).sum(); rolling_volume_sum[idx] += self.hlcvs.slice(s![*prev_k..k, idx, VOLUME]).sum(); volume_indices[idx] = (rolling_volume_sum[idx], idx); } @@ -373,8 +371,13 @@ impl<'a> Backtest<'a> { fn update_equities(&mut self, k: usize) { let mut equity = self.balance; - // Calculate unrealized PnL for long positions - for (&idx, position) in &self.positions.long { + + // Sort long keys + let mut long_keys: Vec = self.positions.long.keys().cloned().collect(); + long_keys.sort(); + // Calculate unrealized PnL for each long position in sorted order + for idx in long_keys { + let position = &self.positions.long[&idx]; let current_price = self.hlcvs[[k, idx, CLOSE]]; let upnl = calc_pnl_long( position.price, @@ -384,8 +387,13 @@ impl<'a> Backtest<'a> { ); equity += upnl; } - // Calculate unrealized PnL for short positions - for (&idx, position) in &self.positions.short { + + // Sort short keys + let mut short_keys: Vec = self.positions.short.keys().cloned().collect(); + short_keys.sort(); + // Calculate unrealized PnL for each short position in sorted order + for idx in short_keys { + let position = &self.positions.short[&idx]; let current_price = self.hlcvs[[k, idx, CLOSE]]; let upnl = calc_pnl_short( position.price, @@ -395,6 +403,7 @@ impl<'a> Backtest<'a> { ); equity += upnl; } + self.equities.push(equity); } @@ -409,7 +418,9 @@ impl<'a> Backtest<'a> { _ => panic!("Invalid pside"), }; - let current_positions: Vec = positions.keys().cloned().collect(); + // Sort positions to ensure stable iteration + let mut current_positions: Vec = positions.keys().cloned().collect(); + current_positions.sort(); let mut preferred_coins = Vec::new(); // Only calculate preferred coins if there are open slots @@ -588,7 +599,7 @@ impl<'a> Backtest<'a> { let mut adjusted_close_qty = close_fill.qty; if new_psize < 0.0 { println!("warning: close qty greater than psize long"); - println!("symbol: {}", self.backtest_params.symbols[idx]); + println!("coin: {}", self.backtest_params.coins[idx]); println!("new_psize: {}", new_psize); println!("close order: {:?}", close_fill); new_psize = 0.0; @@ -616,16 +627,16 @@ impl<'a> Backtest<'a> { self.positions.long.get_mut(&idx).unwrap().size = new_psize; } self.fills.push(Fill { - index: k, // index minute - symbol: self.backtest_params.symbols[idx].clone(), // symbol - pnl, // realized pnl - fee_paid, // fee paid - balance: self.balance, // balance after fill - fill_qty: adjusted_close_qty, // fill qty - fill_price: close_fill.price, // fill price - position_size: new_psize, // psize after fill - position_price: current_pprice, // pprice after fill - order_type: close_fill.order_type.clone(), // fill type + index: k, // index minute + coin: self.backtest_params.coins[idx].clone(), // coin + pnl, // realized pnl + fee_paid, // fee paid + balance: self.balance, // balance after fill + fill_qty: adjusted_close_qty, // fill qty + fill_price: close_fill.price, // fill price + position_size: new_psize, // psize after fill + position_price: current_pprice, // pprice after fill + order_type: close_fill.order_type.clone(), // fill type }); } @@ -637,7 +648,7 @@ impl<'a> Backtest<'a> { let mut adjusted_close_qty = order.qty; if new_psize > 0.0 { println!("warning: close qty greater than psize short"); - 
println!("symbol: {}", self.backtest_params.symbols[idx]); + println!("coin: {}", self.backtest_params.coins[idx]); println!("new_psize: {}", new_psize); println!("close order: {:?}", order); new_psize = 0.0; @@ -665,16 +676,16 @@ impl<'a> Backtest<'a> { self.positions.short.get_mut(&idx).unwrap().size = new_psize; } self.fills.push(Fill { - index: k, // index minute - symbol: self.backtest_params.symbols[idx].clone(), // symbol - pnl, // realized pnl - fee_paid, // fee paid - balance: self.balance, // balance after fill - fill_qty: adjusted_close_qty, // fill qty - fill_price: order.price, // fill price - position_size: new_psize, // psize after fill - position_price: current_pprice, // pprice after fill - order_type: order.order_type.clone(), // fill type + index: k, // index minute + coin: self.backtest_params.coins[idx].clone(), // coin + pnl, // realized pnl + fee_paid, // fee paid + balance: self.balance, // balance after fill + fill_qty: adjusted_close_qty, // fill qty + fill_price: order.price, // fill price + position_size: new_psize, // psize after fill + position_price: current_pprice, // pprice after fill + order_type: order.order_type.clone(), // fill type }); } @@ -701,16 +712,16 @@ impl<'a> Backtest<'a> { self.positions.long.get_mut(&idx).unwrap().size = new_psize; self.positions.long.get_mut(&idx).unwrap().price = new_pprice; self.fills.push(Fill { - index: k, // index minute - symbol: self.backtest_params.symbols[idx].clone(), // symbol - pnl: 0.0, // realized pnl - fee_paid, // fee paid - balance: self.balance, // balance after fill - fill_qty: order.qty, // fill qty - fill_price: order.price, // fill price - position_size: self.positions.long[&idx].size, // psize after fill - position_price: self.positions.long[&idx].price, // pprice after fill - order_type: order.order_type.clone(), // fill type + index: k, // index minute + coin: self.backtest_params.coins[idx].clone(), // coin + pnl: 0.0, // realized pnl + fee_paid, // fee paid + balance: self.balance, // balance after fill + fill_qty: order.qty, // fill qty + fill_price: order.price, // fill price + position_size: self.positions.long[&idx].size, // psize after fill + position_price: self.positions.long[&idx].price, // pprice after fill + order_type: order.order_type.clone(), // fill type }); } @@ -737,16 +748,16 @@ impl<'a> Backtest<'a> { self.positions.short.get_mut(&idx).unwrap().size = new_psize; self.positions.short.get_mut(&idx).unwrap().price = new_pprice; self.fills.push(Fill { - index: k, // index minute - symbol: self.backtest_params.symbols[idx].clone(), // symbol - pnl: 0.0, // realized pnl - fee_paid, // fee paid - balance: self.balance, // balance after fill - fill_qty: order.qty, // fill qty - fill_price: order.price, // fill price - position_size: self.positions.short[&idx].size, // psize after fill - position_price: self.positions.short[&idx].price, // pprice after fill - order_type: order.order_type.clone(), // fill type + index: k, // index minute + coin: self.backtest_params.coins[idx].clone(), // coin + pnl: 0.0, // realized pnl + fee_paid, // fee paid + balance: self.balance, // balance after fill + fill_qty: order.qty, // fill qty + fill_price: order.price, // fill price + position_size: self.positions.short[&idx].size, // psize after fill + position_price: self.positions.short[&idx].price, // pprice after fill + order_type: order.order_type.clone(), // fill type }); } @@ -1033,7 +1044,11 @@ impl<'a> Backtest<'a> { ); if unstuck_allowances.0 > 0.0 { // Check long positions - for (&idx, position) 
in &self.positions.long { + // Sort the keys for long + let mut long_keys: Vec = self.positions.long.keys().cloned().collect(); + long_keys.sort(); + for idx in long_keys { + let position = &self.positions.long[&idx]; let wallet_exposure = calc_wallet_exposure( self.exchange_params_list[idx].c_mult, self.balance, @@ -1061,7 +1076,12 @@ impl<'a> Backtest<'a> { ); if unstuck_allowances.1 > 0.0 { // Check short positions - for (&idx, position) in &self.positions.short { + // Sort the keys for short + let mut short_keys: Vec = self.positions.short.keys().cloned().collect(); + short_keys.sort(); + + for idx in short_keys { + let position = &self.positions.short[&idx]; let wallet_exposure = calc_wallet_exposure( self.exchange_params_list[idx].c_mult, self.balance, @@ -1084,8 +1104,13 @@ impl<'a> Backtest<'a> { if stuck_positions.is_empty() { return (NO_POS, NO_POS, Order::default()); } - // Sort stuck positions by pprice_diff - stuck_positions.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(Ordering::Equal)); + // Sort with tie-breaker: first by diff, then by idx + stuck_positions.sort_by(|(i1, side1, d1), (i2, side2, d2)| { + match d1.partial_cmp(d2).unwrap_or(std::cmp::Ordering::Equal) { + std::cmp::Ordering::Equal => i1.cmp(i2), + other => other, + } + }); for (idx, pside, _) in stuck_positions { match pside { LONG => { @@ -1229,8 +1254,9 @@ impl<'a> Backtest<'a> { fn update_open_orders_any_fill(&mut self, k: usize) { if self.trading_enabled.long { if self.trailing_enabled.long { - let positions_long_indices: Vec = + let mut positions_long_indices: Vec = self.positions.long.keys().cloned().collect(); + positions_long_indices.sort(); for idx in &positions_long_indices { if !self.did_fill_long.contains(&idx) { self.update_trailing_prices(k, *idx, LONG); @@ -1241,7 +1267,8 @@ impl<'a> Backtest<'a> { self.open_orders .long .retain(|&idx, _| self.actives.long.contains(&idx)); - let active_long_indices: Vec = self.actives.long.iter().cloned().collect(); + let mut active_long_indices: Vec = self.actives.long.iter().cloned().collect(); + active_long_indices.sort(); // Ensure deterministic order for &idx in &active_long_indices { self.update_stuck_status(idx, LONG); self.update_open_orders_long_single(k, idx); @@ -1249,8 +1276,9 @@ impl<'a> Backtest<'a> { } if self.trading_enabled.short { if self.trailing_enabled.short { - let positions_short_indices: Vec = + let mut positions_short_indices: Vec = self.positions.short.keys().cloned().collect(); + positions_short_indices.sort(); for idx in &positions_short_indices { if !self.did_fill_short.contains(&idx) { self.update_trailing_prices(k, *idx, SHORT); @@ -1261,7 +1289,8 @@ impl<'a> Backtest<'a> { self.open_orders .short .retain(|&idx, _| self.actives.short.contains(&idx)); - let active_short_indices: Vec = self.actives.short.iter().cloned().collect(); + let mut active_short_indices: Vec = self.actives.short.iter().cloned().collect(); + active_short_indices.sort(); // Ensure deterministic order for &idx in &active_short_indices { self.update_stuck_status(idx, SHORT); self.update_open_orders_short_single(k, idx); @@ -1293,10 +1322,12 @@ impl<'a> Backtest<'a> { // Update selectively: // - actives if len(positions) < n_positions // - unstuck close if any stuck - // - entries for symbols with open trailing entries - // - closes for symbols with open trailing closes + // - entries for coins with open trailing entries + // - closes for coins with open trailing closes if self.trading_enabled.long { - let positions_long_indices: Vec = 
self.positions.long.keys().cloned().collect(); + let mut positions_long_indices: Vec = + self.positions.long.keys().cloned().collect(); + positions_long_indices.sort(); if self.trailing_enabled.long { for idx in &positions_long_indices { if !self.did_fill_long.contains(idx) { @@ -1311,7 +1342,8 @@ impl<'a> Backtest<'a> { .long .retain(|&idx, _| self.actives.long.contains(&idx)); } - let active_long_indices: Vec = self.actives.long.iter().cloned().collect(); + let mut active_long_indices: Vec = self.actives.long.iter().cloned().collect(); + active_long_indices.sort(); for idx in active_long_indices { if actives_without_pos.contains(&idx) @@ -1331,8 +1363,9 @@ impl<'a> Backtest<'a> { } if self.trading_enabled.short { - let positions_short_indices: Vec = + let mut positions_short_indices: Vec = self.positions.short.keys().cloned().collect(); + positions_short_indices.sort(); if self.trailing_enabled.short { for idx in &positions_short_indices { if !self.did_fill_short.contains(idx) { @@ -1347,7 +1380,8 @@ impl<'a> Backtest<'a> { .short .retain(|&idx, _| self.actives.short.contains(&idx)); } - let active_short_indices: Vec = self.actives.short.iter().cloned().collect(); + let mut active_short_indices: Vec = self.actives.short.iter().cloned().collect(); + active_short_indices.sort(); for idx in active_short_indices { if actives_without_pos.contains(&idx) || self.open_orders.short.get(&idx).map_or(false, |orders| { @@ -1447,7 +1481,10 @@ fn calc_ema_alphas(bot_params_pair: &BotParamsPair) -> EmaAlphas { } } -pub fn analyze_backtest(fills: &[Fill], equities: &Vec) -> Analysis { +fn analyze_backtest_basic(fills: &[Fill], equities: &Vec) -> Analysis { + if fills.len() <= 1 { + return Analysis::default(); + } // Calculate daily equities let mut daily_eqs = Vec::new(); let mut current_day = 0; @@ -1596,14 +1633,34 @@ pub fn analyze_backtest(fills: &[Fill], equities: &Vec) -> Analysis { bal_eq.push((last_balance, equity)); } - let (equity_balance_diff_sum, equity_balance_diff_max) = - bal_eq - .iter() - .fold((0.0, 0.0), |(sum, max), &(balance, equity)| { - let diff = (equity - balance).abs() / balance; - (sum + diff, f64::max(max, diff)) - }); - let equity_balance_diff_mean = equity_balance_diff_sum / bal_eq.len() as f64; + // Calculate equity-balance differences with separate positive and negative tracking + let mut ebds_pos = Vec::new(); + let mut ebds_neg = Vec::new(); + + for &(balance, equity) in bal_eq.iter() { + let ebd = (equity - balance) / balance; + if ebd > 0.0 { + ebds_pos.push(ebd); + } else if ebd < 0.0 { + ebds_neg.push(ebd); + } + } + + let equity_balance_diff_pos_max = ebds_pos.iter().fold(0.0, |max, &x| f64::max(max, x)); + let equity_balance_diff_pos_mean = if !ebds_pos.is_empty() { + ebds_pos.iter().sum::() / ebds_pos.len() as f64 + } else { + 0.0 + }; + + let equity_balance_diff_neg_max = ebds_neg.iter().fold(0.0, |max, &x| f64::max(max, x.abs())); + let equity_balance_diff_neg_mean = if !ebds_neg.is_empty() { + ebds_neg.iter().map(|x| x.abs()).sum::() / ebds_neg.len() as f64 + } else { + 0.0 + }; + + let gain = fills[fills.len() - 1].balance / fills[0].balance; // Calculate profit factor let (total_profit, total_loss) = fills.iter().fold((0.0, 0.0), |(profit, loss), fill| { @@ -1619,21 +1676,89 @@ pub fn analyze_backtest(fills: &[Fill], equities: &Vec) -> Analysis { total_loss / total_profit }; - Analysis { - adg, - mdg, - sharpe_ratio, - sortino_ratio, - omega_ratio, - expected_shortfall_1pct, - calmar_ratio, - sterling_ratio, - drawdown_worst, - drawdown_worst_mean_1pct, - 
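A Python rendering of the positive/negative equity-balance divergence split introduced above (variable names are illustrative; as in the Rust code, the negative side is stored as absolute values):

    bal_eq = [(1000.0, 1010.0), (1000.0, 980.0), (1020.0, 1020.0)]  # (balance, equity) samples

    ebds = [(equity - balance) / balance for balance, equity in bal_eq]
    ebds_pos = [x for x in ebds if x > 0.0]
    ebds_neg = [abs(x) for x in ebds if x < 0.0]

    equity_balance_diff_pos_max = max(ebds_pos, default=0.0)
    equity_balance_diff_pos_mean = sum(ebds_pos) / len(ebds_pos) if ebds_pos else 0.0
    equity_balance_diff_neg_max = max(ebds_neg, default=0.0)
    equity_balance_diff_neg_mean = sum(ebds_neg) / len(ebds_neg) if ebds_neg else 0.0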
equity_balance_diff_mean, - equity_balance_diff_max, - loss_profit_ratio, + let mut analysis = Analysis::default(); + analysis.adg = adg; + analysis.mdg = mdg; + analysis.gain = gain; + analysis.sharpe_ratio = sharpe_ratio; + analysis.sortino_ratio = sortino_ratio; + analysis.omega_ratio = omega_ratio; + analysis.expected_shortfall_1pct = expected_shortfall_1pct; + analysis.calmar_ratio = calmar_ratio; + analysis.sterling_ratio = sterling_ratio; + analysis.drawdown_worst = drawdown_worst; + analysis.drawdown_worst_mean_1pct = drawdown_worst_mean_1pct; + analysis.equity_balance_diff_neg_max = equity_balance_diff_neg_max; + analysis.equity_balance_diff_neg_mean = equity_balance_diff_neg_mean; + analysis.equity_balance_diff_pos_max = equity_balance_diff_pos_max; + analysis.equity_balance_diff_pos_mean = equity_balance_diff_pos_mean; + analysis.loss_profit_ratio = loss_profit_ratio; + + analysis +} + +pub fn analyze_backtest(fills: &[Fill], equities: &Vec) -> Analysis { + let mut analysis = analyze_backtest_basic(fills, equities); + + if fills.len() <= 1 { + return analysis; + } + + let n = equities.len(); + let mut subset_analyses = Vec::with_capacity(10); + subset_analyses.push(analysis.clone()); + + for i in 1..10 { + // fraction of the data we want to keep: + // i=1 => fraction = 0.5 => last half + // i=2 => fraction = 0.3333 => last third + // i=3 => fraction = 0.25 => last quarter + // etc. + let fraction = 1.0 / (1.0 + i as f64); + + // start index for slicing the 'last' fraction + let start_idx = (n as f64 - fraction * (n as f64)).round() as usize; + + // slice from start_idx to the end + let subset_equities = &equities[start_idx..]; + if subset_equities.len() == 0 { + break; + } + + // filter fills that happened after or at start_idx + let subset_fills: Vec = fills + .iter() + .filter(|fill| fill.index >= start_idx) + .cloned() + .collect(); + if subset_fills.len() == 0 { + break; + } + + let subset_analysis = analyze_backtest_basic(&subset_fills, &subset_equities.to_vec()); + subset_analyses.push(subset_analysis); } + + // Compute weighted metrics as the mean of subset analyses + analysis.adg_w = subset_analyses.iter().map(|a| a.adg).sum::() / 10.0; + analysis.mdg_w = subset_analyses.iter().map(|a| a.mdg).sum::() / 10.0; + analysis.gain_w = subset_analyses.iter().map(|a| a.gain).sum::() / 10.0; + analysis.sharpe_ratio_w = subset_analyses.iter().map(|a| a.sharpe_ratio).sum::() / 10.0; + analysis.sortino_ratio_w = subset_analyses.iter().map(|a| a.sortino_ratio).sum::() / 10.0; + analysis.omega_ratio_w = subset_analyses.iter().map(|a| a.omega_ratio).sum::() / 10.0; + analysis.calmar_ratio_w = subset_analyses.iter().map(|a| a.calmar_ratio).sum::() / 10.0; + analysis.sterling_ratio_w = subset_analyses + .iter() + .map(|a| a.sterling_ratio) + .sum::() + / 10.0; + analysis.loss_profit_ratio_w = subset_analyses + .iter() + .map(|a| a.loss_profit_ratio) + .sum::() + / 10.0; + + analysis } fn calc_drawdowns(equity_series: &[f64]) -> Vec { diff --git a/passivbot-rust/src/closes.rs b/passivbot-rust/src/closes.rs index 8a53958fa..0b3d0007f 100644 --- a/passivbot-rust/src/closes.rs +++ b/passivbot-rust/src/closes.rs @@ -4,7 +4,8 @@ use crate::types::{ StateParams, TrailingPriceBundle, }; use crate::utils::{ - calc_pprice_diff_int, calc_wallet_exposure, cost_to_qty, round_, round_dn, round_up, + calc_pprice_diff_int, calc_wallet_exposure, cost_to_qty, interpolate, round_, round_dn, + round_up, }; use ndarray::{Array1, Array2}; use std::collections::HashMap; @@ -230,6 +231,46 @@ pub fn 
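The new "_w" metrics are recency-weighted: the same analysis is recomputed on the last 1/2, 1/3, ..., 1/10 of the data and the ten results are averaged. A minimal Python sketch under that reading, where metric_fn stands in for any single metric computation:

    def weighted_metric(equities, metric_fn):
        n = len(equities)
        values = [metric_fn(equities)]      # full data set
        for i in range(1, 10):
            fraction = 1.0 / (1.0 + i)      # last half, last third, ..., last tenth
            start_idx = round(n - fraction * n)
            subset = equities[start_idx:]
            if not subset:
                break
            values.append(metric_fn(subset))
        return sum(values) / 10.0           # fixed divisor, mirroring the Rust code

Because the divisor is fixed at 10, any subsets skipped by the early break effectively contribute zero, which penalizes very short histories.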
calc_next_close_long( // no position return Order::default(); } + let wallet_exposure = calc_wallet_exposure( + exchange_params.c_mult, + state_params.balance, + position.size, + position.price, + ); + let wallet_exposure_ratio = if bot_params.wallet_exposure_limit <= 0.0 { + 10.0 + } else { + wallet_exposure / bot_params.wallet_exposure_limit + }; + if bot_params.enforce_exposure_limit && wallet_exposure_ratio > 1.01 { + let position_size_lowered = position.size * 0.9; + let wallet_exposure_lowered = calc_wallet_exposure( + exchange_params.c_mult, + state_params.balance, + position_size_lowered, + position.price, + ); + let ideal_psize = interpolate( + bot_params.wallet_exposure_limit * 1.01, + &[wallet_exposure, wallet_exposure_lowered], + &[position.size, position_size_lowered], + ); + let auto_reduce_qty = position.size - ideal_psize; + if auto_reduce_qty > 0.0 { + let close_qty = f64::min( + round_(position.size, exchange_params.qty_step), + f64::max( + calc_min_entry_qty(state_params.order_book.ask, &exchange_params), + round_(auto_reduce_qty, exchange_params.qty_step), + ), + ); + return Order { + price: state_params.order_book.ask, + qty: -close_qty, + order_type: OrderType::CloseAutoReduceLong, + }; + } + } if bot_params.close_trailing_grid_ratio >= 1.0 || bot_params.close_trailing_grid_ratio <= -1.0 { // return trailing only return calc_trailing_close_long( @@ -244,12 +285,6 @@ pub fn calc_next_close_long( // return grid only return calc_grid_close_long(&exchange_params, &state_params, &bot_params, &position); } - let wallet_exposure_ratio = calc_wallet_exposure( - exchange_params.c_mult, - state_params.balance, - position.size, - position.price, - ) / bot_params.wallet_exposure_limit; if bot_params.close_trailing_grid_ratio > 0.0 { // trailing first if wallet_exposure_ratio < bot_params.close_trailing_grid_ratio { @@ -506,6 +541,46 @@ pub fn calc_next_close_short( // no position return Order::default(); } + let wallet_exposure = calc_wallet_exposure( + exchange_params.c_mult, + state_params.balance, + position_size_abs, + position.price, + ); + let wallet_exposure_ratio = if bot_params.wallet_exposure_limit <= 0.0 { + 10.0 + } else { + wallet_exposure / bot_params.wallet_exposure_limit + }; + if bot_params.enforce_exposure_limit && wallet_exposure_ratio > 1.01 { + let position_size_lowered = position_size_abs * 0.9; + let wallet_exposure_lowered = calc_wallet_exposure( + exchange_params.c_mult, + state_params.balance, + position_size_lowered, + position.price, + ); + let ideal_psize = interpolate( + bot_params.wallet_exposure_limit * 1.01, + &[wallet_exposure, wallet_exposure_lowered], + &[position_size_abs, position_size_lowered], + ); + let auto_reduce_qty = position_size_abs - ideal_psize; + if auto_reduce_qty > 0.0 { + let close_qty = f64::min( + round_(position_size_abs, exchange_params.qty_step), + f64::max( + calc_min_entry_qty(state_params.order_book.bid, &exchange_params), + round_(auto_reduce_qty, exchange_params.qty_step), + ), + ); + return Order { + price: state_params.order_book.bid, + qty: close_qty, + order_type: OrderType::CloseAutoReduceShort, + }; + } + } if bot_params.close_trailing_grid_ratio >= 1.0 || bot_params.close_trailing_grid_ratio <= -1.0 { // return trailing only return calc_trailing_close_short( diff --git a/passivbot-rust/src/python.rs b/passivbot-rust/src/python.rs index feebe0c27..1ac1c4421 100644 --- a/passivbot-rust/src/python.rs +++ b/passivbot-rust/src/python.rs @@ -86,6 +86,7 @@ pub fn run_backtest( let py_analysis = PyDict::new(py); 
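The enforce_exposure_limit block added to calc_next_close_long/short auto-reduces a position whose wallet exposure exceeds its limit by more than 1%. A hedged Python sketch of the quantity calculation, assuming wallet exposure ≈ abs(position_size) * position_price * c_mult / balance and that interpolate() is linear interpolation between two sample points:

    def interpolate(x, xs, ys):
        # linear interpolation/extrapolation through (xs[0], ys[0]) and (xs[1], ys[1])
        return ys[0] + (ys[1] - ys[0]) * (x - xs[0]) / (xs[1] - xs[0])

    def auto_reduce_qty(balance, psize, pprice, c_mult, we_limit):
        we = abs(psize) * pprice * c_mult / balance
        if we_limit <= 0.0 or we / we_limit <= 1.01:
            return 0.0                      # within tolerance; no auto-reduce
        psize_lowered = abs(psize) * 0.9
        we_lowered = psize_lowered * pprice * c_mult / balance
        ideal_psize = interpolate(we_limit * 1.01, [we, we_lowered], [abs(psize), psize_lowered])
        return max(0.0, abs(psize) - ideal_psize)

The actual order then closes min(full position, max(min entry qty, rounded auto_reduce_qty)) at the current ask for longs or bid for shorts.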
py_analysis.set_item("adg", analysis.adg)?; py_analysis.set_item("mdg", analysis.mdg)?; + py_analysis.set_item("gain", analysis.gain)?; py_analysis.set_item("sharpe_ratio", analysis.sharpe_ratio)?; py_analysis.set_item("sortino_ratio", analysis.sortino_ratio)?; py_analysis.set_item("omega_ratio", analysis.omega_ratio)?; @@ -98,17 +99,38 @@ pub fn run_backtest( analysis.drawdown_worst_mean_1pct, )?; py_analysis.set_item( - "equity_balance_diff_mean", - analysis.equity_balance_diff_mean, + "equity_balance_diff_neg_max", + analysis.equity_balance_diff_neg_max, + )?; + py_analysis.set_item( + "equity_balance_diff_neg_mean", + analysis.equity_balance_diff_neg_mean, + )?; + py_analysis.set_item( + "equity_balance_diff_pos_max", + analysis.equity_balance_diff_pos_max, + )?; + py_analysis.set_item( + "equity_balance_diff_pos_mean", + analysis.equity_balance_diff_pos_mean, )?; - py_analysis.set_item("equity_balance_diff_max", analysis.equity_balance_diff_max)?; py_analysis.set_item("loss_profit_ratio", analysis.loss_profit_ratio)?; + py_analysis.set_item("adg_w", analysis.adg_w)?; + py_analysis.set_item("mdg_w", analysis.mdg_w)?; + py_analysis.set_item("gain_w", analysis.gain_w)?; + py_analysis.set_item("sharpe_ratio_w", analysis.sharpe_ratio_w)?; + py_analysis.set_item("sortino_ratio_w", analysis.sortino_ratio_w)?; + py_analysis.set_item("omega_ratio_w", analysis.omega_ratio_w)?; + py_analysis.set_item("calmar_ratio_w", analysis.calmar_ratio_w)?; + py_analysis.set_item("sterling_ratio_w", analysis.sterling_ratio_w)?; + py_analysis.set_item("loss_profit_ratio_w", analysis.loss_profit_ratio_w)?; + // Convert fills to a 2D array with mixed types let mut py_fills = Array2::from_elem((fills.len(), 10), py.None()); for (i, fill) in fills.iter().enumerate() { py_fills[(i, 0)] = fill.index.into_py(py); - py_fills[(i, 1)] = ::clone(&fill.symbol).into_py(py); + py_fills[(i, 1)] = ::clone(&fill.coin).into_py(py); py_fills[(i, 2)] = fill.pnl.into_py(py); py_fills[(i, 3)] = fill.fee_paid.into_py(py); py_fills[(i, 4)] = fill.balance.into_py(py); @@ -134,7 +156,7 @@ fn backtest_params_from_dict(dict: &PyDict) -> PyResult { Ok(BacktestParams { starting_balance: extract_value(dict, "starting_balance").unwrap_or_default(), maker_fee: extract_value(dict, "maker_fee").unwrap_or_default(), - symbols: extract_value(dict, "symbols").unwrap_or_default(), + coins: extract_value(dict, "coins").unwrap_or_default(), }) } @@ -155,6 +177,21 @@ fn bot_params_pair_from_dict(dict: &PyDict) -> PyResult { }) } +fn extract_bool_value(dict: &PyDict, key: &str) -> PyResult { + if let Ok(val) = extract_value::(dict, key) { + Ok(val) + } else if let Ok(val) = extract_value::(dict, key) { + Ok(val != 0) + } else if let Ok(val) = extract_value::(dict, key) { + Ok(val != 0) + } else if let Ok(val) = extract_value::(dict, key) { + Ok(val != 0.0) + } else { + // If none of the above types match, try to get the value as a bool + extract_value::(dict, key) + } +} + fn bot_params_from_dict(dict: &PyDict) -> PyResult { Ok(BotParams { close_grid_markup_range: extract_value(dict, "close_grid_markup_range")?, @@ -164,6 +201,7 @@ fn bot_params_from_dict(dict: &PyDict) -> PyResult { close_trailing_grid_ratio: extract_value(dict, "close_trailing_grid_ratio")?, close_trailing_qty_pct: extract_value(dict, "close_trailing_qty_pct")?, close_trailing_threshold_pct: extract_value(dict, "close_trailing_threshold_pct")?, + enforce_exposure_limit: extract_bool_value(dict, "enforce_exposure_limit")?, entry_grid_double_down_factor: extract_value(dict, 
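extract_bool_value above accepts bool, integer, or float representations of enforce_exposure_limit when the config crosses the Python/Rust boundary. Roughly equivalent Python (illustrative helper name):

    def extract_bool(d, key):
        val = d[key]
        if isinstance(val, bool):
            return val
        if isinstance(val, (int, float)):
            return val != 0
        raise TypeError(f"cannot interpret {key}={val!r} as bool")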
"entry_grid_double_down_factor")?, entry_grid_spacing_weight: extract_value(dict, "entry_grid_spacing_weight")?, entry_grid_spacing_pct: extract_value(dict, "entry_grid_spacing_pct")?, @@ -503,6 +541,7 @@ pub fn calc_next_close_long_py( close_trailing_qty_pct: f64, close_trailing_retracement_pct: f64, close_trailing_threshold_pct: f64, + enforce_exposure_limit: bool, wallet_exposure_limit: f64, balance: f64, position_size: f64, @@ -534,6 +573,7 @@ pub fn calc_next_close_long_py( close_trailing_qty_pct, close_trailing_retracement_pct, close_trailing_threshold_pct, + enforce_exposure_limit, wallet_exposure_limit, ..Default::default() }; @@ -653,6 +693,7 @@ pub fn calc_next_close_short_py( close_trailing_qty_pct: f64, close_trailing_retracement_pct: f64, close_trailing_threshold_pct: f64, + enforce_exposure_limit: bool, wallet_exposure_limit: f64, balance: f64, position_size: f64, @@ -684,6 +725,7 @@ pub fn calc_next_close_short_py( close_trailing_qty_pct, close_trailing_retracement_pct, close_trailing_threshold_pct, + enforce_exposure_limit, wallet_exposure_limit, ..Default::default() }; @@ -888,6 +930,7 @@ pub fn calc_closes_long_py( close_trailing_qty_pct: f64, close_trailing_retracement_pct: f64, close_trailing_threshold_pct: f64, + enforce_exposure_limit: bool, wallet_exposure_limit: f64, balance: f64, position_size: f64, @@ -921,6 +964,7 @@ pub fn calc_closes_long_py( close_trailing_qty_pct, close_trailing_retracement_pct, close_trailing_threshold_pct, + enforce_exposure_limit, wallet_exposure_limit, ..Default::default() }; @@ -963,6 +1007,7 @@ pub fn calc_closes_short_py( close_trailing_qty_pct: f64, close_trailing_retracement_pct: f64, close_trailing_threshold_pct: f64, + enforce_exposure_limit: bool, wallet_exposure_limit: f64, balance: f64, position_size: f64, @@ -996,6 +1041,7 @@ pub fn calc_closes_short_py( close_trailing_qty_pct, close_trailing_retracement_pct, close_trailing_threshold_pct, + enforce_exposure_limit, wallet_exposure_limit, ..Default::default() }; diff --git a/passivbot-rust/src/types.rs b/passivbot-rust/src/types.rs index 1d9e7dec3..bf764f1b8 100644 --- a/passivbot-rust/src/types.rs +++ b/passivbot-rust/src/types.rs @@ -26,7 +26,7 @@ impl Default for ExchangeParams { pub struct BacktestParams { pub starting_balance: f64, pub maker_fee: f64, - pub symbols: Vec, + pub coins: Vec, } #[derive(Default, Debug, Clone, Copy)] @@ -102,6 +102,7 @@ pub struct BotParams { pub close_trailing_grid_ratio: f64, pub close_trailing_qty_pct: f64, pub close_trailing_threshold_pct: f64, + pub enforce_exposure_limit: bool, pub entry_grid_double_down_factor: f64, pub entry_grid_spacing_weight: f64, pub entry_grid_spacing_pct: f64, @@ -154,6 +155,7 @@ pub enum OrderType { CloseGridLong, CloseTrailingLong, CloseUnstuckLong, + CloseAutoReduceLong, EntryInitialNormalShort, EntryInitialPartialShort, @@ -166,6 +168,7 @@ pub enum OrderType { CloseGridShort, CloseTrailingShort, CloseUnstuckShort, + CloseAutoReduceShort, Empty, } @@ -183,6 +186,7 @@ impl fmt::Display for OrderType { OrderType::CloseGridLong => write!(f, "close_grid_long"), OrderType::CloseTrailingLong => write!(f, "close_trailing_long"), OrderType::CloseUnstuckLong => write!(f, "close_unstuck_long"), + OrderType::CloseAutoReduceLong => write!(f, "close_auto_reduce_long"), OrderType::EntryInitialNormalShort => write!(f, "entry_initial_normal_short"), OrderType::EntryInitialPartialShort => write!(f, "entry_initial_partial_short"), OrderType::EntryTrailingNormalShort => write!(f, "entry_trailing_normal_short"), @@ -193,6 +197,7 @@ 
impl fmt::Display for OrderType { OrderType::CloseGridShort => write!(f, "close_grid_short"), OrderType::CloseTrailingShort => write!(f, "close_trailing_short"), OrderType::CloseUnstuckShort => write!(f, "close_unstuck_short"), + OrderType::CloseAutoReduceShort => write!(f, "close_auto_reduce_short"), OrderType::Empty => write!(f, "empty"), } } @@ -201,7 +206,7 @@ impl fmt::Display for OrderType { #[derive(Debug, Clone)] pub struct Fill { pub index: usize, - pub symbol: String, + pub coin: String, pub pnl: f64, pub fee_paid: f64, pub balance: f64, @@ -212,10 +217,11 @@ pub struct Fill { pub order_type: OrderType, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Analysis { pub adg: f64, pub mdg: f64, + pub gain: f64, pub sharpe_ratio: f64, pub sortino_ratio: f64, pub omega_ratio: f64, @@ -224,9 +230,21 @@ pub struct Analysis { pub sterling_ratio: f64, pub drawdown_worst: f64, pub drawdown_worst_mean_1pct: f64, - pub equity_balance_diff_mean: f64, - pub equity_balance_diff_max: f64, + pub equity_balance_diff_neg_max: f64, + pub equity_balance_diff_neg_mean: f64, + pub equity_balance_diff_pos_max: f64, + pub equity_balance_diff_pos_mean: f64, pub loss_profit_ratio: f64, + + pub adg_w: f64, + pub mdg_w: f64, + pub gain_w: f64, + pub sharpe_ratio_w: f64, + pub sortino_ratio_w: f64, + pub omega_ratio_w: f64, + pub calmar_ratio_w: f64, + pub sterling_ratio_w: f64, + pub loss_profit_ratio_w: f64, } impl Default for Analysis { @@ -234,6 +252,7 @@ impl Default for Analysis { Analysis { adg: 0.0, mdg: 0.0, + gain: 0.0, sharpe_ratio: 0.0, sortino_ratio: 0.0, omega_ratio: 0.0, @@ -242,9 +261,21 @@ impl Default for Analysis { sterling_ratio: 0.0, drawdown_worst: 1.0, drawdown_worst_mean_1pct: 1.0, - equity_balance_diff_mean: 1.0, - equity_balance_diff_max: 1.0, + equity_balance_diff_neg_max: 1.0, + equity_balance_diff_neg_mean: 1.0, + equity_balance_diff_pos_max: 1.0, + equity_balance_diff_pos_mean: 1.0, loss_profit_ratio: 1.0, + + adg_w: 0.0, + mdg_w: 0.0, + gain_w: 0.0, + sharpe_ratio_w: 0.0, + sortino_ratio_w: 0.0, + omega_ratio_w: 0.0, + calmar_ratio_w: 0.0, + sterling_ratio_w: 0.0, + loss_profit_ratio_w: 1.0, } } } diff --git a/requirements.txt b/requirements.txt index e6d998179..f52913b57 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,9 +16,10 @@ deap==1.4.1 websockets==10.1 aiohttp==3.8.1 numpy==1.22.4 -ccxt==4.4.7 +ccxt==4.4.39 hjson==3.0.2 prettytable==3.0.0 maturin==1.5.1 sortedcontainers==2.4.0 dictdiffer==0.9.0 +openpyxl==3.1.5 diff --git a/src/backtest.py b/src/backtest.py index b836c7c70..e8e9ef03e 100644 --- a/src/backtest.py +++ b/src/backtest.py @@ -24,9 +24,11 @@ calc_hash, ) import pprint -from downloader import prepare_hlcvs +from copy import deepcopy +from downloader import prepare_hlcvs, prepare_hlcvs_combined, add_all_eligible_coins_to_config from pathlib import Path from plotting import plot_fills_forager +from collections import defaultdict import matplotlib.pyplot as plt import logging from main import manage_rust_compilation @@ -67,7 +69,7 @@ def process_forager_fills(fills): fills, columns=[ "minute", - "symbol", + "coin", "pnl", "fee_paid", "balance", @@ -81,7 +83,7 @@ def process_forager_fills(fills): return fdf -def analyze_fills_forager(symbols, hlcvs, fdf, equities): +def analyze_fills_forager(coins, hlcvs, fdf, equities): analysis = {} pnls = {} for pside in ["long", "short"]: @@ -144,40 +146,43 @@ def check_nested(d0, d1): def get_cache_hash(config, exchange): to_hash = { - "symbols": config["backtest"]["symbols"][exchange], + "coins": 
config["live"]["approved_coins"], "end_date": format_end_date(config["backtest"]["end_date"]), "start_date": config["backtest"]["start_date"], - "exchange": exchange, + "exchange": config["backtest"]["exchanges"] if exchange == "combined" else exchange, + "minimum_coin_age_days": config["live"]["minimum_coin_age_days"], + "gap_tolerance_ohlcvs_minutes": config["backtest"]["gap_tolerance_ohlcvs_minutes"], } - to_hash["minimum_coin_age_days"] = config["live"]["minimum_coin_age_days"] return calc_hash(to_hash) -def load_symbols_hlcvs_from_cache(config, exchange): +def load_coins_hlcvs_from_cache(config, exchange): cache_hash = get_cache_hash(config, exchange) cache_dir = Path("caches") / "hlcvs_data" / cache_hash[:16] if os.path.exists(cache_dir): - symbols = json.load(open(cache_dir / "symbols.json")) + coins = json.load(open(cache_dir / "coins.json")) + mss = json.load(open(cache_dir / "market_specific_settings.json")) if config["backtest"]["compress_cache"]: fname = cache_dir / "hlcvs.npy.gz" - logging.info(f"Attempting to load hlcvs data from cache {fname}...") + logging.info(f"{exchange} Attempting to load hlcvs data from cache {fname}...") with gzip.open(fname, "rb") as f: hlcvs = np.load(f) else: fname = cache_dir / "hlcvs.npy" - logging.info(f"Attempting to load hlcvs data from cache {fname}...") + logging.info(f"{exchange} Attempting to load hlcvs data from cache {fname}...") hlcvs = np.load(fname) - return cache_dir, symbols, hlcvs + return cache_dir, coins, hlcvs, mss -def save_symbols_hlcvs_to_cache(config, symbols, hlcvs, exchange): +def save_coins_hlcvs_to_cache(config, coins, hlcvs, exchange, mss): cache_hash = get_cache_hash(config, exchange) cache_dir = Path("caches") / "hlcvs_data" / cache_hash[:16] cache_dir.mkdir(parents=True, exist_ok=True) - if all([os.path.exists(cache_dir / x) for x in ["symbols.json", "hlcvs.npy"]]): + if all([os.path.exists(cache_dir / x) for x in ["coins.json", "hlcvs.npy"]]): return logging.info(f"Dumping cache...") - json.dump(symbols, open(cache_dir / "symbols.json", "w")) + json.dump(coins, open(cache_dir / "coins.json", "w")) + json.dump(mss, open(cache_dir / "market_specific_settings.json", "w")) uncompressed_size = hlcvs.nbytes sts = utc_ms() if config["backtest"]["compress_cache"]: @@ -211,45 +216,33 @@ async def prepare_hlcvs_mss(config, exchange): exchange, "", ) - mss_path = oj( - results_path, - "market_specific_settings.json", - ) try: sts = utc_ms() - result = load_symbols_hlcvs_from_cache(config, exchange) + result = load_coins_hlcvs_from_cache(config, exchange) if result: logging.info(f"Seconds to load cache: {(utc_ms() - sts) / 1000:.4f}") - cache_dir, symbols, hlcvs = result - mss = json.load(open(mss_path)) + cache_dir, coins, hlcvs, mss = result logging.info(f"Successfully loaded hlcvs data from cache") - return symbols, hlcvs, mss, results_path, cache_dir + return coins, hlcvs, mss, results_path, cache_dir except: logging.info(f"Unable to load hlcvs data from cache. Fetching...") + if exchange == "combined": + mss, timestamps, hlcvs = await prepare_hlcvs_combined(config) + else: + mss, timestamps, hlcvs = await prepare_hlcvs(config, exchange) + coins = sorted(mss) + logging.info(f"Finished preparing hlcvs data for {exchange}. 
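The cache key now covers approved coins, date range, exchange(s), minimum coin age, and gap tolerance, so changing any of them invalidates the hlcvs cache. A sketch of how such a key could be derived, assuming calc_hash hashes a JSON-serializable dict (the real hash helper is defined elsewhere in the repo):

    import hashlib
    import json

    def calc_hash_sketch(obj) -> str:
        return hashlib.sha256(json.dumps(obj, sort_keys=True).encode()).hexdigest()

    cache_key = calc_hash_sketch({
        "coins": ["BTC", "ETH"],                 # illustrative values
        "start_date": "2024-01-01",
        "end_date": "2024-11-14",
        "exchange": "binance",
        "minimum_coin_age_days": 30,
        "gap_tolerance_ohlcvs_minutes": 120.0,
    })
    cache_dir = f"caches/hlcvs_data/{cache_key[:16]}"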
Shape: {hlcvs.shape}") try: - mss = fetch_market_specific_settings_multi(exchange=exchange) - json.dump(mss, open(make_get_filepath(mss_path), "w")) - except Exception as e: - logging.error(f"failed to fetch market specific settings {e}") - try: - mss = json.load(open(mss_path)) - logging.info(f"loaded market specific settings from cache {mss_path}") - except: - raise Exception("failed to load market specific settings from cache") - - symbols, timestamps, hlcvs = await prepare_hlcvs(config, exchange) - logging.info(f"Finished preparing hlcvs data. Shape: {hlcvs.shape}") - try: - cache_dir = save_symbols_hlcvs_to_cache(config, symbols, hlcvs, exchange) + cache_dir = save_coins_hlcvs_to_cache(config, coins, hlcvs, exchange, mss) except Exception as e: logging.error(f"failed to save hlcvs to cache {e}") traceback.print_exc() cache_dir = "" - return symbols, hlcvs, mss, results_path, cache_dir + return coins, hlcvs, mss, results_path, cache_dir def prep_backtest_args(config, mss, exchange, exchange_params=None, backtest_params=None): - symbols = sorted(set(config["backtest"]["symbols"][exchange])) # sort for consistency + coins = sorted(set(config["backtest"]["coins"][exchange])) # sort for consistency bot_params = {k: config["bot"][k].copy() for k in ["long", "short"]} for pside in bot_params: bot_params[pside]["wallet_exposure_limit"] = ( @@ -259,21 +252,25 @@ def prep_backtest_args(config, mss, exchange, exchange_params=None, backtest_par ) if exchange_params is None: exchange_params = [ - {k: mss[symbol][k] for k in ["qty_step", "price_step", "min_qty", "min_cost", "c_mult"]} - for symbol in symbols + {k: mss[coin][k] for k in ["qty_step", "price_step", "min_qty", "min_cost", "c_mult"]} + for coin in coins ] if backtest_params is None: backtest_params = { "starting_balance": config["backtest"]["starting_balance"], - "maker_fee": mss[symbols[0]]["maker"], - "symbols": symbols, + "maker_fee": mss[coins[0]]["maker"], + "coins": coins, } return bot_params, exchange_params, backtest_params +def expand_analysis(analysis, fills, config): + return analysis + + def run_backtest(hlcvs, mss, config: dict, exchange: str): bot_params, exchange_params, backtest_params = prep_backtest_args(config, mss, exchange) - logging.info(f"Backtesting...") + logging.info(f"Backtesting {exchange}...") sts = utc_ms() with create_shared_memory_file(hlcvs) as shared_memory_file: @@ -287,7 +284,7 @@ def run_backtest(hlcvs, mss, config: dict, exchange: str): ) logging.info(f"seconds elapsed for backtest: {(utc_ms() - sts) / 1000:.4f}") - return fills, equities, analysis + return fills, equities, expand_analysis(analysis, fills, config) def post_process(config, hlcvs, fills, equities, analysis, results_path, exchange): @@ -295,7 +292,7 @@ def post_process(config, hlcvs, fills, equities, analysis, results_path, exchang fdf = process_forager_fills(fills) equities = pd.Series(equities) analysis_py, bal_eq = analyze_fills_forager( - config["backtest"]["symbols"][exchange], hlcvs, fdf, equities + config["backtest"]["coins"][exchange], hlcvs, fdf, equities ) for k in analysis_py: if k not in analysis: @@ -310,29 +307,38 @@ def post_process(config, hlcvs, fills, equities, analysis, results_path, exchang dump_config(config, f"{results_path}config.json") fdf.to_csv(f"{results_path}fills.csv") bal_eq.to_csv(oj(results_path, "balance_and_equity.csv")) - plot_forager(results_path, config["backtest"]["symbols"][exchange], fdf, bal_eq, hlcvs, config["disable_plotting"]) + plot_forager( + results_path, + 
config["backtest"]["coins"][exchange], + fdf, + bal_eq, + hlcvs, + config["disable_plotting"], + ) -def plot_forager(results_path, symbols: [str], fdf: pd.DataFrame, bal_eq, hlcvs, disable_plotting: bool = False): +def plot_forager( + results_path, coins: [str], fdf: pd.DataFrame, bal_eq, hlcvs, disable_plotting: bool = False +): plots_dir = make_get_filepath(oj(results_path, "fills_plots", "")) plt.clf() bal_eq.plot() plt.savefig(oj(results_path, "balance_and_equity.png")) if not disable_plotting: - for i, symbol in enumerate(symbols): + for i, coin in enumerate(coins): try: - logging.info(f"Plotting fills for {symbol}") + logging.info(f"Plotting fills for {coin}") hlcvs_df = pd.DataFrame(hlcvs[:, i, :3], columns=["high", "low", "close"]) - fdfc = fdf[fdf.symbol == symbol] + fdfc = fdf[fdf.coin == coin] plt.clf() plot_fills_forager(fdfc, hlcvs_df) - plt.title(f"Fills {symbol}") + plt.title(f"Fills {coin}") plt.xlabel = "time" plt.ylabel = "price" - plt.savefig(oj(plots_dir, f"{symbol}.png")) + plt.savefig(oj(plots_dir, f"{coin}.png")) except Exception as e: - logging.info(f"Error plotting {symbol} {e}") + logging.info(f"Error plotting {coin} {e}") async def main(): @@ -362,20 +368,39 @@ async def main(): args = parser.parse_args() if args.config_path is None: logging.info(f"loading default template config configs/template.json") - config = load_config("configs/template.json") + config = load_config("configs/template.json", verbose=False) else: logging.info(f"loading config {args.config_path}") config = load_config(args.config_path) update_config_with_args(config, args) - config = format_config(config) + config = format_config(config, verbose=False) + await add_all_eligible_coins_to_config(config) config["disable_plotting"] = args.disable_plotting config["backtest"]["cache_dir"] = {} - for exchange in config["backtest"]["exchanges"]: - symbols, hlcvs, mss, results_path, cache_dir = await prepare_hlcvs_mss(config, exchange) - config["backtest"]["symbols"][exchange] = symbols + config["backtest"]["coins"] = {} + if config["backtest"]["combine_ohlcvs"]: + exchange = "combined" + coins, hlcvs, mss, results_path, cache_dir = await prepare_hlcvs_mss(config, exchange) + exchange_preference = defaultdict(list) + for coin in coins: + exchange_preference[mss[coin]["exchange"]].append(coin) + for ex in exchange_preference: + logging.info(f"chose {ex} for {','.join(exchange_preference[ex])}") + config["backtest"]["coins"][exchange] = coins config["backtest"]["cache_dir"][exchange] = str(cache_dir) fills, equities, analysis = run_backtest(hlcvs, mss, config, exchange) post_process(config, hlcvs, fills, equities, analysis, results_path, exchange) + else: + configs = {exchange: deepcopy(config) for exchange in config["backtest"]["exchanges"]} + tasks = {} + for exchange in config["backtest"]["exchanges"]: + tasks[exchange] = asyncio.create_task(prepare_hlcvs_mss(configs[exchange], exchange)) + for exchange in tasks: + coins, hlcvs, mss, results_path, cache_dir = await tasks[exchange] + configs[exchange]["backtest"]["coins"][exchange] = coins + configs[exchange]["backtest"]["cache_dir"][exchange] = str(cache_dir) + fills, equities, analysis = run_backtest(hlcvs, mss, configs[exchange], exchange) + post_process(configs[exchange], hlcvs, fills, equities, analysis, results_path, exchange) if __name__ == "__main__": diff --git a/src/downloader.py b/src/downloader.py index 4439f12d4..d205eda33 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -2,57 +2,50 @@ import asyncio import datetime import gzip 
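In combined mode, each coin's data comes from whichever exchange was selected for it in market_specific_settings; the defaultdict above only groups coins per exchange for logging. A small illustration with made-up data:

    from collections import defaultdict

    mss = {"BTC": {"exchange": "binance"}, "ETH": {"exchange": "bybit"}, "SOL": {"exchange": "binance"}}
    exchange_preference = defaultdict(list)
    for coin in sorted(mss):
        exchange_preference[mss[coin]["exchange"]].append(coin)
    for ex, coins in exchange_preference.items():
        print(f"chose {ex} for {','.join(coins)}")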
+import json +import logging +import inspect import os +import shutil import sys -import requests -import json +import traceback +import zipfile +from collections import deque +from functools import wraps from io import BytesIO +from pathlib import Path from time import time -from typing import Tuple +from typing import List, Dict, Any, Tuple +from uuid import uuid4 from urllib.request import urlopen -from functools import reduce -import zipfile -import traceback +from collections import defaultdict + import aiohttp -import ccxt.async_support as ccxt -import logging import pprint -from pathlib import Path +import ccxt.async_support as ccxt import numpy as np import pandas as pd from dateutil import parser from tqdm import tqdm -from uuid import uuid4 - -from njit_funcs import calc_samples +from pure_funcs import ( + date_to_ts, + ts_to_date_utc, + safe_filename, + symbol_to_coin, + get_template_live_config, +) from procedures import ( - prepare_backtest_config, make_get_filepath, - create_binance_bot, - create_bybit_bot, - create_binance_bot_spot, - print_, - add_argparse_args, + format_end_date, + coin_to_symbol, utc_ms, - get_first_ohlcv_timestamps, + get_file_mod_utc, + get_first_timestamps_unified, add_arguments_recursively, load_config, - update_config_with_args, - format_config, - format_end_date, ) -from pure_funcs import ( - ts_to_date, - ts_to_date_utc, - date_to_ts2, - get_dummy_settings, - get_day, - numpyize, - get_template_live_config, - safe_filename, -) -from collections import deque -from functools import wraps + +# ========================= CONFIGURABLES & GLOBALS ========================= logging.basicConfig( format="%(asctime)s %(levelname)-8s %(message)s", @@ -60,449 +53,930 @@ datefmt="%Y-%m-%dT%H:%M:%S", ) +MAX_REQUESTS_PER_MINUTE = 120 +REQUEST_TIMESTAMPS = deque(maxlen=1000) # for rate-limiting checks -# Global request tracker -_request_timestamps = deque(maxlen=1000) # Store last 1000 timestamps -_MAX_REQUESTS_PER_MINUTE = 120 # Adjust this value as needed +# ========================= HELPER FUNCTIONS ========================= -async def check_rate_limit(): - """Check if we can make a new request based on rate limit""" - current_time = utc_ms() / 1000 - # Remove timestamps older than 1 minute - while _request_timestamps and current_time - _request_timestamps[0] > 60: - _request_timestamps.popleft() +def is_valid_date(date): + try: + ts = date_to_ts(date) + return True + except: + return False - # Check if we've made too many requests in the last minute - if len(_request_timestamps) >= _MAX_REQUESTS_PER_MINUTE: - sleep_time = 60 - (current_time - _request_timestamps[0]) - if sleep_time > 0: - logging.info(f"Rate limit reached, sleeping for {sleep_time:.2f} seconds") - await asyncio.sleep(sleep_time) - _request_timestamps.append(current_time) +def get_function_name(): + return inspect.currentframe().f_back.f_code.co_name + + +def dump_ohlcv_data(data, filepath): + columns = ["timestamp", "open", "high", "low", "close", "volume"] + if isinstance(data, pd.DataFrame): + data = data[columns].astype(float).values + elif isinstance(data, np.ndarray): + pass + else: + raise Exception(f"Unknown data format for {filepath}") + np.save(filepath, data) + + +def load_ohlcv_data(filepath: str) -> pd.DataFrame: + arr = np.load(filepath, allow_pickle=True) + columns = ["timestamp", "open", "high", "low", "close", "volume"] + return ensure_millis(pd.DataFrame(arr, columns=columns)) + + +def get_days_in_between(start_day, end_day): + date_format = "%Y-%m-%d" + start_date = 
datetime.datetime.strptime(start_day[:10], date_format) + end_date = datetime.datetime.strptime(end_day[:10], date_format) + days = [] + current_date = start_date + while current_date <= end_date: + days.append(current_date.strftime(date_format)) + current_date += datetime.timedelta(days=1) + return days + + +def fill_gaps_in_ohlcvs(df): + interval = 60000 + new_timestamps = np.arange(df["timestamp"].iloc[0], df["timestamp"].iloc[-1] + interval, interval) + new_df = df.set_index("timestamp").reindex(new_timestamps) + new_df.close = new_df.close.ffill() + for col in ["open", "high", "low"]: + new_df[col] = new_df[col].fillna(new_df.close) + new_df["volume"] = new_df["volume"].fillna(0.0) + return new_df.reset_index().rename(columns={"index": "timestamp"}) + + +def attempt_gap_fix_ohlcvs(df, symbol=None): + interval = 60_000 + max_hours = 12 + max_gap = interval * 60 * max_hours + greatest_gap = df.timestamp.diff().max() + if pd.isna(greatest_gap) or greatest_gap == interval: + return df + if greatest_gap > max_gap: + raise Exception(f"Huge gap in data for {symbol}: {greatest_gap/(1000*60*60)} hours.") + if self.verbose: + logging.info( + f"Filling small gaps in {symbol}. Largest gap: {greatest_gap/(1000*60*60):.3f} hours." + ) + new_timestamps = np.arange(df["timestamp"].iloc[0], df["timestamp"].iloc[-1] + interval, interval) + new_df = df.set_index("timestamp").reindex(new_timestamps) + new_df.close = new_df.close.ffill() + for col in ["open", "high", "low"]: + new_df[col] = new_df[col].fillna(new_df.close) + new_df["volume"] = new_df["volume"].fillna(0.0) + return new_df.reset_index().rename(columns={"index": "timestamp"}) + + +async def fetch_url(session, url): + async with session.get(url) as response: + response.raise_for_status() + return await response.read() async def fetch_zips(url): try: async with aiohttp.ClientSession() as session: - async with session.get(url) as response: - response.raise_for_status() - zip_content = await response.read() + content = await fetch_url(session, url) zips = [] - with zipfile.ZipFile(BytesIO(zip_content), "r") as zip_ref: - for contained_file in zip_ref.namelist(): - zips.append(zip_ref.open(contained_file)) + with zipfile.ZipFile(BytesIO(content), "r") as z: + for f in z.namelist(): + zips.append(z.open(f)) return zips - - except aiohttp.ClientError as e: - logging.error(f"Error during HTTP request: {e}") - except zipfile.BadZipFile: - logging.error(f"Error extracting the zip file. 
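fill_gaps_in_ohlcvs above synthesizes missing one-minute bars by reindexing onto a complete timestamp grid, forward-filling close, copying close into open/high/low, and zeroing volume. A self-contained sketch of the same pattern:

    import numpy as np
    import pandas as pd

    def fill_gaps(df: pd.DataFrame, interval: int = 60_000) -> pd.DataFrame:
        grid = np.arange(df["timestamp"].iloc[0], df["timestamp"].iloc[-1] + interval, interval)
        out = df.set_index("timestamp").reindex(grid)
        out["close"] = out["close"].ffill()
        for col in ["open", "high", "low"]:
            out[col] = out[col].fillna(out["close"])
        out["volume"] = out["volume"].fillna(0.0)
        return out.reset_index().rename(columns={"index": "timestamp"})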
Make sure it contains a valid CSV file.") - except pd.errors.EmptyDataError: - logging.error("The CSV file is empty or could not be loaded as a DataFrame.") + except Exception as e: + logging.error(f"Error fetching zips {url}: {e}") async def get_zip_binance(url): col_names = ["timestamp", "open", "high", "low", "close", "volume"] zips = await fetch_zips(url) + if not zips: + return pd.DataFrame(columns=col_names) dfs = [] - for zip in zips: - df = pd.read_csv(zip, header=None) - df.columns = col_names + [str(i) for i in range(len(df.columns) - len(col_names))] + for z in zips: + df = pd.read_csv(z, header=None) + df.columns = col_names + [f"extra_{i}" for i in range(len(df.columns) - len(col_names))] dfs.append(df[col_names]) - dfc = pd.concat(dfs).sort_values("timestamp").reset_index() - return dfc[dfc.timestamp != "open_time"] + dfc = pd.concat(dfs).sort_values("timestamp").reset_index(drop=True) + return dfc[dfc.timestamp != "open_time"].astype(float) -def get_first_ohlcv_ts(symbol: str, spot=False) -> int: - try: - if spot: - url = "https://api.binance.com/api/v3/klines" - else: - url = "https://fapi.binance.com/fapi/v1/klines" - res = requests.get( - url, params={"symbol": symbol, "startTime": 0, "limit": 100, "interval": "1m"} - ) - first_ohlcvs = json.loads(res.text) - first_ts = first_ohlcvs[0][0] - return first_ts - except Exception as e: - logging.error(f"error getting first ohlcv ts {e}, returning 0") - return 0 +async def get_zip_bitget(url): + col_names = ["timestamp", "open", "high", "low", "close", "volume"] + zips = await fetch_zips(url) + if not zips: + return pd.DataFrame(columns=col_names) + dfs = [] + for z in zips: + df = ensure_millis(pd.read_excel(z)) + df.columns = col_names + [f"extra_{i}" for i in range(len(df.columns) - len(col_names))] + dfs.append(df[col_names]) + dfc = pd.concat(dfs).sort_values("timestamp").reset_index(drop=True) + return dfc[dfc.timestamp != "open_time"] -def get_days_in_between(start_day, end_day): - date_format = "%Y-%m-%d" - start_date = datetime.datetime.strptime(start_day, date_format) - end_date = datetime.datetime.strptime(end_day, date_format) +def ensure_millis(df): + if "timestamp" not in df.columns: + return df + if df.timestamp.iloc[0] > 1e14: # is microseconds + df.timestamp /= 1000 + elif df.timestamp.iloc[0] > 1e11: # is milliseconds + pass + else: # is seconds + df.timestamp *= 1000 + return df + + +class OHLCVManager: + """ + Manages OHLCVs for multiple exchanges. 
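ensure_millis above normalizes mixed timestamp units (Bitget's archives, for instance, arrive via read_excel and may not be in milliseconds) by order of magnitude: values above 1e14 are taken as microseconds, above 1e11 as milliseconds, otherwise as seconds. The same heuristic for a single value:

    def to_millis(ts: float) -> float:
        if ts > 1e14:        # microseconds
            return ts / 1000
        if ts > 1e11:        # already milliseconds
            return ts
        return ts * 1000     # seconds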
+ """ + + def __init__( + self, + exchange, + start_date=None, + end_date=None, + cc=None, + gap_tolerance_ohlcvs_minutes=120.0, + verbose=True, + ): + self.exchange = "binanceusdm" if exchange == "binance" else exchange + self.quote = "USDC" if exchange == "hyperliquid" else "USDT" + self.start_date = "2020-01-01" if start_date is None else start_date + self.end_date = format_end_date("now" if end_date is None else end_date) + self.start_ts = date_to_ts(self.start_date) + self.end_ts = date_to_ts(self.end_date) + self.cc = cc + self.cache_filepaths = { + "markets": os.path.join("caches", self.exchange, "markets.json"), + "ohlcvs": os.path.join("historical_data", f"ohlcvs_{self.exchange}"), + "first_timestamps": os.path.join("caches", self.exchange, "first_timestamps.json"), + } + self.markets = None + self.verbose = verbose + self.max_requests_per_minute = {"": 120, "gateio": 60} + self.request_timestamps = deque(maxlen=1000) # for rate-limiting checks + self.gap_tolerance_ohlcvs_minutes = gap_tolerance_ohlcvs_minutes + + def update_date_range(self, new_start_date=None, new_end_date=None): + if new_start_date: + if isinstance(new_start_date, (float, int)): + self.start_date = ts_to_date_utc(new_start_date) + elif isinstance(new_start_date, str): + self.start_date = new_start_date + else: + raise Exception(f"invalid start date {new_start_date}") + self.start_ts = date_to_ts(self.start_date) + if new_end_date: + if isinstance(new_end_date, (float, int)): + self.end_date = ts_to_date_utc(new_end_date) + elif isinstance(new_end_date, str): + self.end_date = new_end_date + else: + raise Exception(f"invalid end date {new_end_date}") + self.end_date = format_end_date(self.end_date) + self.end_ts = date_to_ts(self.end_date) + + def get_symbol(self, coin): + assert self.markets, "needs to call self.load_markets() first" + return coin_to_symbol( + coin, + eligible_symbols={ + k for k in self.markets if self.markets[k]["swap"] and k.endswith(f":{self.quote}") + }, + verbose=self.verbose, + ) - days_in_between = [] - current_date = start_date - while current_date <= end_date: - days_in_between.append(current_date.strftime(date_format)) - current_date += datetime.timedelta(days=1) + def get_market_specific_settings(self, coin): + mss = self.markets[self.get_symbol(coin)] + mss["hedge_mode"] = True + mss["maker_fee"] = mss["maker"] + mss["taker_fee"] = mss["taker"] + mss["c_mult"] = mss["contractSize"] + mss["min_cost"] = mc if (mc := mss["limits"]["cost"]["min"]) is not None else 0.01 + mss["price_step"] = mss["precision"]["price"] + mss["min_qty"] = max( + lm if (lm := mss["limits"]["amount"]["min"]) is not None else 0.0, + pm if (pm := mss["precision"]["amount"]) is not None else 0.0, + ) + mss["qty_step"] = mss["precision"]["amount"] + if self.exchange == "binanceusdm": + pass + elif self.exchange == "bybit": + # ccxt reports incorrect fees for bybit perps + mss["maker"] = mss["maker_fee"] = 0.0002 + mss["taker"] = mss["taker_fee"] = 0.00055 + elif self.exchange == "bitget": + pass + elif self.exchange == "gateio": + # ccxt reports incorrect fees for gateio perps. 
Assume VIP0 + mss["maker"] = mss["maker_fee"] = 0.0002 + mss["taker"] = mss["taker_fee"] = 0.0005 + return mss + + def filter_date_range(self, df: pd.DataFrame) -> pd.DataFrame: + """Filter dataframe to include only data within start_date and end_date (inclusive)""" + if df.empty: + return df + return df[(df.timestamp >= self.start_ts) & (df.timestamp <= self.end_ts)].reset_index( + drop=True + ) - return days_in_between + def has_coin(self, coin): + symbol = self.get_symbol(coin) + if not symbol: + return False + return True + async def check_rate_limit(self): + current_time = time() + while self.request_timestamps and current_time - self.request_timestamps[0] > 60: + self.request_timestamps.popleft() + mrpm = ( + self.max_requests_per_minute[self.exchange] + if self.exchange in self.max_requests_per_minute + else self.max_requests_per_minute[""] + ) + if len(self.request_timestamps) >= mrpm: + sleep_time = 60 - (current_time - self.request_timestamps[0]) + if sleep_time > 0: + if self.verbose: + logging.info( + f"{self.exchange} Rate limit reached, sleeping for {sleep_time:.2f} seconds" + ) + await asyncio.sleep(sleep_time) + + self.request_timestamps.append(current_time) + + async def get_ohlcvs(self, coin, start_date=None, end_date=None): + """ + - Attempts to get ohlcvs for coin from cache. + - If any data is missing, checks if it exists to download + - If so, download. + - Return ohlcvs. + - If exchange unsupported, + coin unsupported on exchange, + or date range for coin not existing on exchange, + return empty dataframe + """ + if not self.has_coin(coin): + return pd.DataFrame(columns=["timestamp", "open", "high", "low", "close", "volume"]) + if start_date or end_date: + self.update_date_range(new_start_date=start_date, new_end_date=end_date) + missing_days = await self.get_missing_days_ohlcvs(coin) + if missing_days: + if not self.markets: + await self.load_markets() + await self.download_ohlcvs(coin) + ohlcvs = await self.load_ohlcvs_from_cache(coin) + ohlcvs.volume = ohlcvs.volume * ohlcvs.close # use quote volume + return ohlcvs + + async def get_start_date_modified(self, coin): + fts = await self.get_first_timestamp(coin) + return ts_to_date_utc(max(self.start_ts, fts))[:10] + + async def get_missing_days_ohlcvs(self, coin): + start_date = await self.get_start_date_modified(coin) + days = get_days_in_between(start_date, self.end_date) + dirpath = os.path.join(self.cache_filepaths["ohlcvs"], coin) + if not os.path.exists(dirpath): + return days + all_files = os.listdir(dirpath) + return sorted([x for x in days if x + ".npy" not in all_files]) + + async def download_ohlcvs(self, coin): + if not self.markets: + await self.load_markets() + if not self.has_coin(coin): + return + if self.exchange == "binanceusdm": + await self.download_ohlcvs_binance(coin) + elif self.exchange == "bybit": + await self.download_ohlcvs_bybit(coin) + elif self.exchange == "bitget": + await self.download_ohlcvs_bitget(coin) + elif self.exchange == "gateio": + if self.cc is None: + self.load_cc() + await self.download_ohlcvs_gateio(coin) + + def dump_ohlcvs_to_cache(self, coin): + """ + Dumps new ohlcv data to cache if not already existing. Only whole days are dumped. 
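check_rate_limit above is a simple sliding-window throttle: request timestamps from the last 60 seconds are kept in a deque, and once the per-exchange budget (120/min by default, 60/min for gateio) is used up the coroutine sleeps until the oldest request ages out. A standalone sketch:

    import asyncio
    from collections import deque
    from time import time

    class RateLimiter:
        def __init__(self, max_per_minute: int = 120):
            self.max_per_minute = max_per_minute
            self.stamps = deque(maxlen=1000)

        async def wait(self):
            now = time()
            while self.stamps and now - self.stamps[0] > 60:
                self.stamps.popleft()
            if len(self.stamps) >= self.max_per_minute:
                sleep_time = 60 - (now - self.stamps[0])
                if sleep_time > 0:
                    await asyncio.sleep(sleep_time)
            self.stamps.append(time())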
+ """ + pass -async def download_ohlcvs_bybit(symbol, start_date, end_date, spot=False, download_only=False): - ns = [30, 10, 1] - for i, n in enumerate(ns): - try: - return await download_ohlcvs_bybit_sub( - symbol, start_date, end_date, spot=False, download_only=False, n_concurrent_fetches=n + async def get_first_timestamp(self, coin): + """ + Get first timestamp of available ohlcv data for given exchange & coin + """ + if (fts := self.load_first_timestamp(coin)) not in [None, 0.0]: + return fts + if not self.markets: + self.load_cc() + await self.load_markets() + if not self.has_coin(coin): + self.dump_first_timestamp(coin, 0.0) + return 0.0 + if self.exchange == "binanceusdm": + # Fetches first by default + ohlcvs = await self.cc.fetch_ohlcv(self.get_symbol(coin), since=1, timeframe="1d") + elif self.exchange == "bybit": + fts = await self.find_first_day_bybit(coin) + return fts + elif self.exchange == "gateio": + # Data since 2018 + ohlcvs = await self.cc.fetch_ohlcv( + self.get_symbol(coin), since=int(date_to_ts("2018-01-01")), timeframe="1d" ) - except Exception as e: - logging.error(f"Error fetching trades from bybit for {symbol} {e}. ") - if i < len(ns): - logging.info(f"Retrying with concurrent fetches changed {n} -> {ns[i+1]}.") - - -async def download_ohlcvs_bybit_sub( - symbol, start_date, end_date, spot=False, download_only=False, n_concurrent_fetches=10 -): - start_date, end_date = get_day(start_date), get_day(end_date) - assert date_to_ts2(end_date) >= date_to_ts2(start_date), "end_date is older than start_date" - dirpath = make_get_filepath(f"historical_data/ohlcvs_bybit{'_spot' if spot else ''}/{symbol}/") - convert_csv_to_npy(dirpath) - ideal_days = get_days_in_between(start_date, end_date) - days_done = [filename[:-4] for filename in os.listdir(dirpath) if ".npy" in filename] - days_to_get = [day for day in ideal_days if day not in days_done] - dfs = {} - if len(days_to_get) > 0: - base_url = f"https://public.bybit.com/{'spot' if spot else 'trading'}/" - webpage = await get_bybit_webpage(base_url, symbol) - filenames = [ - cand - for day in days_to_get - if (cand := f"{symbol}{'_' if spot else ''}{day}.csv.gz") in webpage - ] - if len(filenames) > 0: - for i in range(0, len(filenames), n_concurrent_fetches): - filenames_sublist = filenames[i : i + n_concurrent_fetches] - logging.info( - f"fetching {len(filenames_sublist)} files with {symbol} trades from {filenames_sublist[0][-17:-7]} to {filenames_sublist[-1][-17:-7]}" + if not ohlcvs: + ohlcvs = await self.cc.fetch_ohlcv( + self.get_symbol(coin), since=int(date_to_ts("2020-01-01")), timeframe="1d" ) - dfs_ = await get_bybit_trades(base_url, symbol, filenames_sublist) - dfs_ = {k[-17:-7]: convert_to_ohlcv(v, spot) for k, v in dfs_.items()} - dumped = [] - for day, df in sorted(dfs_.items()): - if day in days_done: - continue - filepath = f"{dirpath}{day}.npy" - dump_ohlcv_data(df, filepath) - dumped.append(day) - if not download_only: - dfs.update(dfs_) - if not download_only: - for day in ideal_days: - if os.path.exists(f"{dirpath}{day}.npy"): - dfs[day] = load_ohlcv_data(f"{dirpath}{day}.npy") - if len(dfs) == 0: - return pd.DataFrame(columns=["timestamp", "open", "high", "low", "close", "volume"]) - df = pd.concat(dfs.values()).sort_values("timestamp").reset_index() - return df[["timestamp", "open", "high", "low", "close", "volume"]] - - -async def get_bybit_webpage(base_url: str, symbol: str): - return urlopen(f"{base_url}{symbol}/").read().decode() - + elif self.exchange == "bitget": + fts = await 
self.find_first_day_bitget(coin) + return fts + if ohlcvs: + fts = ohlcvs[0][0] + else: + fts = 0.0 + self.dump_first_timestamp(coin, fts) + return fts + + def load_cc(self): + if self.cc is None: + self.cc = getattr(ccxt, self.exchange)({"enableRateLimit": True}) + self.cc.options["defaultType"] = "swap" + + async def load_markets(self): + self.load_cc() + self.markets = self.load_markets_from_cache() + if self.markets: + return + self.markets = await self.cc.load_markets() + self.dump_markets_to_cache() + + def load_markets_from_cache(self, max_age_ms=1000 * 60 * 60 * 24): + try: + if os.path.exists(self.cache_filepaths["markets"]): + if utc_ms() - get_file_mod_utc(self.cache_filepaths["markets"]) < max_age_ms: + markets = json.load(open(self.cache_filepaths["markets"])) + if self.verbose: + logging.info(f"{self.exchange} Loaded markets from cache") + return markets + return {} + except Exception as e: + logging.error(f"Error with {get_function_name()} {e}") + return {} -async def get_bybit_trades(base_url: str, symbol: str, filenames: [str]): - if len(filenames) == 0: - return None - async with aiohttp.ClientSession() as session: - tasks = {} - for url in [f"{base_url}{symbol}/{filename}" for filename in filenames]: - await check_rate_limit() # Add rate limiting check before each request - tasks[url] = asyncio.ensure_future(get_csv_gz(session, url)) - responses = {} - for url in tasks: - responses[url] = await tasks[url] - return {k: v.sort_values("timestamp") for k, v in responses.items()} + def dump_markets_to_cache(self): + if self.markets: + try: + json.dump(self.markets, open(make_get_filepath(self.cache_filepaths["markets"]), "w")) + if self.verbose: + logging.info(f"{self.exchange} Dumped markets to cache") + except Exception as e: + logging.error(f"Error with {get_function_name()} {e}") + + async def load_ohlcvs_from_cache(self, coin): + """ + Loads any cached ohlcv data for exchange, coin and date range from cache + and *strictly* enforces no gaps. If any gap is found, return empty. 
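load_ohlcvs_from_cache above tolerates small holes, which are filled in place via fill_gaps_in_ohlcvs, but rejects the cache entirely when the largest gap exceeds gap_tolerance_ohlcvs_minutes. The decision rule, sketched in isolation:

    import numpy as np

    def gap_decision(timestamps_ms, tolerance_minutes: float) -> str:
        intervals = np.diff(np.asarray(timestamps_ms))
        if (intervals == 60_000).all():
            return "contiguous"
        if intervals.max() / 60_000 > tolerance_minutes:
            return "reject"      # upstream returns an empty DataFrame
        return "fill"            # upstream calls fill_gaps_in_ohlcvs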
+ """ + dirpath = os.path.join(self.cache_filepaths["ohlcvs"], coin, "") + if not os.path.exists(dirpath): + return pd.DataFrame() + all_files = sorted([f for f in os.listdir(dirpath) if f.endswith(".npy")]) + all_days = get_days_in_between(self.start_date, self.end_date) + all_months = sorted(set([x[:7] for x in all_days])) -async def fetch_url(session, url): - async with session.get(url) as response: - content = await response.read() - return content + # Load month files first + files_to_load = [x for x in all_files if x.replace(".npy", "") in all_months] + # Add day files (exclude if they were loaded already as a month) + files_to_load += [ + x for x in all_files if x.replace(".npy", "") in all_days and x not in files_to_load + ] + dfs = [] + for f in files_to_load: + try: + filepath = os.path.join(dirpath, f) + df_part = load_ohlcv_data(filepath) + dfs.append(df_part) + except Exception as e: + logging.error(f"Error loading file {f}: {e}") -async def get_csv_gz(session, url: str): - # from bybit - try: - resp = await fetch_url(session, url) - with gzip.open(BytesIO(resp)) as f: - tdf = pd.read_csv(f) - return tdf - except Exception as e: - logging.error(f"error fetching bybit trades {e}") - traceback.print_exc() - return pd.DataFrame() - - -def convert_to_ohlcv(df, spot, interval=60000): - # bybit data - # timestamps are in seconds for futures, millis for spot - groups = df.groupby((df.timestamp * (1 if spot else 1000)) // interval * interval) - ohlcvs = pd.DataFrame( - { - "open": groups.price.first(), - "high": groups.price.max(), - "low": groups.price.min(), - "close": groups.price.last(), - "volume": groups["volume" if spot else "size"].sum(), - } - ) - new_index = np.arange(ohlcvs.index[0], ohlcvs.index[-1] + interval, interval) - ohlcvs = ohlcvs.reindex(new_index) - closes = ohlcvs.close.ffill() - for x in ["open", "high", "low", "close"]: - ohlcvs[x] = ohlcvs[x].fillna(closes) - ohlcvs["volume"] = ohlcvs["volume"].fillna(0.0) - ohlcvs.loc[:, "timestamp"] = ohlcvs.index.values - columns = ["timestamp", "open", "high", "low", "close", "volume"] - return ohlcvs[columns] + if not dfs: + return pd.DataFrame() + # Concatenate, drop duplicates, sort by timestamp + df = ( + pd.concat(dfs) + .drop_duplicates("timestamp") + .sort_values("timestamp") + .reset_index(drop=True) + ) + # ---------------------------------------------------------------------- + # 1) Clip to [start_ts, end_ts] and return + # ---------------------------------------------------------------------- + df = self.filter_date_range(df) + + # ---------------------------------------------------------------------- + # 2) Gap check with tolerance: if intervals != 60000 for any bar, return empty. + # ---------------------------------------------------------------------- + intervals = np.diff(df["timestamp"].values) + # If any interval is not exactly 60000, we have a gap. + if (intervals != 60000).any(): + greatest_gap = int(intervals.max() / 60000.0) + if greatest_gap > self.gap_tolerance_ohlcvs_minutes: + logging.warning( + f"[{self.exchange}] Gaps detected in {coin} OHLCV data. Greatest gap: {greatest_gap} minutes. Returning empty DataFrame." 
+ ) + return pd.DataFrame(columns=df.columns) + else: + df = fill_gaps_in_ohlcvs(df) + return df -async def download_single_ohlcvs_binance(url: str, fpath: str): - try: - logging.info(f"fetching {url}") - csv = await get_zip_binance(url) - dump_ohlcv_data(csv, fpath) - except Exception as e: - logging.error(f"failed to download {url} {e}") - - -async def download_ohlcvs_binance( - symbol, - inverse, - start_date, - end_date, - spot=False, - download_only=False, - start_tss=None, -) -> pd.DataFrame: - dirpath = make_get_filepath(f"historical_data/ohlcvs_{'spot' if spot else 'futures'}/{symbol}/") - convert_csv_to_npy(dirpath) - base_url = "https://data.binance.vision/data/" - base_url += "spot/" if spot else f"futures/{'cm' if inverse else 'um'}/" - col_names = ["timestamp", "open", "high", "low", "close", "volume"] - if start_tss is not None and symbol in start_tss: - start_ts = start_tss[symbol] - elif spot: - start_ts = get_first_ohlcv_ts(symbol, spot=spot) - else: - start_ts = (await get_first_ohlcv_timestamps(symbols=[symbol]))[symbol] - start_ts = int(max(start_ts, date_to_ts2(start_date))) - end_date = format_end_date(end_date) - end_ts = int(date_to_ts2(end_date)) - days = [ts_to_date_utc(x)[:10] for x in list(range(start_ts, end_ts, 1000 * 60 * 60 * 24))] - months = sorted({x[:7] for x in days}) - month_now = ts_to_date(utc_ms())[:7] - months = [m for m in months if m != month_now] - - # do months async - months_filepaths = {month: os.path.join(dirpath, month + ".npy") for month in months} - missing_months = {k: v for k, v in months_filepaths.items() if not os.path.exists(v)} - await asyncio.gather( - *[ - download_single_ohlcvs_binance( - base_url + f"monthly/klines/{symbol}/1m/{symbol}-1m-{k}.zip", v + def copy_ohlcvs_from_old_dir(self, new_dirpath, old_dirpath, missing_days, coin): + symbolf = self.get_symbol(coin).replace("/USDT:", "") + files_copied = 0 + if os.path.exists(old_dirpath): + for d0 in os.listdir(old_dirpath): + if d0.endswith(".npy") and d0[:10] in missing_days: + src = os.path.join(old_dirpath, d0) + dst = os.path.join(new_dirpath, d0) + if os.path.exists(dst): + continue + try: + shutil.copy(src, dst) + files_copied += 1 + except Exception as e: + logging.error(f"{self.exchange} error copying {src} -> {dst} {e}") + if files_copied: + logging.info( + f"{self.exchange} copied {files_copied} files from {old_dirpath} to {new_dirpath}" ) - for k, v in missing_months.items() - ] - ) - months_done = sorted([x for x in os.listdir(dirpath) if x[:-4] in months_filepaths]) + return True + else: + return False + + async def download_ohlcvs_binance(self, coin: str): + # Uses Binance's data archives via binance.vision + symbolf = self.get_symbol(coin).replace("/USDT:", "") + dirpath = make_get_filepath(os.path.join(self.cache_filepaths["ohlcvs"], coin, "")) + base_url = "https://data.binance.vision/data/futures/um/" + missing_days = await self.get_missing_days_ohlcvs(coin) + + # Copy from old directory first + old_dirpath = f"historical_data/ohlcvs_futures/{symbolf}/" + if self.copy_ohlcvs_from_old_dir(dirpath, old_dirpath, missing_days, coin): + missing_days = await self.get_missing_days_ohlcvs(coin) + if not missing_days: + return + + # Download monthy first (there may be gaps) + month_now = ts_to_date_utc(utc_ms())[:7] + missing_months = sorted({x[:7] for x in missing_days if x[:7] != month_now}) + tasks = [] + for month in missing_months: + fpath = os.path.join(dirpath, month + ".npy") + if not os.path.exists(fpath): + url = 
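Because each cached day is a single "YYYY-MM-DD.npy" file, the missing-day bookkeeping used throughout the downloader reduces to set arithmetic over filenames, e.g.:

    import os

    def missing_days(dirpath, days):
        # days: list of "YYYY-MM-DD" strings covering the requested date range
        if not os.path.exists(dirpath):
            return list(days)
        have = set(os.listdir(dirpath))
        return sorted(d for d in days if f"{d}.npy" not in have)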
f"{base_url}monthly/klines/{symbolf}/1m/{symbolf}-1m-{month}.zip" + await self.check_rate_limit() + tasks.append(asyncio.create_task(self.download_single_binance(url, fpath))) + for task in tasks: + await task + + # Convert any monthly data to daily data + for f in os.listdir(dirpath): + if len(f) == 11: + df = load_ohlcv_data(os.path.join(dirpath, f)) + + df.loc[:, "datetime"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True) + df.set_index("datetime", inplace=True) + + daily_groups = df.groupby(df.index.date) + n_days_dumped = 0 + for date, daily_data in daily_groups: + if len(daily_data) == 1440: + fpath = str(date) + ".npy" + d_fpath = os.path.join(dirpath, fpath) + if not os.path.exists(d_fpath): + n_days_dumped += 1 + dump_ohlcv_data(daily_data, d_fpath) + else: + logging.info( + f"binanceusdm incomplete daily data for {coin} {date} {len(daily_data)}" + ) + if n_days_dumped: + logging.info(f"binanceusdm dumped {n_days_dumped} daily files for {coin} {f}") + m_fpath = os.path.join(dirpath, f) + logging.info(f"binanceusdm removing {m_fpath}") + os.remove(m_fpath) + + # Download missing daily + missing_days = await self.get_missing_days_ohlcvs(coin) + tasks = [] + for day in missing_days: + fpath = os.path.join(dirpath, day + ".npy") + if not os.path.exists(fpath): + url = base_url + f"daily/klines/{symbolf}/1m/{symbolf}-1m-{day}.zip" + await self.check_rate_limit() + tasks.append(asyncio.create_task(self.download_single_binance(url, fpath))) + for task in tasks: + await task + + async def download_single_binance(self, url: str, fpath: str): + try: + csv = await get_zip_binance(url) + if not csv.empty: + dump_ohlcv_data(ensure_millis(csv), fpath) + if self.verbose: + logging.info(f"binanceusdm Dumped data {fpath}") + except Exception as e: + logging.error(f"binanceusdm Failed to download {url}: {e}") + traceback.print_exc() + + async def download_ohlcvs_bybit(self, coin: str): + # Bybit has public data archives + missing_days = await self.get_missing_days_ohlcvs(coin) + if not missing_days: + return + symbolf = self.get_symbol(coin).replace("/USDT:", "") + dirpath = make_get_filepath(os.path.join(self.cache_filepaths["ohlcvs"], coin, "")) + + # Copy from old directory first + old_dirpath = f"historical_data/ohlcvs_bybit/{symbolf}/" + if self.copy_ohlcvs_from_old_dir(dirpath, old_dirpath, missing_days, coin): + missing_days = await self.get_missing_days_ohlcvs(coin) + if not missing_days: + return + + # Bybit public data: "https://public.bybit.com/trading/" + base_url = "https://public.bybit.com/trading/" + webpage = urlopen(f"{base_url}{symbolf}/").read().decode() - # do days async - days_filepaths = {day: os.path.join(dirpath, day + ".npy") for day in days} - missing_days = { - k: v - for k, v in days_filepaths.items() - if not os.path.exists(v) and k[:7] + ".npy" not in months_done - } - await asyncio.gather( - *[ - download_single_ohlcvs_binance( - base_url + f"daily/klines/{symbol}/1m/{symbol}-1m-{k}.zip", v - ) - for k, v in missing_days.items() - ] - ) - days_done = sorted([x for x in os.listdir(dirpath) if x[:-4] in days_filepaths]) - - # delete days contained in months - fnames = os.listdir(dirpath) - for fname in fnames: - if fname.endswith(".npy") and len(fname) == 14: - if fname[:7] + ".npy" in fnames: - logging.info(f"deleting {os.path.join(dirpath, fname)}") - os.remove(os.path.join(dirpath, fname)) - - if not download_only: - fnames = os.listdir(dirpath) - dfs = [ - load_ohlcv_data(os.path.join(dirpath, fpath)) - for fpath in months_done + days_done - if fpath in 
fnames and fpath.endswith(".npy") + filenames = [ + f"{symbolf}{day}.csv.gz" for day in missing_days if f"{symbolf}{day}.csv.gz" in webpage ] + # Download concurrently + async with aiohttp.ClientSession() as session: + tasks = [] + for fn in filenames: + url = f"{base_url}{symbolf}/{fn}" + day = fn[-17:-7] + await self.check_rate_limit() + tasks.append( + asyncio.create_task(self.download_single_bybit(session, url, dirpath, day)) + ) + results = await asyncio.gather(*tasks, return_exceptions=True) + + async def find_first_day_bybit(self, coin: str, webpage=None) -> float: + symbolf = self.get_symbol(coin).replace("/USDT:", "") + # Bybit public data: "https://public.bybit.com/trading/" + base_url = "https://public.bybit.com/trading/" + if webpage is None: + webpage = urlopen(f"{base_url}{symbolf}/").read().decode() + dates = [date for x in webpage.split(".csv.gz") if is_valid_date((date := x[-10:]))] + first_ts = date_to_ts(sorted(dates)[0]) + self.dump_first_timestamp(coin, first_ts) + return first_ts + + async def download_single_bybit(self, session, url: str, dirpath: str, day: str) -> pd.DataFrame: try: - df = pd.concat(dfs)[col_names].sort_values("timestamp") - except ValueError as e: - logging.error( - f"error with download_ohlcvs_binance {symbol} {start_date} {end_date}: {e}. Returning empty" + resp = await fetch_url(session, url) + with gzip.open(BytesIO(resp)) as f: + raw = pd.read_csv(f) + # Convert trades to OHLCV + interval = 60000 + groups = raw.groupby((raw.timestamp * 1000) // interval * interval) + ohlcvs = pd.DataFrame( + { + "open": groups.price.first(), + "high": groups.price.max(), + "low": groups.price.min(), + "close": groups.price.last(), + "volume": groups["size"].sum(), + } + ) + ohlcvs["timestamp"] = ohlcvs.index + fpath = os.path.join(dirpath, day + ".npy") + dump_ohlcv_data( + ensure_millis(ohlcvs[["timestamp", "open", "high", "low", "close", "volume"]]), + fpath, ) + if self.verbose: + logging.info(f"bybit Dumped {fpath}") + except Exception as e: + logging.error(f"bybit error {url}: {e}") + traceback.print_exc() return pd.DataFrame() - df = df.drop_duplicates(subset=["timestamp"]).reset_index() - nindex = np.arange(df.timestamp.iloc[0], df.timestamp.iloc[-1] + 60000, 60000) - return df[col_names].set_index("timestamp").reindex(nindex).ffill().reset_index() - - -def count_longest_identical_data(hlc, symbol, verbose=True): - line = f"checking ohlcv integrity of {symbol}" - diffs = (np.diff(hlc[:, 1:], axis=0) == [0.0, 0.0, 0.0]).all(axis=1) - longest_consecutive = 0 - counter = 0 - i_ = 0 - for i, x in enumerate(diffs): - if x: - counter += 1 + + async def download_ohlcvs_bitget(self, coin: str): + # Bitget has public data archives + fts = await self.find_first_day_bitget(coin) + if fts == 0.0: + return + first_day = ts_to_date_utc(fts) + missing_days = await self.get_missing_days_ohlcvs(coin) + if not missing_days: + return + symbolf = self.get_symbol(coin).replace("/USDT:", "") + if not symbolf: + return + dirpath = make_get_filepath(os.path.join(self.cache_filepaths["ohlcvs"], coin, "")) + base_url = "https://img.bitgetimg.com/online/kline/" + # Download daily + tasks = [] + for day in sorted(missing_days): + fpath = day + ".npy" + await self.check_rate_limit() + tasks.append( + asyncio.create_task( + self.download_single_bitget( + base_url, symbolf, day, os.path.join(dirpath, day + ".npy") + ) + ) + ) + for task in tasks: + try: + await task + except Exception as e: + logging.error(f"bitget Error with downloader for {coin} {e}") + # traceback.print_exc() + + 
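# ---------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of this patch): the
# trades-to-candles conversion used by download_single_bybit above can be
# exercised in isolation. The column names ('timestamp' in seconds, 'price',
# 'size') follow Bybit's public futures trade dumps; the helper name and the
# file name in the usage note are hypothetical.
# ---------------------------------------------------------------------------
import pandas as pd

def trades_to_1m_ohlcv(trades: pd.DataFrame, interval_ms: int = 60_000) -> pd.DataFrame:
    # Bucket each trade into its minute; timestamps arrive in seconds, so scale to ms.
    buckets = (trades["timestamp"] * 1000) // interval_ms * interval_ms
    groups = trades.groupby(buckets)
    ohlcvs = pd.DataFrame(
        {
            "open": groups["price"].first(),
            "high": groups["price"].max(),
            "low": groups["price"].min(),
            "close": groups["price"].last(),
            "volume": groups["size"].sum(),
        }
    )
    ohlcvs["timestamp"] = ohlcvs.index  # bucket start, in milliseconds
    return ohlcvs[["timestamp", "open", "high", "low", "close", "volume"]].reset_index(drop=True)

# Usage (hypothetical file name):
#   trades = pd.read_csv("BTCUSDT2024-01-01.csv.gz")
#   ohlcv_1m = trades_to_1m_ohlcv(trades)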
def get_url_bitget(self, base_url, symbolf, day): + if day <= "2024-04-18": + return f"{base_url}{symbolf}/{symbolf}_UMCBL_1min_{day.replace('-', '')}.zip" else: - if counter > longest_consecutive: - longest_consecutive = counter - i_ = i - counter = 0 - if verbose: - logging.info( - f"{symbol} most n days of consecutive identical ohlcvs: {longest_consecutive / 60 / 24:.3f}, index last: {i_}" - ) - return longest_consecutive + return f"{base_url}{symbolf}/UMCBL/{day.replace('-', '')}.zip" + + async def download_single_bitget(self, base_url, symbolf, day, fpath): + url = self.get_url_bitget(base_url, symbolf, day) + res = await get_zip_bitget(url) + dump_ohlcv_data(ensure_millis(res), fpath) + if self.verbose: + logging.info(f"bitget Dumped daily data {fpath}") + + async def find_first_day_bitget(self, coin: str, start_year=2020) -> float: + """Find first day where data is available for a given symbol""" + if fts := self.load_first_timestamp(coin): + return fts + if not self.markets: + await self.load_markets() + symbol = self.get_symbol(coin).replace("/USDT:", "") + if not symbol: + fts = 0.0 + self.dump_first_timestamp(coin, fts) + return fts + base_url = "https://img.bitgetimg.com/online/kline/" + start = datetime.datetime(start_year, 1, 1) + end = datetime.datetime.now() + earliest = None + + while start <= end: + mid = start + (end - start) // 2 + date_str = mid.strftime("%Y%m%d") + url = self.get_url_bitget(base_url, symbol, date_str) + try: + await self.check_rate_limit() + async with aiohttp.ClientSession() as session: + async with session.head(url) as response: + if self.verbose: + logging.info( + f"bitget, searching for first day of data for {symbol} {str(mid)[:10]}" + ) + if response.status == 200: + earliest = mid + end = mid - datetime.timedelta(days=1) + else: + start = mid + datetime.timedelta(days=1) + except Exception as e: + start = mid + datetime.timedelta(days=1) -def attempt_gap_fix_hlcvs(df, symbol=None): - interval = 60 * 1000 - max_hours = 12 - max_gap = interval * 60 * max_hours - greatest_gap = df.timestamp.diff().max() - if greatest_gap == interval: - return df - if greatest_gap > max_gap: - raise Exception( - f"ohlcvs gap greater than {max_hours} hours: {greatest_gap / (1000 * 60 * 60)} hours" - ) - logging.info( - f"ohlcvs for {symbol} has greatest gap {greatest_gap / (1000 * 60 * 60):.3f} hours. Filling gaps..." 
-    )
-    new_timestamps = np.arange(df["timestamp"].iloc[0], df["timestamp"].iloc[-1] + interval, interval)
-    new_df = df.set_index("timestamp").reindex(new_timestamps)
-    new_df.close = new_df.close.ffill()
-    new_df.open = new_df.open.fillna(new_df.close)
-    new_df.high = new_df.high.fillna(new_df.close)
-    new_df.low = new_df.low.fillna(new_df.close)
-    new_df.volume = new_df.volume.fillna(0.0)
-    new_df = new_df.reset_index()
-    return new_df[["timestamp", "open", "high", "low", "close", "volume"]]
+        if earliest:
+            # Verify by checking the previous day
+            prev_day = earliest - datetime.timedelta(days=1)
+            prev_url = self.get_url_bitget(base_url, symbol, prev_day.strftime("%Y%m%d"))
+            try:
+                await self.check_rate_limit()
+                async with aiohttp.ClientSession() as session:
+                    async with session.head(prev_url) as response:
+                        if response.status == 200:
+                            earliest = prev_day
+            except Exception:
+                pass
+            if self.verbose:
+                logging.info(f"Bitget, found first day for {symbol}: {earliest.strftime('%Y-%m-%d')}")
+            # dump cache
+            fts = date_to_ts(earliest.strftime("%Y-%m-%d"))
+            self.dump_first_timestamp(coin, fts)
+            return fts
+        return None
+
+    async def download_ohlcvs_gateio(self, coin: str):
+        # GateIO doesn't have public data archives, but has ohlcvs via REST API
+        missing_days = await self.get_missing_days_ohlcvs(coin)
+        if not missing_days:
+            return
+        if self.cc is None:
+            self.load_cc()
+        dirpath = make_get_filepath(os.path.join(self.cache_filepaths["ohlcvs"], coin, ""))
+        symbol = self.get_symbol(coin)
+
+        # Instead of downloading in small chunks, do a single fetch for each day.
+        # This avoids multiple .fetch_ohlcv() calls that might exceed rate limits.
+        tasks = []
+        for day in missing_days:
+            await self.check_rate_limit()
+            tasks.append(asyncio.create_task(self.fetch_and_save_day_gateio(symbol, day, dirpath)))
+        for task in tasks:
+            await task
+
+    async def fetch_and_save_day_gateio(self, symbol: str, day: str, dirpath: str):
+        """
+        Fetches one full day of OHLCV data from GateIO with a single call,
+        then dumps it to disk. Uses self.check_rate_limit() to avoid exceeding
+        the per-minute request cap.
+        """
+        fpath = os.path.join(dirpath, f"{day}.npy")
+        start_ts_day = date_to_ts(day)  # 00:00:00 UTC of 'day'
+        end_ts_day = start_ts_day + 24 * 60 * 60 * 1000  # next 24 hours
+        interval = "1m"
+
+        # GateIO typically allows up to 1440+ limit for 1m timeframe in one call
+        limit = 1500
+        ohlcvs = await self.cc.fetch_ohlcv(
+            symbol, timeframe=interval, since=start_ts_day, limit=limit
+        )
+        if not ohlcvs:
+            # No data returned; skip
+            if self.verbose:
+                logging.info(f"No data returned for GateIO {symbol} {day}")
+            return
+
+        # Convert to DataFrame
+        df_day = pd.DataFrame(ohlcvs, columns=["timestamp", "open", "high", "low", "close", "volume"])
+        # Filter exactly for the given day (start_ts_day <= ts < end_ts_day)
+        df_day = df_day[
+            (df_day.timestamp >= start_ts_day) & (df_day.timestamp < end_ts_day)
+        ].reset_index(drop=True)
+
+        # Convert volume from quote to base volume if needed
+        # (Gate.io's swap markets typically return quote-volume in "volume")
+        # Adjust if your usage needs base volume. 
E.g.: + df_day["volume"] = df_day["volume"] / df_day["close"] + + # Dump final day data only if is a full day + if len(df_day) == 1440: + dump_ohlcv_data(ensure_millis(df_day), fpath) + if self.verbose: + logging.info(f"gateio Dumped daily OHLCV data for {symbol} to {fpath}") + + def load_first_timestamp(self, coin): + if os.path.exists(self.cache_filepaths["first_timestamps"]): + try: + ftss = json.load(open(self.cache_filepaths["first_timestamps"])) + if coin in ftss: + return ftss[coin] + except Exception as e: + logging.error(f"Error loading {self.cache_filepaths['first_timestamps']} {e}") -async def load_hlcvs(symbol, start_date, end_date, exchange="binance"): - end_date = format_end_date(end_date) - # returns matrix [[timestamp, high, low, close, volume]] - if exchange == "binance": - df = await download_ohlcvs_binance(symbol, False, start_date, end_date, False) - elif exchange == "bybit": - df = await download_ohlcvs_bybit(symbol, start_date, end_date) - df = attempt_gap_fix_hlcvs(df, symbol=symbol) - else: - raise Exception(f"downloading ohlcvs from exchange {exchange} not supported") - if len(df) == 0: - return pd.DataFrame() - df = df[df.timestamp >= date_to_ts2(start_date)] - df = df[df.timestamp <= date_to_ts2(end_date)] - return df[["timestamp", "high", "low", "close", "volume"]].values + def dump_first_timestamp(self, coin, fts): + try: + fpath = self.cache_filepaths["first_timestamps"] + if os.path.exists(fpath): + try: + ftss = json.load(open(fpath)) + except Exception as e0: + logging.error(f"Error loading {fpath} {e0}") + ftss = {} + else: + make_get_filepath(fpath) + ftss = {} + ftss[coin] = fts + json.dump(ftss, open(fpath, "w"), indent=True, sort_keys=True) + if self.verbose: + logging.info(f"{self.exchange} Dumped {fpath}") + except Exception as e: + logging.error(f"Error with {get_function_name()} {e}") async def prepare_hlcvs(config: dict, exchange: str): - symbols = sorted(set(config["backtest"]["symbols"][exchange])) + coins = sorted( + set([symbol_to_coin(c) for c in config["live"]["approved_coins"]["long"]]) + | set([symbol_to_coin(c) for c in config["live"]["approved_coins"]["short"]]) + ) + if exchange == "binance": + exchange = "binanceusdm" start_date = config["backtest"]["start_date"] end_date = format_end_date(config["backtest"]["end_date"]) - end_ts = date_to_ts2(end_date) + om = OHLCVManager( + exchange, + start_date, + end_date, + gap_tolerance_ohlcvs_minutes=config["backtest"]["gap_tolerance_ohlcvs_minutes"], + ) + try: + return await prepare_hlcvs_internal(config, coins, exchange, start_date, end_date, om) + finally: + if om.cc: + await om.cc.close() + + +async def prepare_hlcvs_internal(config, coins, exchange, start_date, end_date, om): + end_ts = date_to_ts(end_date) minimum_coin_age_days = config["live"]["minimum_coin_age_days"] interval_ms = 60000 + first_timestamps_unified = await get_first_timestamps_unified(coins) + # Create cache directory if it doesn't exist - cache_dir = Path(f"./cache/hlcv_data/{uuid4().hex[:16]}") + cache_dir = Path(f"./caches/hlcvs_data/{uuid4().hex[:16]}") cache_dir.mkdir(parents=True, exist_ok=True) # First pass: Download data and store metadata - symbol_metadata = {} - start_tss = None - if exchange == "binance": - start_tss = await get_first_ohlcv_timestamps(cc=ccxt.binanceusdm(), symbols=symbols) - elif exchange == "bybit": - start_tss = await get_first_ohlcv_timestamps(cc=ccxt.bybit(), symbols=symbols) + coin_metadata = {} - valid_symbols = {} + valid_coins = {} global_start_time = float("inf") global_end_time = 
float("-inf") + await om.load_markets() + min_coin_age_ms = 1000 * 60 * 60 * 24 * minimum_coin_age_days # First pass: Download and save data, collect metadata - for symbol in symbols: - adjusted_start_ts = date_to_ts2(start_date) - + for coin in coins: + adjusted_start_ts = date_to_ts(start_date) + if not om.has_coin(coin): + logging.info(f"{exchange} coin {coin} missing, skipping") + continue + if coin not in first_timestamps_unified: + logging.info(f"coin {coin} missing from first_timestamps_unified, skipping") + continue if minimum_coin_age_days > 0.0: - min_coin_age_ms = 1000 * 60 * 60 * 24 * minimum_coin_age_days - if symbol not in start_tss: - logging.info(f"coin {symbol} missing from first timestamps, skipping") + first_ts = await om.get_first_timestamp(coin) + if first_ts >= end_ts: + logging.info( + f"{exchange} Coin {coin} too young, start date {ts_to_date_utc(first_ts)}. Skipping" + ) continue - new_start_ts = start_tss[symbol] + min_coin_age_ms - if new_start_ts >= end_ts: + first_ts_plus_min_coin_age = first_timestamps_unified[coin] + min_coin_age_ms + if first_ts_plus_min_coin_age >= end_ts: logging.info( - f"Coin {symbol} too young, start date {ts_to_date_utc(start_tss[symbol])}, skipping" + f"{exchange} Coin {coin}: Not traded due to min_coin_age {int(minimum_coin_age_days)} days" + f"{ts_to_date_utc(first_ts_plus_min_coin_age)}. Skipping" ) continue - if new_start_ts > adjusted_start_ts: + new_adjusted_start_ts = max(first_timestamps_unified[coin] + min_coin_age_ms, first_ts) + if new_adjusted_start_ts > adjusted_start_ts: logging.info( - f"First date for {symbol} was {ts_to_date_utc(start_tss[symbol])}. Adjusting start date to {ts_to_date_utc(new_start_ts)}" + f"{exchange} Coin {coin}: Adjusting start date from {start_date} " + f"to {ts_to_date_utc(new_adjusted_start_ts)}" ) - adjusted_start_ts = new_start_ts + adjusted_start_ts = new_adjusted_start_ts try: - data = await load_hlcvs( - symbol, - ts_to_date_utc(adjusted_start_ts)[:10], - end_date, - exchange, - ) + om.update_date_range(adjusted_start_ts) + df = await om.get_ohlcvs(coin) + data = df[["timestamp", "high", "low", "close", "volume"]].values except Exception as e: - logging.error(f"error with load_hlcvs for {symbol} {e}. Skipping") + logging.error(f"error with get_ohlcvs for {coin} {e}. 
Skipping") + traceback.print_exc() continue if len(data) == 0: continue - assert (np.diff(data[:, 0]) == interval_ms).all(), f"gaps in hlcv data {symbol}" + assert (np.diff(data[:, 0]) == interval_ms).all(), f"gaps in hlcv data {coin}" # Save data to disk - file_path = cache_dir / f"{safe_filename(symbol)}.npy" + file_path = cache_dir / f"{coin}.npy" np.save(file_path, data) # Update metadata - symbol_metadata[symbol] = { + coin_metadata[coin] = { "start_time": int(data[0, 0]), "end_time": int(data[-1, 0]), "length": len(data), } - valid_symbols[symbol] = file_path + valid_coins[coin] = file_path global_start_time = min(global_start_time, data[0, 0]) global_end_time = max(global_end_time, data[-1, 0]) - if not valid_symbols: - raise ValueError("No valid symbols found with data") + if not valid_coins: + raise ValueError("No valid coins found with data") # Calculate dimensions for the unified array n_timesteps = int((global_end_time - global_start_time) / interval_ms) + 1 - n_coins = len(valid_symbols) + n_coins = len(valid_coins) # Create the timestamp array timestamps = np.arange(global_start_time, global_end_time + interval_ms, interval_ms) @@ -511,9 +985,9 @@ async def prepare_hlcvs(config: dict, exchange: str): unified_array = np.zeros((n_timesteps, n_coins, 4)) # Second pass: Load data from disk and populate the unified array - logging.info(f"Unifying data for {len(valid_symbols)} coins into single numpy array...") - for i, symbol in enumerate(tqdm(valid_symbols, desc="Processing symbols", unit="symbol")): - file_path = valid_symbols[symbol] + logging.info(f"{exchange} Unifying data for {len(valid_coins)} coins into single numpy array...") + for i, coin in enumerate(tqdm(valid_coins, desc="Processing coins", unit="coin")): + file_path = valid_coins[coin] ohlcv = np.load(file_path) # Calculate indices @@ -522,7 +996,6 @@ async def prepare_hlcvs(config: dict, exchange: str): # Extract and process data coin_data = ohlcv[:, 1:] - coin_data[:, 3] = coin_data[:, 2] * coin_data[:, 3] # Use quote volume # Place the data in the unified array unified_array[start_idx:end_idx, i, :] = coin_data @@ -543,134 +1016,525 @@ async def prepare_hlcvs(config: dict, exchange: str): os.rmdir(cache_dir) except OSError: pass + mss = {coin: om.get_market_specific_settings(coin) for coin in sorted(valid_coins)} + return mss, timestamps, unified_array + + +async def prepare_hlcvs_combined(config): + """ + Public function that sets up any needed resources, + calls the internal implementation, and ensures + ccxt connections are closed in a finally block. + """ + # Create or load the OHLCVManager dict + exchanges_to_consider = [ + "binanceusdm" if e == "binance" else e for e in config["backtest"]["exchanges"] + ] + om_dict = {} + for ex in exchanges_to_consider: + om = OHLCVManager( + ex, + config["backtest"]["start_date"], + config["backtest"]["end_date"], + gap_tolerance_ohlcvs_minutes=config["backtest"]["gap_tolerance_ohlcvs_minutes"], + ) + # await om.load_markets() # if you want to do this up front + om_dict[ex] = om - return list(valid_symbols), timestamps, unified_array + try: + return await _prepare_hlcvs_combined_impl(config, om_dict) + finally: + # Cleanly close all ccxt sessions + for om in om_dict.values(): + if om.cc: + await om.cc.close() + + +async def _prepare_hlcvs_combined_impl(config, om_dict): + """ + Amalgamates data from different exchanges for each coin in config, then unifies them into a single + numpy array with shape (n_timestamps, n_coins, 4). 
The final data per coin is chosen using: + + 1) Filter out exchanges that don't fully cover [start_date, end_date] + 2) Among the remaining, pick the exchange with the fewest data gaps + 3) If still tied, pick the exchange with the highest total volume + + Returns: + mss: dict of coin -> market_specific_settings from the chosen exchange + timestamps: 1D numpy array of all timestamps (1min granularity) covering the entire combined range + unified_array: 3D numpy array with shape (len(timestamps), n_coins, 4), + where the last dimension is [high, low, close, volume]. + Price fields are forward-filled; volume is 0-filled for missing data. + """ + # --------------------------------------------------------------- + # 0) Define or load relevant info from config + # --------------------------------------------------------------- + start_date = config["backtest"]["start_date"] + end_date = format_end_date(config["backtest"]["end_date"]) + start_ts = date_to_ts(start_date) + end_ts = date_to_ts(end_date) + # Pull out all coins from config: + coins = sorted( + set([symbol_to_coin(c) for c in config["live"]["approved_coins"]["long"]]) + | set([symbol_to_coin(c) for c in config["live"]["approved_coins"]["short"]]) + ) -def unify_hlcv_data(hlcv_list) -> (np.ndarray, np.ndarray): + # If your config includes a list of exchanges, grab it; else pick a default set: + exchanges_to_consider = [ + "binanceusdm" if e == "binance" else e for e in config["backtest"]["exchanges"] + ] + + # Minimum coin age handling (same approach as prepare_hlcvs) + min_coin_age_days = config["live"].get("minimum_coin_age_days", 0.0) + min_coin_age_ms = int(min_coin_age_days * 24 * 60 * 60 * 1000) + + # First timestamps from your pre-cached or dynamically fetched data + # (some procedures rely on e.g. 
get_first_timestamps_unified()) + first_timestamps_unified = await get_first_timestamps_unified(coins) + + for ex in exchanges_to_consider: + await om_dict[ex].load_markets() + + # --------------------------------------------------------------- + # 2) For each coin, gather 1m data from all exchanges, filter/choose best + # --------------------------------------------------------------- + chosen_data_per_coin = {} # coin -> pd.DataFrame of final chosen data + chosen_mss_per_coin = {} # coin -> market_specific_settings from chosen exchange + + for coin in coins: + # If the global "first_timestamps_unified" says we have no data for coin, skip immediately + coin_fts = first_timestamps_unified.get(coin, 0.0) + if coin_fts == 0.0: + logging.info(f"Skipping coin {coin}, no first timestamp recorded.") + continue - # Find the global start and end timestamps - start_time = min(arr[0, 0] for arr in hlcv_list) - end_time = max(arr[-1, 0] for arr in hlcv_list) + # Check if coin is "too young": first_ts + min_coin_age >= end_ts + # meaning there's effectively no eligible window to trade/backtest + if coin_fts + min_coin_age_ms >= end_ts: + logging.info( + f"Skipping coin {coin}: it does not satisfy the minimum_coin_age_days = {min_coin_age_days}" + ) + continue - # Calculate the number of timesteps - n_timesteps = int((end_time - start_time) / 60000) + 1 + # The earliest time we can start from, given coin's first trade time plus coin age + effective_start_ts = max(start_ts, coin_fts + min_coin_age_ms) + if effective_start_ts >= end_ts: + # No coverage needed or possible + continue - # Create the unified array - n_coins = len(hlcv_list) - unified_array = np.zeros((n_timesteps, n_coins, 4)) + # >>> Instead of a normal for-loop over exchanges, do concurrent tasks: + tasks = [] + for ex in exchanges_to_consider: + tasks.append( + asyncio.create_task( + fetch_data_for_coin_and_exchange( + coin, ex, om_dict[ex], effective_start_ts, end_ts + ) + ) + ) + # Gather results concurrently + results = await asyncio.gather(*tasks, return_exceptions=True) - # Create the timestamp array - timestamps = np.arange(start_time, end_time + 60000, 60000) + # Filter out None/Exceptions, build exchange_candidates + exchange_candidates = [] + for r in results: + if r is None or isinstance(r, Exception): + continue + ex, df, coverage_count, gap_count, total_volume = r + exchange_candidates.append((ex, df, coverage_count, gap_count, total_volume)) - for i, ohlcv in enumerate(hlcv_list): - # Calculate the start and end indices for this coin - start_idx = int((ohlcv[0, 0] - start_time) / 60000) - end_idx = start_idx + len(ohlcv) + if not exchange_candidates: + logging.info(f"No exchange data found at all for coin {coin}. 
Skipping.") + continue - # Extract the required data (high, low, close, volume) - coin_data = ohlcv[:, 1:] + # Now pick the "best" exchange (per your partial-coverage logic): + if len(exchange_candidates) == 1: + best_exchange, best_df, best_cov, best_gaps, best_vol = exchange_candidates[0] + else: + # Sort by coverage desc, gap_count asc, volume desc + exchange_candidates.sort(key=lambda x: (x[2], -x[3], x[4]), reverse=True) + best_exchange, best_df, best_cov, best_gaps, best_vol = exchange_candidates[0] + logging.info(f"{coin} exchange preference: {[x[0] for x in exchange_candidates]}") + + chosen_data_per_coin[coin] = best_df + chosen_mss_per_coin[coin] = om_dict[best_exchange].get_market_specific_settings(coin) + chosen_mss_per_coin[coin]["exchange"] = best_exchange + # --------------------------------------------------------------- + # If no coins survived, raise error + # --------------------------------------------------------------- + if not chosen_data_per_coin: + raise ValueError("No coin data found on any exchange for the requested date range.") + + # --------------------------------------------------------------- + # 6) Unify across coins into a single (n_timestamps, n_coins, 4) array + # We'll unify on 1m timestamps from the earliest to latest across all chosen coins + # --------------------------------------------------------------- + global_start_time = min(df.timestamp.iloc[0] for df in chosen_data_per_coin.values()) + global_end_time = max(df.timestamp.iloc[-1] for df in chosen_data_per_coin.values()) + + timestamps = np.arange(global_start_time, global_end_time + 60000, 60000) + n_timesteps = len(timestamps) + valid_coins = sorted(chosen_data_per_coin.keys()) + n_coins = len(valid_coins) + # use at most last 60 days of date range to compute volume ratios + start_date_for_volume_ratios = ts_to_date_utc( + max(global_start_time, global_end_time - 1000 * 60 * 60 * 24 * 60) + ) + end_date_for_volume_ratios = ts_to_date_utc(global_end_time) + + exchanges_with_data = sorted(set([chosen_mss_per_coin[coin]["exchange"] for coin in valid_coins])) + exchange_volume_ratios = await compute_exchange_volume_ratios( + exchanges_with_data, + valid_coins, + start_date_for_volume_ratios, + end_date_for_volume_ratios, + {ex: om_dict[ex] for ex in exchanges_with_data}, + ) + exchanges_counts = defaultdict(int) + for coin in chosen_mss_per_coin: + exchanges_counts[chosen_mss_per_coin[coin]["exchange"]] += 1 + reference_exchange = sorted(exchanges_counts.items(), key=lambda x: x[1])[-1][0] + exchange_volume_ratios_mapped = defaultdict(dict) + if len(exchanges_counts) == 1: + exchange_volume_ratios_mapped[reference_exchange][reference_exchange] = 1.0 + else: + for ex0, ex1 in exchange_volume_ratios: + exchange_volume_ratios_mapped[ex0][ex1] = 1 / exchange_volume_ratios[(ex0, ex1)] + exchange_volume_ratios_mapped[ex1][ex0] = exchange_volume_ratios[(ex0, ex1)] + exchange_volume_ratios_mapped[ex1][ex1] = 1.0 + exchange_volume_ratios_mapped[ex0][ex0] = 1.0 + + pprint.pprint(dict(exchange_volume_ratios_mapped)) + + # We'll store [high, low, close, volume] in the last dimension + unified_array = np.zeros((n_timesteps, n_coins, 4), dtype=np.float64) + + # For each coin i, reindex its DataFrame onto the full timestamps + for i, coin in enumerate(valid_coins): + df = chosen_data_per_coin[coin].copy() + + # Reindex on the global minute timestamps + df = df.set_index("timestamp").reindex(timestamps) + + # Forward fill 'close' for all missing rows, then backward fill any leading edge + df["close"] = 
df["close"].ffill().bfill() + + # For O/H/L, fill with whatever the 'close' ended up being + df["open"] = df["open"].fillna(df["close"]) + df["high"] = df["high"].fillna(df["close"]) + df["low"] = df["low"].fillna(df["close"]) + + # Fill volume with 0.0 for missing bars, then apply scaling factor + df["volume"] = df["volume"].fillna(0.0) + exchange_for_this_coin = chosen_mss_per_coin[coin]["exchange"] + scaling_factor = exchange_volume_ratios_mapped[exchange_for_this_coin][reference_exchange] + df["volume"] *= scaling_factor + + # Now extract columns in correct order + coin_data = df[["high", "low", "close", "volume"]].values + unified_array[:, i, :] = coin_data + + # --------------------------------------------------------------- + # 7) Cleanup: close all ccxt clients if needed + # --------------------------------------------------------------- + for om in om_dict.values(): + if om.cc: + await om.cc.close() + + # --------------------------------------------------------------- + # Return final: + # - chosen_mss_per_coin: dict coin-> market settings from the chosen exchange + # - timestamps: 1D array of all unified timestamps + # - unified_array: shape (n_timestamps, n_coins, 4) => [H, L, C, V] + # --------------------------------------------------------------- + return chosen_mss_per_coin, timestamps, unified_array + + +async def fetch_data_for_coin_and_exchange( + coin: str, ex: str, om: OHLCVManager, effective_start_ts: int, end_ts: int +): + """ + Fetch data for (coin, ex) between [effective_start_ts, end_ts]. + Returns (ex, df, coverage_count, gap_count, total_volume), where: + - ex: the exchange name + - df: the OHLCV dataframe + - coverage_count: total number of rows in df + - gap_count: sum of missing minutes across all gaps + - total_volume: sum of 'volume' column (within the timeframe) + """ + + # Check if coin is listed on this exchange + if not om.has_coin(coin): + return None - # Use quote volume as volume - coin_data[:, 3] = coin_data[:, 2] * coin_data[:, 3] + # Adjust the manager's date range to [effective_start_ts, end_ts] + om.update_date_range(effective_start_ts, end_ts) - # Place the data in the unified array - unified_array[start_idx:end_idx, i, :] = coin_data + try: + # Get the DataFrame of 1m OHLCVs + df = await om.get_ohlcvs(coin) + except Exception as e: + logging.warning(f"Error retrieving {coin} from {ex}: {e}") + return None - # Front-fill - if start_idx > 0: - unified_array[:start_idx, i, :3] = coin_data[0, 2] # Set high, low, close to first close + if df.empty: + return None - # Back-fill - if end_idx < n_timesteps: - unified_array[end_idx:, i, :3] = coin_data[-1, 2] # Set high, low, close to last close + # Filter strictly to [effective_start_ts, end_ts] + df = df[(df.timestamp >= effective_start_ts) & (df.timestamp <= end_ts)].reset_index(drop=True) + if df.empty: + return None - return timestamps, unified_array + # coverage_count = total number of 1m bars in df + coverage_count = len(df) + # ------------------------------------------------------------------ + # 1) Compute sum of all missing minutes (gap_count) + # ------------------------------------------------------------------ + # For each consecutive pair, the difference in timestamps should be 60000 ms. + # If it's bigger, we measure how many 1-minute bars are missing. 
+ intervals = np.diff(df["timestamp"].values) -def convert_csv_to_npy(filepath): - if not os.path.exists(filepath): - return False - if os.path.isdir(filepath): - for fp in os.listdir(filepath): - convert_csv_to_npy(os.path.join(filepath, fp)) - return False - if filepath.endswith(".csv"): - columns = ["timestamp", "open", "high", "low", "close", "volume"] - npy_filepath = filepath.replace(".csv", ".npy") - csv_data = pd.read_csv(filepath)[columns] - dump_ohlcv_data(csv_data, npy_filepath) - os.remove(filepath) - logging.info(f"successfully converted {filepath} to {npy_filepath}") - return True + gap_count = sum( + (gap // 60000) - 1 # e.g. if gap is 5 minutes => 5 - 1 = 4 missing bars + for gap in intervals + if gap > 60000 + ) + # total_volume = sum of volume column + total_volume = df["volume"].sum() + + return (ex, df, coverage_count, gap_count, total_volume) + + +async def compute_exchange_volume_ratios( + exchanges: List[str], + coins: List[str], + start_date: str, + end_date: str, + om_dict: Dict[str, "OHLCVManager"] = None, +) -> Dict[Tuple[str, str], float]: + """ + Gathers daily volume for each coin on each exchange, + filters out incomplete days (days missing from any exchange), + and then computes pairwise volume ratios (ex0, ex1) = sumVol(ex0) / sumVol(ex1). + Finally, it averages those ratios across all coins. + + :param exchanges: list of exchange names (e.g. ["binanceusdm", "bybit"]). + :param coins: list of coins (e.g. ["BTC", "ETH"]). + :param start_date: "YYYY-MM-DD" inclusive + :param end_date: "YYYY-MM-DD" inclusive + :param om_dict: dict of {exchange_name -> OHLCVManager}, already initialized + :return: dict {(ex0, ex1): average_ratio}, where ex0 < ex1 in alphabetical order, for example + """ + # ------------------------------------------------------- + # 1) Build all pairs of exchanges + # ------------------------------------------------------- + if om_dict is None: + om_dict = {ex: OHLCVManager(ex, start_date, end_date) for ex in exchanges} + await asyncio.gather(*[om_dict[ex].load_markets() for ex in om_dict]) + assert all([ex in om_dict for ex in exchanges]) + exchange_pairs = [] + for i, ex0 in enumerate(sorted(exchanges)): + for ex1 in exchanges[i + 1 :]: + # (Optional) sort them or keep them as-is + # We'll just keep them in the (ex0, ex1) order for clarity + exchange_pairs.append((ex0, ex1)) + + # ------------------------------------------------------- + # 2) For each coin, gather data from all exchanges + # ------------------------------------------------------- + # We'll store: all_data[coin][(ex0, ex1)] = ratio_of_volumes_for_that_coin + all_data = {} + + for coin in coins: + # If coin does not exist on ALL exchanges, skip + if not all(om_dict[ex].has_coin(coin) for ex in exchanges): + continue -def dump_ohlcv_data(data, filepath): - npy_filepath = filepath.replace(".csv", ".npy") - columns = ["timestamp", "open", "high", "low", "close", "volume"] - if isinstance(data, pd.DataFrame): - to_dump = data[columns].astype(float).values - elif isinstance(data, np.ndarray): - to_dump = data - else: - raise Exception(f"unknown file type {filepath} dump_ohlcv_data") - np.save(npy_filepath, to_dump) + # Gather concurrent tasks => each exchange's DF for that coin + tasks = [] + for ex in exchanges: + om = om_dict[ex] + om.update_date_range(start_date, end_date) + tasks.append( + om.get_ohlcvs(coin) + ) # returns a DataFrame: [timestamp, open, high, low, close, volume] + + dfs = await asyncio.gather(*tasks, return_exceptions=True) + # Filter out any exceptions or empty 
data + # We'll keep them in the same order as `exchanges` + for i, df in enumerate(dfs): + if isinstance(df, Exception) or df is None or df.empty: + dfs[i] = pd.DataFrame() # mark as empty + + # If any are empty, skip coin + if any(df.empty for df in dfs): + continue + # ------------------------------------------------------- + # 3) Convert each DF to daily volume. + # We'll produce: daily_df[day_str or day_int] = volume + # ------------------------------------------------------- + # Approach: group by day (UTC). E.g. day_key = df.timestamp // 86400000 + # Then sum up df["volume"] for each day. + + daily_volumes = [] # daily_volumes[i] will be a dict day->volume for exchange i + for df in dfs: + df["day"] = df["timestamp"] // 86400000 # integer day + grouped = df.groupby("day", as_index=False)["volume"].sum() + # build dict {day: volume} + daily_dict = dict(zip(grouped["day"], grouped["volume"])) + daily_volumes.append(daily_dict) + + # Now we want to find the set of "common days" that appear in all daily_volumes + # E.g. intersection of day keys across all exchanges + sets_of_days = [set(dv.keys()) for dv in daily_volumes] + common_days = set.intersection(*sets_of_days) + if not common_days: + continue -def load_ohlcv_data(filepath): - npy_filepath = filepath.replace(".csv", ".npy") - columns = ["timestamp", "open", "high", "low", "close", "volume"] - if os.path.exists(npy_filepath): - loaded_data = np.load(npy_filepath, allow_pickle=True) - else: - logging.info(f"loading {filepath}") - csv_data = pd.read_csv(filepath)[columns] - logging.info(f"dumping {npy_filepath}") - dump_ohlcv_data(csv_data, npy_filepath) - logging.info(f"removing {filepath}") - os.remove(filepath) - loaded_data = csv_data.values - return pd.DataFrame(loaded_data, columns=columns) + # Filter out days that have no volume on some exchange + # (Already done by intersection, but you might want to check if the volume is zero and exclude, etc.) + + # ------------------------------------------------------- + # 4) For each pair of exchanges, compute ratio over the *full* range of common days + # ------------------------------------------------------- + # i.e. ratio = (sum of daily volumes on ex0) / (sum of daily volumes on ex1) + coin_data = {} # coin_data[(ex0, ex1)] = ratio for this coin + for ex0, ex1 in exchange_pairs: + i0 = exchanges.index(ex0) + i1 = exchanges.index(ex1) + sum0 = sum(daily_volumes[i0][day] for day in common_days) + sum1 = sum(daily_volumes[i1][day] for day in common_days) + ratio = (sum0 / sum1) if sum1 > 0 else 0.0 + coin_data[(ex0, ex1)] = ratio + + if coin_data: + all_data[coin] = coin_data + + # ------------------------------------------------------- + # 5) Compute average ratio per (ex0, ex1) across all coins + # ------------------------------------------------------- + # all_data is: { coin: {(ex0, ex1): ratio, (exA, exB): ratio, ...}, ... } + # We'll gather lists of ratios per exchange pair, then compute the mean. 
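    # Editor's note (worked example, not part of the patch): suppose only BTC and
    # ETH survive the filters for the pair ("binanceusdm", "bybit"). If BTC's summed
    # daily volume over the common days is 1200 on binanceusdm vs 1000 on bybit
    # (ratio 1.2), and ETH's is 800 vs 1000 (ratio 0.8), the averaged result is
    # averages[("binanceusdm", "bybit")] = mean([1.2, 0.8]) = 1.0.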
+ averages = {} + if not all_data: + return averages # empty if no coin data + + # Build a list of all pairs we actually used: + used_pairs = set() + for coin in all_data: + for pair in all_data[coin]: + used_pairs.add(pair) + + for pair in used_pairs: + # collect all coin-specific ratios for that pair + ratios_for_pair = [] + for coin in all_data: + if pair in all_data[coin]: + ratios_for_pair.append(all_data[coin][pair]) + if ratios_for_pair: + averages[pair] = float(np.mean(ratios_for_pair)) + else: + averages[pair] = 0.0 + + return averages + + +async def add_all_eligible_coins_to_config(config): + path = config["live"]["approved_coins"] + if config["live"]["empty_means_all_approved"] and path in [ + [""], + [], + None, + "", + 0, + 0.0, + {"long": [], "short": []}, + {"long": [""], "short": [""]}, + ]: + approved_coins = await get_all_eligible_coins(config["backtest"]["exchanges"]) + config["live"]["approved_coins"] = {"long": approved_coins, "short": approved_coins} + + +async def get_all_eligible_coins(exchanges): + oms = {} + for ex in exchanges: + oms[ex] = OHLCVManager(ex, verbose=False) + await asyncio.gather(*[oms[ex].load_markets() for ex in oms]) + approved_coins = set() + for ex in oms: + for s in oms[ex].markets: + if oms[ex].has_coin(s): + coin = symbol_to_coin(s) + if coin: + approved_coins.add(symbol_to_coin(s)) + return sorted(approved_coins) async def main(): - parser = argparse.ArgumentParser(prog="downloader", description="download hlcv data") + parser = argparse.ArgumentParser(prog="downloader", description="download ohlcv data") parser.add_argument( "config_path", type=str, default=None, nargs="?", help="path to json passivbot config" ) template_config = get_template_live_config("v7") del template_config["optimize"] del template_config["bot"] - keep_live_keys = { - "approved_coins", - "minimum_coin_age_days", + template_config["live"] = { + k: v + for k, v in template_config["live"].items() + if k + in { + "approved_coins", + "ignored_coins", + } + } + template_config["backtest"] = { + k: v + for k, v in template_config["backtest"].items() + if k + in { + "combine_ohlcvs", + "end_date", + "start_date", + "exchanges", + } } - del template_config["backtest"]["base_dir"] - for key in sorted(template_config["live"]): - if key not in keep_live_keys: - del template_config["live"][key] add_arguments_recursively(parser, template_config) args = parser.parse_args() if args.config_path is None: logging.info(f"loading default template config configs/template.json") - config = load_config("configs/template.json") + config = load_config("configs/template.json", verbose=False) else: logging.info(f"loading config {args.config_path}") config = load_config(args.config_path) - update_config_with_args(config, args) - config = format_config(config) - for exchange in config["backtest"]["exchanges"]: - for symbol in config["backtest"]["symbols"][exchange]: - try: - data = await load_hlcvs( - symbol, - config["backtest"]["start_date"], - config["backtest"]["end_date"], - exchange=exchange, - ) - except Exception as e: - logging.error(f"Error with {symbol} {e}") - traceback.print_exc() + oms = {} + try: + for ex in config["backtest"]["exchanges"]: + oms[ex] = OHLCVManager( + ex, config["backtest"]["start_date"], config["backtest"]["end_date"] + ) + logging.info("loading markets for {config['backtest']['exchanges']}") + await asyncio.gather(*[oms[ex].load_markets() for ex in oms]) + coins = [x for y in config["live"]["approved_coins"].values() for x in y] + for coin in sorted(set(coins)): + 
tasks = {} + for ex in oms: + try: + tasks[ex] = asyncio.create_task(oms[ex].get_ohlcvs(coin)) + except Exception as e: + logging.error(f"{ex} {coin} error a with get_ohlcvs() {e}") + for ex in tasks: + try: + await tasks[ex] + except Exception as e: + logging.error(f"{ex} {coin} error b with get_ohlcvs() {e}") + finally: + for om in oms.values(): + if om.cc: + await om.cc.close() if __name__ == "__main__": diff --git a/src/exchanges/binance.py b/src/exchanges/binance.py index 8aaa4efc7..0c07cd31e 100644 --- a/src/exchanges/binance.py +++ b/src/exchanges/binance.py @@ -9,6 +9,7 @@ import numpy as np import json import passivbot_rust as pbr +from copy import deepcopy from pure_funcs import ( floatify, ts_to_date_utc, @@ -52,6 +53,12 @@ def create_ccxt_sessions(self): getattr(self, ccx).options["broker"][key] = "x-" + self.broker_code_spot async def print_new_user_suggestion(self): + between_print_wait_ms = 1000 * 60 * 60 * 4 + if hasattr(self, "previous_user_suggestion_print_ts"): + if utc_ms() - self.previous_user_suggestion_print_ts < between_print_wait_ms: + return + self.previous_user_suggestion_print_ts = utc_ms() + res = None try: res = await self.cca.fapiprivate_get_apireferral_ifnewuser( @@ -84,9 +91,10 @@ async def print_new_user_suggestion(self): print(front_pad + "#" * (max_len + 2) + back_pad) print("\n\n") - async def init_markets(self, verbose=True): + async def execute_to_exchange(self): + res = await super().execute_to_exchange() await self.print_new_user_suggestion() - await super().init_markets(verbose=verbose) + return res def set_market_specific_settings(self): super().set_market_specific_settings() @@ -399,18 +407,22 @@ async def execute_cancellations(self, orders: [dict]) -> [dict]: ) async def execute_order(self, order: dict) -> dict: - executed = await self.cca.create_limit_order( + order_type = order["type"] if "type" in order else "limit" + params = { + "positionSide": order["position_side"].upper(), + "newClientOrderId": order["custom_id"], + } + if order_type == "limit": + params["timeInForce"] = ( + "GTX" if self.config["live"]["time_in_force"] == "post_only" else "GTC" + ) + executed = await self.cca.create_order( + type=order_type, symbol=order["symbol"], side=order["side"], amount=abs(order["qty"]), price=order["price"], - params={ - "positionSide": order["position_side"].upper(), - "newClientOrderId": order["custom_id"], - "timeInForce": ( - "GTX" if self.config["live"]["time_in_force"] == "post_only" else "GTC" - ), - }, + params=params, ) if "info" in executed and "code" in executed["info"] and executed["info"]["code"] == "-5022": logging.info(f"{executed['info']['msg']}") @@ -428,6 +440,14 @@ async def execute_orders(self, orders: [dict]) -> [dict]: return [await self.execute_order(orders[0])] to_execute = [] for order in orders[: self.config["live"]["max_n_creations_per_batch"]]: + params = { + "positionSide": order["position_side"].upper(), + "newClientOrderId": order["custom_id"], + } + if order["type"] == "limit": + params["timeInForce"] = ( + "GTX" if self.config["live"]["time_in_force"] == "post_only" else "GTC" + ) to_execute.append( { "type": "limit", @@ -435,13 +455,7 @@ async def execute_orders(self, orders: [dict]) -> [dict]: "side": order["side"], "amount": abs(order["qty"]), "price": order["price"], - "params": { - "positionSide": order["position_side"].upper(), - "newClientOrderId": order["custom_id"], - "timeInForce": ( - "GTX" if self.config["live"]["time_in_force"] == "post_only" else "GTC" - ), - }, + "params": deepcopy(params), } ) 
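        # Editor's note (illustrative, not part of the patch): with the branch above,
        # a post-only limit entry is sent with params like
        #     {"positionSide": "LONG", "newClientOrderId": "<custom_id>", "timeInForce": "GTX"},
        # while a non-limit order type (e.g. market) omits "timeInForce" entirely.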
executed = None diff --git a/src/exchanges/bitget.py b/src/exchanges/bitget.py index f7e56f6aa..d2e87c06f 100644 --- a/src/exchanges/bitget.py +++ b/src/exchanges/bitget.py @@ -194,43 +194,69 @@ async def fetch_ohlcv(self, symbol: str, timeframe="1m"): traceback.print_exc() return False - async def fetch_pnls( - self, - start_time: int = None, - end_time: int = None, - limit=None, - ): - all_fetched = {} - params = {"productType": "USDT-FUTURES"} - if end_time: - params["endTime"] = str(int(end_time)) + async def fetch_pnls(self, start_time=None, end_time=None, limit=None): + wait_between_fetches_minimum_seconds = 0.5 + all_res = {} + until = int(end_time) if end_time else None + since = int(start_time) if start_time else None + retry_count = 0 + first_fetch = True while True: - fetched = await self.cca.private_mix_get_v2_mix_order_fill_history(params=params) - fetched = sorted(fetched["data"]["fillList"], key=lambda x: float(x["cTime"])) - if fetched == []: - break - if all(x["orderId"] in all_fetched for x in fetched): - break - for elm in fetched: - elm["symbol"] = self.get_symbol_id_inv(elm["symbol"]) - elm["pnl"] = float(elm["profit"]) - elm["position_side"] = self.position_side_map[elm["side"]][elm["tradeSide"]] - elm["qty"] = float(elm["baseVolume"]) - elm["price"] = float(elm["price"]) - elm["id"] = elm["orderId"] - elm["timestamp"] = float(elm["cTime"]) - elm["datetime"] = ts_to_date_utc(elm["timestamp"]) - all_fetched[elm["id"]] = elm - if start_time and fetched[0]["timestamp"] <= start_time: + if since and until and since >= until: + # print("debug fetch_pnls g") break + sts = utc_ms() + res = await ( + self.cca.fetch_closed_orders(since=since) + if until is None + else self.cca.fetch_closed_orders(since=since, params={"until": until}) + ) + if first_fetch: + if not res: + # print("debug fetch_pnls e") + break + first_fetch = False + if not res: + # print("debug fetch_pnls a retry_count:", retry_count) + if retry_count >= 10: + break + retry_count += 1 + until = int(until - 1000 * 60 * 60 * 4) + continue + resd = {elm["id"]: elm for elm in res} + # if len(resd) != len(res): + # print("debug fetch_pnls b", len(resd), len(res)) + if all(id_ in all_res for id_ in resd): + # print("debug fetch_pnls c retry_count:", retry_count) + if retry_count >= 10: + break + retry_count += 1 + until = int(until - 1000 * 60 * 60 * 4) + continue + retry_count = 0 + for k, v in resd.items(): + all_res[k] = v + all_res[k]["pnl"] = float(v["info"]["totalProfits"]) + all_res[k]["position_side"] = v["info"]["posSide"] if start_time is None and end_time is None: break - logging.info( - f"debug fetching fills {ts_to_date_utc(fetched[0]['timestamp'])} {ts_to_date_utc(fetched[-1]['timestamp'])} {len(fetched)}" + if since and res[0]["timestamp"] <= since: + # print("debug fetch_pnls e") + break + until = int(res[0]["timestamp"]) + # print( + # "debug fetch_pnls d len(res):", + # len(res), + # res[0]["datetime"], + # res[-1]["datetime"], + # (res[-1]["timestamp"] - res[0]["timestamp"]) / (1000 * 60 * 60), + # ) + wait_time_seconds = max( + 0.0, wait_between_fetches_minimum_seconds - (utc_ms() - sts) / 1000 ) - # params = {'idLessThan': fetched[0]['id']} - params["endTime"] = str(int(fetched[0]["timestamp"])) - return sorted([x for x in all_fetched.values()], key=lambda x: x["timestamp"]) + await asyncio.sleep(wait_time_seconds) + all_res_list = sorted(all_res.values(), key=lambda x: x["timestamp"]) + return all_res_list async def execute_cancellation(self, order: dict) -> dict: executed = None @@ -266,9 
+292,10 @@ async def execute_cancellations(self, orders: [dict]) -> [dict]: ) async def execute_order(self, order: dict) -> dict: + order_type = order["type"] if "type" in order else "limit" executed = await self.cca.create_order( symbol=order["symbol"], - type="limit", + type=order_type, side=order["side"], amount=abs(order["qty"]), price=order["price"], diff --git a/src/exchanges/bybit.py b/src/exchanges/bybit.py index 9185b1d5e..32306f9c4 100644 --- a/src/exchanges/bybit.py +++ b/src/exchanges/bybit.py @@ -381,7 +381,8 @@ async def execute_cancellations(self, orders: [dict]) -> [dict]: ) async def execute_order(self, order: dict) -> dict: - executed = await self.cca.create_limit_order( + executed = await self.cca.create_order( + type=order["type"] if "type" in order else "limit", symbol=order["symbol"], side=order["side"], amount=abs(order["qty"]), diff --git a/src/exchanges/gateio.py b/src/exchanges/gateio.py index e9bf2d1bf..1a685878c 100644 --- a/src/exchanges/gateio.py +++ b/src/exchanges/gateio.py @@ -310,19 +310,22 @@ async def execute_orders(self, orders: [dict]) -> [dict]: return [] to_execute = [] for order in orders[: self.max_n_creations_per_batch]: + order_type = order["type"] if "type" in order else "limit" + params = { + "reduce_only": order["reduce_only"], + } + if order_type == "limit": + params["timeInForce"] = ( + "poc" if self.config["live"]["time_in_force"] == "post_only" else "gtc" + ) to_execute.append( { "symbol": order["symbol"], - "type": "limit", + "type": order_type, "side": order["side"], "amount": order["qty"], "price": order["price"], - "params": { - "reduce_only": order["reduce_only"], - "timeInForce": ( - "poc" if self.config["live"]["time_in_force"] == "post_only" else "gtc" - ), - }, + "params": params, } ) res = await self.cca.create_orders(to_execute) diff --git a/src/exchanges/hyperliquid.py b/src/exchanges/hyperliquid.py index ebedaeab5..c42df1724 100644 --- a/src/exchanges/hyperliquid.py +++ b/src/exchanges/hyperliquid.py @@ -7,6 +7,7 @@ import traceback import json import numpy as np +import passivbot_rust as pbr from pure_funcs import ( multi_replace, floatify, @@ -66,16 +67,16 @@ def set_market_specific_settings(self): elm = self.markets_dict[symbol] self.symbol_ids[symbol] = elm["id"] self.min_costs[symbol] = ( - 10.1 if elm["limits"]["cost"]["min"] is None else elm["limits"]["cost"]["min"] + 10.0 if elm["limits"]["cost"]["min"] is None else elm["limits"]["cost"]["min"] ) - self.min_costs[symbol] *= 1.1 - self.qty_steps[symbol] = round_(10 ** -elm["precision"]["amount"], 0.0000000001) + self.min_costs[symbol] = pbr.round_(self.min_costs[symbol] * 1.01, 0.01) + self.qty_steps[symbol] = elm["precision"]["amount"] self.min_qtys[symbol] = ( self.qty_steps[symbol] if elm["limits"]["amount"]["min"] is None else elm["limits"]["amount"]["min"] ) - self.price_steps[symbol] = round_(10 ** -elm["precision"]["price"], 0.0000000001) + self.price_steps[symbol] = elm["precision"]["price"] self.c_mults[symbol] = elm["contractSize"] self.max_leverage[symbol] = ( int(elm["info"]["maxLeverage"]) if "maxLeverage" in elm["info"] else 0 @@ -343,7 +344,7 @@ async def execute_orders(self, orders: [dict]) -> [dict]: to_execute.append( { "symbol": order["symbol"], - "type": "limit", + "type": order["type"] if "type" in order else "limit", "side": order["side"], "amount": order["qty"], "price": order["price"], @@ -357,20 +358,56 @@ async def execute_orders(self, orders: [dict]) -> [dict]: }, } ) - res = await self.cca.create_orders( - to_execute, - params=( - 
{"vaultAddress": self.user_info["wallet_address"]} - if self.user_info["is_vault"] - else {} - ), - ) + try: + res = await self.cca.create_orders( + to_execute, + params=( + {"vaultAddress": self.user_info["wallet_address"]} + if self.user_info["is_vault"] + else {} + ), + ) + except Exception as e: + if self.adjust_min_cost_on_error(e): + return [] + else: + raise executed = [] for ex, order in zip(res, orders): if "info" in ex and "filled" in ex["info"] or "resting" in ex["info"]: executed.append({**ex, **order}) return executed + def adjust_min_cost_on_error(self, error): + any_adjusted = False + successful_orders = [] + str_e = error.args[0] + error_json = json.loads(str_e[str_e.find("{") :]) + if ( + "response" in error_json + and "data" in error_json["response"] + and "statuses" in error_json["response"]["data"] + ): + for elm in error_json["response"]["data"]["statuses"]: + if "error" in elm: + if "Order must have minimum value of $10" in elm["error"]: + asset_id = int(elm["error"][elm["error"].find("asset=") + 6 :]) + for symbol in self.markets_dict: + if ( + "baseId" in self.markets_dict[symbol]["info"] + and self.markets_dict[symbol]["info"]["baseId"] == asset_id + ): + break + else: + raise Exception(f"No symbol match for asset_id={asset_id}") + new_min_cost = pbr.round_(self.min_costs[symbol] * 1.1, 0.1) + logging.info( + f"caught {elm['error']} {symbol}. Upping min_cost from {self.min_costs[symbol]} to {new_min_cost}" + ) + self.min_costs[symbol] = new_min_cost + any_adjusted = True + return any_adjusted + def symbol_is_eligible(self, symbol): try: if ( diff --git a/src/optimize.py b/src/optimize.py index bddfc2efe..58c2e1a73 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -8,9 +8,11 @@ import subprocess import mmap from multiprocessing import Queue, Process +from collections import defaultdict from backtest import ( prepare_hlcvs_mss, prep_backtest_args, + expand_analysis, ) from pure_funcs import ( get_template_live_config, @@ -20,6 +22,7 @@ sort_dict_keys, calc_hash, flatten, + date_to_ts, ) from procedures import ( make_get_filepath, @@ -30,6 +33,7 @@ add_arguments_recursively, update_config_with_args, ) +from downloader import add_all_eligible_coins_to_config from copy import deepcopy from main import manage_rust_compilation import numpy as np @@ -44,7 +48,7 @@ import time import fcntl from tqdm import tqdm -import dictdiffer # Added import for dictdiffer +import dictdiffer def make_json_serializable(obj): @@ -218,24 +222,42 @@ def cxSimulatedBinaryBoundedWrapper(ind1, ind2, eta, low, up): def individual_to_config(individual, template=None): if template is None: template = get_template_live_config("v7") + keys_ignored = ["enforce_exposure_limit"] config = deepcopy(template) - keys = sorted(config["bot"]["long"]) + keys = [k for k in sorted(config["bot"]["long"]) if k not in keys_ignored] i = 0 for pside in ["long", "short"]: for key in keys: config["bot"][pside][key] = individual[i] i += 1 + is_enabled = ( + config["bot"][pside]["total_wallet_exposure_limit"] > 0.0 + and config["bot"][pside]["n_positions"] > 0.0 + ) + if not is_enabled: + for key in config["bot"][pside]: + if key in keys_ignored: + continue + bounds = config["optimize"]["bounds"][f"{pside}_{key}"] + if len(bounds) == 1: + bounds = [bounds[0], bounds[0]] + config["bot"][pside][key] = min(max(bounds[0], 0.0), bounds[1]) return config def config_to_individual(config, param_bounds): individual = [] + keys_ignored = ["enforce_exposure_limit"] for pside in ["long", "short"]: is_enabled = ( 
param_bounds[f"{pside}_n_positions"][1] > 0.0 and param_bounds[f"{pside}_total_wallet_exposure_limit"][1] > 0.0 ) - individual += [(v if is_enabled else 0.0) for k, v in sorted(config["bot"][pside].items())] + individual += [ + (v if is_enabled else 0.0) + for k, v in sorted(config["bot"][pside].items()) + if k not in keys_ignored + ] # adjust to bounds bounds = [(low, high) for low, high in param_bounds.values()] adjusted = [max(min(x, bounds[z][1]), bounds[z][0]) for z, x in enumerate(individual)] @@ -307,7 +329,7 @@ def evaluate(self, individual): self.exchange_params[exchange], self.backtest_params[exchange], ) - analyses[exchange] = analysis + analyses[exchange] = expand_analysis(analysis, fills, config) analyses_combined = self.combine_analyses(analyses) w_0, w_1 = self.calc_fitness(analyses_combined) @@ -328,20 +350,38 @@ def combine_analyses(self, analyses): keys = analyses[next(iter(analyses))].keys() for key in keys: values = [analysis[key] for analysis in analyses.values()] - analyses_combined[f"{key}_mean"] = np.mean(values) - analyses_combined[f"{key}_min"] = np.min(values) - analyses_combined[f"{key}_max"] = np.max(values) - analyses_combined[f"{key}_std"] = np.std(values) + if not values or any([x == np.inf for x in values]): + analyses_combined[f"{key}_mean"] = 0.0 + analyses_combined[f"{key}_min"] = 0.0 + analyses_combined[f"{key}_max"] = 0.0 + analyses_combined[f"{key}_std"] = 0.0 + else: + try: + analyses_combined[f"{key}_mean"] = np.mean(values) + analyses_combined[f"{key}_min"] = np.min(values) + analyses_combined[f"{key}_max"] = np.max(values) + analyses_combined[f"{key}_std"] = np.std(values) + except Exception as e: + print("\n\n debug\n\n") + print("values", values) + print(e) + traceback.print_exc() + raise return analyses_combined def calc_fitness(self, analyses_combined): modifier = 0.0 - for i, key in [ - (5, "drawdown_worst"), - (4, "drawdown_worst_mean_1pct"), - (3, "equity_balance_diff_mean"), - (2, "loss_profit_ratio"), - ]: + keys = [ + "drawdown_worst", + "drawdown_worst_mean_1pct", + "equity_balance_diff_neg_max", + "equity_balance_diff_neg_mean", + "equity_balance_diff_pos_max", + "equity_balance_diff_pos_mean", + "loss_profit_ratio", + ] + i = len(keys) + 1 + for key in keys: modifier += ( max( self.config["optimize"]["limits"][f"lower_bound_{key}"], @@ -349,9 +389,10 @@ def calc_fitness(self, analyses_combined): ) - self.config["optimize"]["limits"][f"lower_bound_{key}"] ) * 10**i + i -= 1 if ( analyses_combined["drawdown_worst_max"] >= 1.0 - or analyses_combined["equity_balance_diff_max_max"] >= 1.0 + or analyses_combined["equity_balance_diff_neg_max_max"] >= 1.0 ): w_0 = w_1 = modifier else: @@ -483,23 +524,14 @@ async def main(): ) if args.config_path is None: logging.info(f"loading default template config configs/template.json") - config = load_config("configs/template.json") + config = load_config("configs/template.json", verbose=False) else: logging.info(f"loading config {args.config_path}") - config = load_config(args.config_path) + config = load_config(args.config_path, verbose=False) old_config = deepcopy(config) update_config_with_args(config, args) - config = format_config(config) - exchanges = config["backtest"]["exchanges"] - date_fname = ts_to_date_utc(utc_ms())[:19].replace(":", "_") - coins = sorted( - set([symbol_to_coin(x) for y in config["backtest"]["symbols"].values() for x in y]) - ) - coins_fname = "_".join(coins) if len(coins) <= 6 else f"{len(coins)}_coins" - hash_snippet = uuid4().hex[:8] - config["results_filename"] = 
make_get_filepath( - f"optimize_results/{date_fname}_{'_'.join(exchanges)}_{coins_fname}_{hash_snippet}_all_results.txt" - ) + config = format_config(config, verbose=False) + await add_all_eligible_coins_to_config(config) try: # Prepare data for each exchange @@ -508,8 +540,16 @@ async def main(): hlcvs_shapes = {} hlcvs_dtypes = {} msss = {} - for exchange in exchanges: - symbols, hlcvs, mss, results_path, cache_dir = await prepare_hlcvs_mss(config, exchange) + config["backtest"]["coins"] = {} + if config["backtest"]["combine_ohlcvs"]: + exchange = "combined" + coins, hlcvs, mss, results_path, cache_dir = await prepare_hlcvs_mss(config, exchange) + exchange_preference = defaultdict(list) + for coin in coins: + exchange_preference[mss[coin]["exchange"]].append(coin) + for ex in exchange_preference: + logging.info(f"chose {ex} for {','.join(exchange_preference[ex])}") + config["backtest"]["coins"][exchange] = coins hlcvs_dict[exchange] = hlcvs hlcvs_shapes[exchange] = hlcvs.shape hlcvs_dtypes[exchange] = hlcvs.dtype @@ -520,7 +560,44 @@ async def main(): shared_memory_file = create_shared_memory_file(hlcvs) shared_memory_files[exchange] = shared_memory_file logging.info(f"Finished creating shared memory file for {exchange}: {shared_memory_file}") + else: + tasks = {} + for exchange in config["backtest"]["exchanges"]: + tasks[exchange] = asyncio.create_task(prepare_hlcvs_mss(config, exchange)) + for exchange in config["backtest"]["exchanges"]: + coins, hlcvs, mss, results_path, cache_dir = await tasks[exchange] + config["backtest"]["coins"][exchange] = coins + hlcvs_dict[exchange] = hlcvs + hlcvs_shapes[exchange] = hlcvs.shape + hlcvs_dtypes[exchange] = hlcvs.dtype + msss[exchange] = mss + required_space = hlcvs.nbytes * 1.1 # Add 10% buffer + check_disk_space(tempfile.gettempdir(), required_space) + logging.info(f"Starting to create shared memory file for {exchange}...") + shared_memory_file = create_shared_memory_file(hlcvs) + shared_memory_files[exchange] = shared_memory_file + logging.info( + f"Finished creating shared memory file for {exchange}: {shared_memory_file}" + ) + exchanges = config["backtest"]["exchanges"] + exchanges_fname = "combined" if config["backtest"]["combine_ohlcvs"] else "_".join(exchanges) + date_fname = ts_to_date_utc(utc_ms())[:19].replace(":", "_") + coins = sorted(set([x for y in config["backtest"]["coins"].values() for x in y])) + coins_fname = "_".join(coins) if len(coins) <= 6 else f"{len(coins)}_coins" + hash_snippet = uuid4().hex[:8] + n_days = int( + round( + ( + date_to_ts(config["backtest"]["end_date"]) + - date_to_ts(config["backtest"]["start_date"]) + ) + / (1000 * 60 * 60 * 24) + ) + ) + config["results_filename"] = make_get_filepath( + f"optimize_results/{date_fname}_{exchanges_fname}_{n_days}days_{coins_fname}_{hash_snippet}_all_results.txt" + ) # Create results queue and start manager process manager = multiprocessing.Manager() results_queue = manager.Queue() diff --git a/src/passivbot.py b/src/passivbot.py index 715c9b2ab..60273671d 100644 --- a/src/passivbot.py +++ b/src/passivbot.py @@ -27,8 +27,7 @@ utc_ms, make_get_filepath, get_file_mod_utc, - get_first_ohlcv_timestamps, - get_first_ohlcv_timestamps_new, + get_first_timestamps_unified, load_config, add_arguments_recursively, update_config_with_args, @@ -37,13 +36,8 @@ coin_to_symbol, read_external_coins_lists, ) -from njit_funcs_recursive_grid import calc_recursive_entries_long, calc_recursive_entries_short from njit_funcs import ( - calc_samples, - calc_emas_last, calc_ema, - 
calc_close_grid_long, - calc_close_grid_short, calc_diff, calc_min_entry_qty, round_, @@ -166,7 +160,7 @@ def __init__(self, config: dict): self.debug_mode = False async def start_bot(self): - logging.info(f"Starting bot...") + logging.info(f"Starting bot {self.exchange}...") await self.init_markets() await asyncio.sleep(1) logging.info(f"Starting data maintainers...") @@ -272,9 +266,7 @@ async def update_first_timestamps(self, symbols=[]): symbols = sorted(set(symbols + flatten(self.approved_coins_minus_ignored_coins.values()))) if all([s in self.first_timestamps for s in symbols]): return - first_timestamps = await get_first_ohlcv_timestamps_new( - symbols=symbols, exchange=self.exchange - ) + first_timestamps = await get_first_timestamps_unified(symbols) self.first_timestamps.update(first_timestamps) for symbol in sorted(self.first_timestamps): symbolf = self.coin_to_symbol(symbol) @@ -288,7 +280,7 @@ async def update_first_timestamps(self, symbols=[]): def get_first_timestamp(self, symbol): if symbol not in self.first_timestamps: logging.info(f"warning: {symbol} missing from first_timestamps. Setting to zero.") - return 0.0 + self.first_timestamps[symbol] = 0.0 return self.first_timestamps[symbol] def coin_to_symbol(self, coin): @@ -357,9 +349,9 @@ async def execute_to_exchange(self): to_create = self.format_custom_ids(to_create) if self.debug_mode: if to_cancel: - print("would cancel:") - for x in to_cancel[: self.config["live"]["max_n_cancellations_per_batch"]]: - pprint.pprint(x) + print(f"would cancel {len(to_cancel)} orders") + # for x in to_cancel: + # pprint.pprint(x) else: res = await self.execute_cancellations( to_cancel[: self.config["live"]["max_n_cancellations_per_batch"]] @@ -369,9 +361,9 @@ async def execute_to_exchange(self): self.remove_cancelled_order(elm, source="POST") if self.debug_mode: if to_create: - print("would create:") - for x in to_create[: self.config["live"]["max_n_creations_per_batch"]]: - pprint.pprint(x) + print(f"would create {len(to_create)} orders") + # for x in to_create: + # pprint.pprint(x) else: res = None try: @@ -388,6 +380,8 @@ async def execute_to_exchange(self): await self.restart_bot_on_too_many_errors() if to_cancel or to_create: self.previous_REST_update_ts = 0 + if self.debug_mode: + return to_cancel, to_create def is_forager_mode(self, pside=None): if pside is None: @@ -426,10 +420,21 @@ def set_live_configs(self): self.live_configs[symbol]["leverage"] = self.config["live"]["leverage"] if symbol in self.flags and self.flags[symbol].live_config_path is not None: try: - loaded = load_config(self.flags[symbol].live_config_path) - logging.info( - f"successfully loaded {self.flags[symbol].live_config_path} for {symbol}" - ) + if os.path.exists(self.flags[symbol].live_config_path): + loaded = load_config(self.flags[symbol].live_config_path, verbose=False) + logging.info( + f"successfully loaded {self.flags[symbol].live_config_path} for {symbol}" + ) + else: + path2 = os.path.join( + os.path.dirname(self.config["live"]["base_config_path"]), + self.flags[symbol].live_config_path, + ) + if os.path.exists(path2): + loaded = load_config(path2, verbose=False) + logging.info(f"successfully loaded {path2} for {symbol}") + else: + raise for pside in loaded["bot"]: for k, v in loaded["bot"][pside].items(): if k not in skip: @@ -1035,6 +1040,11 @@ async def init_pnls(self): ) else: pnls_cache = await self.fetch_pnls(start_time=age_limit) + if pnls_cache: + try: + json.dump(pnls_cache, open(self.pnls_cache_filepath, "w")) + except Exception as e: + 
logging.error(f"error dumping pnls to {self.pnls_cache_filepath} {e}") self.pnls = pnls_cache async def update_pnls(self): @@ -1358,6 +1368,7 @@ def calc_ideal_orders(self): self.live_configs[symbol][pside]["close_trailing_qty_pct"], self.live_configs[symbol][pside]["close_trailing_retracement_pct"], self.live_configs[symbol][pside]["close_trailing_threshold_pct"], + bool(self.live_configs[symbol][pside]["enforce_exposure_limit"]), self.live_configs[symbol][pside]["wallet_exposure_limit"], self.balance, self.positions[symbol][pside]["size"], @@ -1378,16 +1389,15 @@ def calc_ideal_orders(self): ideal_orders_f = {} for symbol in ideal_orders: ideal_orders_f[symbol] = [] - with_pprice_diff = [ - (calc_diff(x[1], self.get_last_price(symbol)), x) for x in ideal_orders[symbol] - ] + last_mprice = self.get_last_price(symbol) + with_mprice_diff = [(calc_diff(x[1], last_mprice), x) for x in ideal_orders[symbol]] seen = set() - any_partial = any(["partial" in order[2] for _, order in with_pprice_diff]) - for pprice_diff, order in sorted(with_pprice_diff): + any_partial = any(["partial" in order[2] for _, order in with_mprice_diff]) + for mprice_diff, order in sorted(with_mprice_diff): position_side = "long" if "long" in order[2] else "short" if order[0] == 0.0: continue - if pprice_diff > self.config["live"]["price_distance_threshold"]: + if mprice_diff > self.config["live"]["price_distance_threshold"]: if any_partial and "entry" in order[2]: continue if any([x in order[2] for x in ["initial", "unstuck"]]): @@ -1398,15 +1408,26 @@ def calc_ideal_orders(self): if seen_key in seen: logging.info(f"debug duplicate ideal order {symbol} {order}") continue + order_side = determine_side_from_order_tuple(order) + order_type = "limit" + if self.config["live"]["market_orders_allowed"] and ( + ("grid" in order[2] and mprice_diff < 0.0001) + or ("trailing" in order[2] and mprice_diff < 0.001) + or ("auto_reduce" in order[2] and mprice_diff < 0.001) + or (order_side == "buy" and order[1] >= last_mprice) + or (order_side == "sell" and order[1] <= last_mprice) + ): + order_type = "market" ideal_orders_f[symbol].append( { "symbol": symbol, - "side": determine_side_from_order_tuple(order), + "side": order_side, "position_side": position_side, "qty": abs(order[0]), "price": order[1], "reduce_only": "close" in order[2], "custom_id": order[2], + "type": order_type, } ) seen.add(seen_key) @@ -1415,12 +1436,15 @@ def calc_ideal_orders(self): def calc_unstucking_close(self, ideal_orders): stuck_positions = [] pnls_cumsum = np.array([x["pnl"] for x in self.pnls]).cumsum() + pnls_cumsum_max, pnls_cumsum_last = ( + (pnls_cumsum.max(), pnls_cumsum[-1]) if len(pnls_cumsum) > 0 else (0.0, 0.0) + ) unstuck_allowances = {"long": 0.0, "short": 0.0} for symbol in self.positions: for pside in ["long", "short"]: if ( self.has_position(pside, symbol) - and self.live_configs[symbol][pside]["unstuck_loss_allowance_pct"] > 0.0 + and self.config["bot"][pside]["unstuck_loss_allowance_pct"] > 0.0 ): wallet_exposure = pbr.calc_wallet_exposure( self.c_mults[symbol], @@ -1438,8 +1462,8 @@ def calc_unstucking_close(self, ideal_orders): self.balance, self.config["bot"][pside]["unstuck_loss_allowance_pct"] * self.config["bot"][pside]["total_wallet_exposure_limit"], - pnls_cumsum.max(), - pnls_cumsum[-1], + pnls_cumsum_max, + pnls_cumsum_last, ) if len(pnls_cumsum) > 0 else 0.0 @@ -1630,27 +1654,27 @@ def calc_orders_to_cancel_and_create(self): ] to_cancel += to_cancel_ to_create += to_create_ - to_create_with_pprice_diff = [] + 
to_create_with_mprice_diff = [] for x in to_create: try: - to_create_with_pprice_diff.append( + to_create_with_mprice_diff.append( (calc_diff(x["price"], self.get_last_price(x["symbol"])), x) ) except Exception as e: - logging.info(f"debug: price missing sort to_create by pprice_diff {x} {e}") - to_create_with_pprice_diff.append((0.0, x)) - to_create_with_pprice_diff.sort(key=lambda x: x[0]) - to_cancel_with_pprice_diff = [] + logging.info(f"debug: price missing sort to_create by mprice_diff {x} {e}") + to_create_with_mprice_diff.append((0.0, x)) + to_create_with_mprice_diff.sort(key=lambda x: x[0]) + to_cancel_with_mprice_diff = [] for x in to_cancel: try: - to_cancel_with_pprice_diff.append( + to_cancel_with_mprice_diff.append( (calc_diff(x["price"], self.get_last_price(x["symbol"])), x) ) except Exception as e: - logging.info(f"debug: price missing sort to_cancel by pprice_diff {x} {e}") - to_cancel_with_pprice_diff.append((0.0, x)) - to_cancel_with_pprice_diff.sort(key=lambda x: x[0]) - return [x[1] for x in to_cancel_with_pprice_diff], [x[1] for x in to_create_with_pprice_diff] + logging.info(f"debug: price missing sort to_cancel by mprice_diff {x} {e}") + to_cancel_with_mprice_diff.append((0.0, x)) + to_cancel_with_mprice_diff.sort(key=lambda x: x[0]) + return [x[1] for x in to_cancel_with_mprice_diff], [x[1] for x in to_create_with_mprice_diff] async def restart_bot_on_too_many_errors(self): if not hasattr(self, "error_counts"): @@ -2242,7 +2266,11 @@ async def shutdown_bot(bot): async def main(): parser = argparse.ArgumentParser(prog="passivbot", description="run passivbot") parser.add_argument( - "config_path", type=str, nargs="?", default=None, help="path to hjson passivbot config" + "config_path", + type=str, + nargs="?", + default="configs/template.json", + help="path to hjson passivbot config", ) template_config = get_template_live_config("v7") @@ -2250,11 +2278,10 @@ async def main(): del template_config["backtest"] add_arguments_recursively(parser, template_config) args = parser.parse_args() - config = load_config( - "configs/template.json" if args.config_path is None else args.config_path, live_only=True - ) + config = load_config(args.config_path, live_only=True) update_config_with_args(config, args) config = format_config(config, live_only=True) + config["live"]["base_config_path"] = args.config_path cooldown_secs = 60 restarts = [] while True: diff --git a/src/procedures.py b/src/procedures.py index 4f050109a..0e1ab6de6 100644 --- a/src/procedures.py +++ b/src/procedures.py @@ -9,11 +9,13 @@ import pprint from copy import deepcopy import argparse +import re from collections import defaultdict from collections.abc import Sized import sys -from typing import Union, Optional, Set, Any +from typing import Union, Optional, Set, Any, List from pathlib import Path +import ccxt.async_support as ccxta try: import hjson @@ -189,14 +191,7 @@ def format_config(config: dict, verbose=True, live_only=False) -> dict: f"changed backtest.exchange: {result['backtest']['exchange']} -> backtest.exchanges: [{result['backtest']['exchange']}]" ) del result["backtest"]["exchange"] - for k0 in template: - for k1 in template[k0]: - if k0 not in result: - raise Exception(f"Fatal: {k0} missing from config") - if k1 not in result[k0]: - result[k0][k1] = template[k0][k1] - if verbose: - print(f"adding missing parameter {k0}.{k1}: {template[k0][k1]}") + add_missing_keys_recursively(template, result, verbose=verbose) if not live_only: for k_coins in ["approved_coins", "ignored_coins"]: path = 
result["live"][k_coins] @@ -223,40 +218,24 @@ def format_config(config: dict, verbose=True, live_only=False) -> dict: "long": deepcopy(result["live"][k_coins]), "short": deepcopy(result["live"][k_coins]), } - result["backtest"]["symbols"] = {} - for exchange in result["backtest"]["exchanges"]: - eligible_symbols = get_all_eligible_symbols(exchange) - ignored_coins = coins_to_symbols( - set(flatten(result["live"]["ignored_coins"].values())), - eligible_symbols=eligible_symbols, - exchange=exchange, - verbose=verbose, - ) - approved_coins = coins_to_symbols( - set(flatten(result["live"]["approved_coins"].values())), - eligible_symbols=eligible_symbols, - exchange=exchange, - verbose=verbose, - ) - if approved_coins: - result["backtest"]["symbols"][exchange] = [ - x - for x in coins_to_symbols( - sorted(approved_coins), - eligible_symbols=eligible_symbols, - exchange=exchange, - verbose=verbose, - ) - if x not in ignored_coins - ] - else: - result["backtest"]["symbols"][exchange] = [ - s for s in sorted(get_all_eligible_symbols(exchange)) if s not in ignored_coins - ] result["backtest"]["end_date"] = format_end_date(result["backtest"]["end_date"]) return result +def add_missing_keys_recursively(src, dst, parent=[], verbose=True): + for k in src: + if isinstance(src[k], dict): + if k not in dst: + raise Exception(f"Fatal: {k} missing from config") + else: + add_missing_keys_recursively(src[k], dst[k], parent + [k]) + else: + if k not in dst: + if verbose: + print(f"Adding missing key -> val {'.'.join(parent + [k])} -> {src[k]} to config") + dst[k] = src[k] + + def get_all_eligible_symbols(exchange="binance"): exchange_map = { "bybit": "bybit", @@ -329,7 +308,7 @@ def coin_to_symbol(coin, eligible_symbols=None, quote="USDT", verbose=True): print(f"coin_to_symbol {coin} {coinf}: ambiguous coin, multiple candidates {candidates}") else: if verbose: - print(f"coin_to_symbol no candidate symbol for {coin} {coinf}") + print(f"coin_to_symbol no candidate symbol for {coin}, {coinf}") return "" @@ -343,7 +322,7 @@ def coins_to_symbols(coins: [str], eligible_symbols=None, exchange=None, verbose def format_end_date(end_date) -> str: if end_date in ["today", "now", "", None]: ms2day = 1000 * 60 * 60 * 24 - end_date = ts_to_date_utc((utc_ms() - ms2day) // ms2day * ms2day) + end_date = ts_to_date_utc((utc_ms() - ms2day * 2) // ms2day * ms2day) else: end_date = ts_to_date_utc(date_to_ts2(end_date)) return end_date[:10] @@ -367,7 +346,7 @@ def dump_config(config: dict, filepath: str): def dump_pretty_json(data: dict, filepath: str): try: with open(filepath, "w") as f: - f.write(config_pretty_str(sort_dict_keys(data))) + f.write(config_pretty_str(sort_dict_keys(data)) + "\n") except Exception as e: raise Exception(f"failed to dump data {filepath}: {e}") @@ -958,6 +937,207 @@ def print_async_exception(coro): pass +async def get_first_timestamps_unified(coins: List[str], exchange: str = None): + """ + Returns earliest timestamp each coin was found on any exchange by default. + If 'exchange' is specified, returns earliest timestamps specifically for that exchange. + + Batches requests in groups of 10 coins at a time, and dumps results to disk + immediately after each batch is processed. + + :param coins: List of coin symbols to retrieve first-timestamp data for. + :param exchange: Optional string specifying a single exchange (e.g., 'binanceusdm'). + If set, tries to return first timestamps for only that exchange. + :return: Dictionary of coin -> earliest timestamp (ms). 
If `exchange` is provided, + only entries for the specified exchange are returned. + """ + + async def fetch_ohlcv_with_start(exchange_name, symbol, cc): + """ + Fetch OHLCV data for `symbol` on `exchange_name`, starting from a + specific date range based on the exchange’s known data availability. + Returns a list of candle data. + """ + if exchange_name == "binanceusdm": + # Data starts practically 'forever' in this example + return await cc.fetch_ohlcv(symbol, since=1, timeframe="1d") + + elif exchange_name in ["bybit", "gateio"]: + # Data since 2018 + return await cc.fetch_ohlcv(symbol, since=int(date2ts_utc("2018-01-01")), timeframe="1d") + + elif exchange_name == "okx": + # Monthly timeframe; data since 2018 + return await cc.fetch_ohlcv(symbol, since=int(date2ts_utc("2018-01-01")), timeframe="1M") + + elif exchange_name == "bitget": + # Weekly timeframe; data since 2018 + return await cc.fetch_ohlcv(symbol, since=int(date2ts_utc("2018-01-01")), timeframe="1w") + + else: # e.g., 'hyperliquid' + # Weekly timeframe; data since 2021 + return await cc.fetch_ohlcv(symbol, since=int(date2ts_utc("2021-01-01")), timeframe="1w") + + # Remove duplicates and sort the input coins for consistency + coins = sorted(set(symbol_to_coin(coin) for coin in coins)) + + # Paths to the cache files + cache_fpath = make_get_filepath("caches/first_ohlcv_timestamps_unified.json") + cache_fpath_exchange_specific = "caches/first_ohlcv_timestamps_unified_exchange_specific.json" + + # In-memory dictionaries for storing timestamps + ftss = {} # coin -> earliest timestamp across all exchanges + ftss_exchange_specific = {} # coin -> {exchange -> earliest timestamp} + + # Load main cache if it exists + if os.path.exists(cache_fpath): + try: + with open(cache_fpath, "r") as f: + ftss = json.load(f) + print(f"Loaded from main cache: {cache_fpath}") + except Exception as e: + print(f"Error reading {cache_fpath}: {e}") + + # Load exchange-specific cache if it exists + if os.path.exists(cache_fpath_exchange_specific): + try: + with open(cache_fpath_exchange_specific, "r") as f: + ftss_exchange_specific = json.load(f) + print(f"Loaded from exchange-specific cache: {cache_fpath_exchange_specific}") + except Exception as e: + print(f"Error reading {cache_fpath_exchange_specific}: {e}") + + # If an exchange is specified, handle "binance" alias + if exchange == "binance": + exchange = "binanceusdm" + + # 1) If no exchange is specified and all coins are in ftss, just return ftss + if exchange is None: + if all(coin in ftss for coin in coins): + return ftss + + # 2) If a specific exchange is requested: + else: + # If all coins exist in the exchange-specific cache for that exchange, return them + if all(coin in ftss_exchange_specific for coin in coins): + if all(exchange in ftss_exchange_specific[coin] for coin in coins): + # Return a simplified dict coin->timestamp + return {c: ftss_exchange_specific[c][exchange] for c in coins} + + # Figure out which coins are missing from the main dictionary + missing_coins = {c for c in coins if c not in ftss} + if not missing_coins: + # No missing coins => all already in ftss + return ftss + + print("Missing coins:", sorted(missing_coins)) + + # Map of exchange -> quote currency + exchange_map = { + "okx": "USDT", + "binanceusdm": "USDT", + "bybit": "USDT", + "gateio": "USDT", + "bitget": "USDT", + "hyperliquid": "USDC", + } + + # Initialize ccxt clients for each exchange + ccxt_clients = {} + for ex_name in exchange_map: + ccxt_clients[ex_name] = getattr(ccxta, ex_name)() + 
ccxt_clients[ex_name].options["defaultType"] = "swap" + try: + print("Loading markets for each exchange...") + await asyncio.gather(*(ccxt_clients[e].load_markets() for e in ccxt_clients)) + + # We'll fetch missing coins in batches of 10 to avoid overloading + BATCH_SIZE = 10 + missing_coins = sorted(missing_coins) + + for i in range(0, len(missing_coins), BATCH_SIZE): + batch = missing_coins[i : i + BATCH_SIZE] + print(f"\nProcessing batch: {batch}") + + # Create tasks for every coin/exchange pair in this batch + tasks = {} + for coin in batch: + tasks[coin] = {} + for ex_name, quote in exchange_map.items(): + # Build list of eligible swap symbols on this exchange + eligible_symbols = [ + s + for s in ccxt_clients[ex_name].markets + if ccxt_clients[ex_name].markets[s]["swap"] + ] + # Convert coin to a symbol recognized by the exchange, e.g. "BTC/USDT" + symbol = coin_to_symbol(coin, eligible_symbols, quote=quote, verbose=False) + if symbol: + tasks[coin][ex_name] = asyncio.create_task( + fetch_ohlcv_with_start(ex_name, symbol, ccxt_clients[ex_name]) + ) + + # Gather all results for this batch + batch_results = {} + for coin in batch: + batch_results[coin] = {} + for ex_name in exchange_map: + if ex_name in tasks[coin]: + try: + data = await tasks[coin][ex_name] + if data: + batch_results[coin][ex_name] = data + print( + f"Fetched {ex_name} {coin} => first candle: {data[0] if data else 'no data'}" + ) + except Exception as e: + print(f"Error fetching {ex_name} {coin}: {e}") + + # Process results for each coin in this batch + for coin in batch: + exchange_data = batch_results.get(coin, {}) + fts_for_this_coin = {ex: 0.0 for ex in exchange_map} # default 0.0 for all + earliest_candidates = [] + + for ex_name, arr in exchange_data.items(): + if arr and len(arr) > 0: + # arr[0][0] is the timestamp in ms + # Only consider "reasonable" timestamps after 2010 + if arr[0][0] > 1262304000000.0: + earliest_candidates.append(arr[0][0]) + fts_for_this_coin[ex_name] = arr[0][0] + + # If any valid timestamps found, keep the earliest + if earliest_candidates: + ftss[coin] = min(earliest_candidates) + else: + print(f"No valid first timestamp for coin {coin}") + ftss[coin] = 0.0 + + # Update the exchange-specific dictionary + ftss_exchange_specific[coin] = fts_for_this_coin + + # Immediately dump updated dictionaries to disk after each batch + with open(cache_fpath, "w") as f: + json.dump(ftss, f, indent=4, sort_keys=True) + + with open(cache_fpath_exchange_specific, "w") as f: + json.dump(ftss_exchange_specific, f, indent=4, sort_keys=True) + + print(f"Finished batch {batch}. Caches updated.") + + # Close all ccxt client sessions + + # If a single exchange was requested, return only those exchange-specific timestamps. 
+ if exchange is not None: + return {coin: ftss_exchange_specific.get(coin, {}).get(exchange, 0.0) for coin in coins} + + # Otherwise, return earliest cross-exchange timestamps + return ftss + finally: + await asyncio.gather(*(ccxt_clients[e].close() for e in ccxt_clients)) + + async def get_first_ohlcv_timestamps_new(symbols=None, exchange="binance"): supported_exchanges = { "binance": "binanceusdm", @@ -1339,20 +1519,23 @@ def create_acronym(full_name, acronyms=set()): i += 1 if i > 100: raise Exception(f"too many acronym duplicates {acronym}") - break shortened_name = full_name for k in [ - "backtest_", - "live_", - "optimize_bounds_", - "optimize_limits_lower_bound_", - "optimize_", - "bot_", + "backtest.", + "live.", + "optimize.bounds.", + "optimize.limits.lower_bound", + "optimize.", + "bot.", ]: - if full_name.startswith(k): - shortened_name = full_name.replace(k, "") + if shortened_name.startswith(k): + shortened_name = shortened_name.replace(k, "") break - acronym = "".join(word[0] for word in shortened_name.split("_")) + + # Split on both '_' and '.' using regex + splitted = re.split(r"[._]+", shortened_name) + acronym = "".join(word[0] for word in splitted if word) # skip any empty splits + if acronym not in acronyms: break acronym += str(i) @@ -1375,7 +1558,7 @@ def add_arguments_recursively(parser, config, prefix="", acronyms=set()): full_name = f"{prefix}{key}" if isinstance(value, dict): - add_arguments_recursively(parser, value, f"{full_name}_", acronyms=acronyms) + add_arguments_recursively(parser, value, f"{full_name}.", acronyms=acronyms) else: acronym = create_acronym(full_name, acronyms) appendix = "" @@ -1388,7 +1571,7 @@ def add_arguments_recursively(parser, config, prefix="", acronyms=set()): elif any([x in full_name for x in ["ignored_coins", "exchanges"]]): type_ = comma_separated_values appendix = "item1,item2,item3,..." - elif "optimize_scoring" in full_name: + elif "scoring" in full_name: type_ = comma_separated_values acronym = "os" appendix = "Examples: adg,sharpe_ratio; mdg,sortino_ratio; ..." @@ -1399,8 +1582,14 @@ def add_arguments_recursively(parser, config, prefix="", acronyms=set()): elif type_ == bool: type_ = str2bool appendix = "[y/n]" + if "combine_ohlcvs" in full_name: + appendix = ( + "If true, combine ohlcvs data from all exchanges into single numpy array, otherwise backtest each exchange separately. 
" + + appendix + ) parser.add_argument( f"--{full_name}", + f"--{full_name.replace('.', '_')}", f"-{acronym}", type=type_, dest=full_name, @@ -1423,10 +1612,10 @@ def recursive_config_update(config, key, value, path=None): config[key] = value return True - key_split = key.split("_") + key_split = key.split(".") if key_split[0] in config: new_path = path + [key_split[0]] - return recursive_config_update(config[key_split[0]], "_".join(key_split[1:]), value, new_path) + return recursive_config_update(config[key_split[0]], ".".join(key_split[1:]), value, new_path) return False diff --git a/src/pure_funcs.py b/src/pure_funcs.py index f1565e841..cd60089cc 100644 --- a/src/pure_funcs.py +++ b/src/pure_funcs.py @@ -501,15 +501,17 @@ def flatten(lst: list) -> list: return [y for x in lst for y in x] -def get_template_live_config(passivbot_mode="neat_grid"): +def get_template_live_config(passivbot_mode="v7"): if passivbot_mode == "v7": return { "backtest": { "base_dir": "backtests", + "combine_ohlcvs": True, "compress_cache": True, "end_date": "now", - "exchanges": ["binance", "bybit"], - "start_date": "2021-05-01", + "exchanges": ["binance", "bybit", "gateio", "bitget"], + "gap_tolerance_ohlcvs_minutes": 120.0, + "start_date": "2021-04-01", "starting_balance": 100000.0, }, "bot": { @@ -523,6 +525,7 @@ def get_template_live_config(passivbot_mode="neat_grid"): "close_trailing_threshold_pct": 0.008, "ema_span_0": 1318.0, "ema_span_1": 1435.0, + "enforce_exposure_limit": True, "entry_grid_double_down_factor": 0.894, "entry_grid_spacing_pct": 0.04, "entry_grid_spacing_weight": 0.697, @@ -550,6 +553,7 @@ def get_template_live_config(passivbot_mode="neat_grid"): "close_trailing_threshold_pct": 0.008, "ema_span_0": 1318.0, "ema_span_1": 1435.0, + "enforce_exposure_limit": True, "entry_grid_double_down_factor": 0.894, "entry_grid_spacing_pct": 0.04, "entry_grid_spacing_weight": 0.697, @@ -579,6 +583,7 @@ def get_template_live_config(passivbot_mode="neat_grid"): "forced_mode_short": "", "ignored_coins": [], "leverage": 10.0, + "market_orders_allowed": True, "max_n_cancellations_per_batch": 5, "max_n_creations_per_batch": 3, "max_n_restarts_per_day": 10, @@ -648,9 +653,12 @@ def get_template_live_config(passivbot_mode="neat_grid"): "iters": 30000, "limits": { "lower_bound_drawdown_worst": 0.25, - "lower_bound_drawdown_worst_mean_1pct": 0.1, - "lower_bound_equity_balance_diff_mean": 0.03, - "lower_bound_loss_profit_ratio": 0.75, + "lower_bound_drawdown_worst_mean_1pct": 0.15, + "lower_bound_equity_balance_diff_neg_max": 0.35, + "lower_bound_equity_balance_diff_neg_mean": 0.005, + "lower_bound_equity_balance_diff_pos_max": 0.5, + "lower_bound_equity_balance_diff_pos_mean": 0.01, + "lower_bound_loss_profit_ratio": 0.6, }, "mutation_probability": 0.2, "n_cpus": 5, diff --git a/src/tools/extract_best_config.py b/src/tools/extract_best_config.py index 81904e3d5..2a1a8c38a 100644 --- a/src/tools/extract_best_config.py +++ b/src/tools/extract_best_config.py @@ -204,7 +204,6 @@ def process_single(file_location, verbose=False): del best_d["config"] fjson = config_pretty_str(best_d) print_(fjson) - coins = [s.replace("USDT", "") for s in best_d["backtest"]["symbols"]] print_(file_location) full_path = file_location.replace("_all_results.txt", "") + ".json" base_path = os.path.split(full_path)[0] diff --git a/src/tools/generate_mcap_list.py b/src/tools/generate_mcap_list.py new file mode 100644 index 000000000..9ca306059 --- /dev/null +++ b/src/tools/generate_mcap_list.py @@ -0,0 +1,162 @@ +import requests +import json 
+import argparse
+import sys
+import os
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+from pure_funcs import calc_hash, symbol_to_coin, ts_to_date_utc
+from procedures import utc_ms
+
+
+def is_stablecoin(elm):
+    if elm["symbol"] in ["tether", "usdb", "usdy", "tusd", "usd0", "usde"]:
+        return True
+    if (
+        all([abs(elm[k] - 1.0) < 0.01 for k in ["high_24h", "low_24h", "current_price"]])
+        and abs(elm["price_change_24h"]) < 0.01
+    ):
+        return True
+    return False
+
+
+def get_top_market_caps(n_coins, minimum_market_cap_millions, exchange=None):
+    # Fetch the top N coins by market cap
+    markets_url = "https://api.coingecko.com/api/v3/coins/markets"
+    per_page = 150
+    page = 1
+    params = {
+        "vs_currency": "usd",
+        "order": "market_cap_desc",
+        "per_page": per_page,
+        "page": 1,
+        "sparkline": "false",
+    }
+    minimum_market_cap = minimum_market_cap_millions * 1e6
+    approved_coins = {}
+    prev_hash = None
+    exchange_approved_coins = None
+    if exchange is not None:
+        exchanges = exchange.split(",")
+        import ccxt
+
+        exchange_map = {
+            "bybit": ("bybit", "USDT"),
+            "binance": ("binanceusdm", "USDT"),
+            "bitget": ("bitget", "USDT"),
+            "hyperliquid": ("hyperliquid", "USDC"),
+            "gateio": ("gateio", "USDT"),
+            "okx": ("okx", "USDT"),
+        }
+        exchange_approved_coins = set()
+        for exchange in exchanges:
+            try:
+                cc = getattr(ccxt, exchange_map[exchange][0])()
+                cc.options["defaultType"] = "swap"
+                markets = cc.fetch_markets()
+                for elm in markets:
+                    if (
+                        elm["swap"]
+                        and elm["active"]
+                        and elm["symbol"].endswith(f":{exchange_map[exchange][1]}")
+                    ):
+                        exchange_approved_coins.add(symbol_to_coin(elm["symbol"]))
+                print(f"Added coin filter for {exchange}")
+            except Exception as e:
+                print(f"error loading ccxt for {exchange} {e}")
+    while len(approved_coins) < n_coins:
+        response = requests.get(markets_url, params=params)
+        if response.status_code != 200:
+            print(f"Error fetching market data: {response.status_code} - {response.text}")
+            break
+        market_data = response.json()
+        new_hash = calc_hash(market_data)
+        if new_hash == prev_hash:
+            break
+        prev_hash = new_hash
+        added = []
+        disapproved = {}
+        for elm in market_data:
+            coin = elm["symbol"].upper()
+            if len(approved_coins) >= n_coins:
+                print(f"N coins == {n_coins}")
+                if added:
+                    print(f"Added approved coins {','.join(added)}")
+                return approved_coins
+            if elm["market_cap"] < minimum_market_cap:
+                print("Lowest market cap", coin)
+                if added:
+                    print(f"Added approved coins {','.join(added)}")
+                return approved_coins
+            if is_stablecoin(elm):
+                disapproved[coin] = "stablecoin"
+                continue
+            if exchange_approved_coins is not None and coin not in exchange_approved_coins:
+                disapproved[coin] = "not_active"
+                continue
+            if coin not in approved_coins:
+                approved_coins[coin] = elm
+                added.append(coin)
+        print(f"added approved coins {','.join(added)}")
+        if disapproved:
+            for key in set(disapproved.values()):
+                to_print = [c for c in disapproved if disapproved[c] == key]
+                print(f"disapproved {key} {','.join(to_print)}")
+            disapproved = {}
+        if len(approved_coins) >= n_coins:
+            break
+        params["page"] += 1
+    return approved_coins
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(prog="mcap generator", description="generate_mcap_list")
+    parser.add_argument(
+        "--n_coins",
+        "-n",
+        type=int,
+        dest="n_coins",
+        required=False,
+        default=100,
+        help="Maximum number of top market cap coins. Default=100",
+    )
+    parser.add_argument(
+        "--minimum_market_cap_dollars",
+        "-m",
+        type=float,
+        dest="minimum_market_cap_millions",
+        required=False,
+        default=300.0,
+        help="Minimum market cap in millions of USD. Default=300.0",
+    )
+    parser.add_argument(
+        "--exchange",
+        "-e",
+        type=str,
+        dest="exchange",
+        required=False,
+        default=None,
+        help="Optional: filter by coins available on exchange. Comma separated values. Default=None",
+    )
+    parser.add_argument(
+        "--output",
+        "-o",
+        type=str,
+        dest="output",
+        required=False,
+        default=None,
+        help="Optional: Output path. Default=configs/approved_coins_{n_coins}_{min_mcap}.json",
+    )
+    args = parser.parse_args()
+
+    market_caps = get_top_market_caps(args.n_coins, args.minimum_market_cap_millions, args.exchange)
+    if args.output is None:
+        fname = f"configs/approved_coins_{ts_to_date_utc(utc_ms())[:10]}"
+        fname += f"_{args.n_coins}_coins_{int(args.minimum_market_cap_millions)}_min_mcap"
+        if args.exchange is not None:
+            fname += "_" + "_".join(args.exchange.split(","))
+        fname += ".json"
+    else:
+        fname = args.output
+    print(f"Dumping output to {fname}")
+    json.dump(list(market_caps), open(fname, "w"))