# Deep reinforcement learning based stock trading (Stable Baselines3 + Dow Jones)

FinRL is an open-source library that uses deep reinforcement learning (DRL) for financial trading decision-making. FinRL supports several DRL libraries, e.g., Stable Baselines3 and ElegantRL. Stable Baselines3 is a high-quality, easy-to-use DRL library implemented in Python. It is built on top of OpenAI Gym and provides a simple interface to train and evaluate DRL models. In this article, we use the Dow Jones constituents as a stock pool and Stable Baselines3 to train DRL agents.

## 1 Task Description

We train a DRL agent for stock trading. This task is modeled as a Markov Decision Process (MDP).
We specify the state-action-reward as follows:

### State s:
The state space represents an agent’s perception of the market environment. Just like a human trader analyzing various information, our agent passively observes many features and learns by interacting with the market environment (usually by replaying historical data).
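
To make the state dimension concrete, here is a small sketch of how the appendix code sizes the state vector: one cash-balance entry, plus a price and a share holding for each stock, plus one entry per technical indicator per stock. The indicator count below is an illustrative placeholder for `len(INDICATORS)`, which depends on the FinRL configuration.

```python
stock_dimension = 30   # 30 DJIA constituents
num_indicators = 8     # placeholder for len(INDICATORS); depends on the FinRL config

# 1 (cash) + 30 prices + 30 share holdings + num_indicators * 30 stocks
state_space = 1 + 2 * stock_dimension + num_indicators * stock_dimension
print(state_space)  # 301 with these illustrative numbers
```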

### Action a:
The action space includes the allowed actions that an agent can take at each state. For example, a ∈ {−1, 0, 1}, where −1, 0, 1 represent selling, holding, and buying. When an action operates on multiple shares, a ∈ {−k, …, −1, 0, 1, …, k}; e.g., “Buy 10 shares of AAPL” and “Sell 10 shares of AAPL” are represented as 10 and −10, respectively.
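
To make the share convention concrete, here is a small illustrative sketch (hypothetical values) of how a normalized per-stock action can be scaled by the `hmax = 100` setting used in the environment configuration below to obtain a number of shares:

```python
import numpy as np

hmax = 100  # maximum shares traded per stock per step (same setting as env_kwargs below)

# Hypothetical normalized actions from the agent, one value per stock in [-1, 1]
raw_actions = np.array([0.10, -1.00, 0.00])

# Scale to integer share amounts: positive = buy, negative = sell, zero = hold
shares = (raw_actions * hmax).astype(int)
print(shares)  # [ 10 -100    0] -> buy 10 shares, sell 100 shares, hold
```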
### Reward r:
Reward is an incentive for an agent to learn a better policy. For example, it can be the change of the portfolio value when taking action a at state s and arriving at the new state s′.
### Market environment:
The 30 constituent stocks of the Dow Jones Industrial Average (DJIA) index, accessed at the starting date of the testing period.

The data is obtained from the Yahoo Finance API and contains Open-High-Low-Close prices and volume.
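
As a sketch of how such data can be pulled with FinRL's `YahooDownloader` (the same helper used in the appendix code; exact import paths can vary between FinRL versions, and the date range here is illustrative only):

```python
from finrl.config_tickers import DOW_30_TICKER
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader

# Download daily OHLCV data for the 30 DJIA constituents over an illustrative date range
df = YahooDownloader(
    start_date="2020-01-01",  # illustrative dates, not the ones used in the study
    end_date="2021-01-01",
    ticker_list=DOW_30_TICKER,
).fetch_data()

print(df.head())  # columns include date, open, high, low, close, volume, tic
```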

## 2 Install and import

Add the local FinRL checkout to the Python path and import the required packages:

```python
import sys

sys.path.append("../FinRL")
import itertools
```
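
Most of the install and import steps are truncated in this excerpt. Assuming FinRL is installed (for example with `pip install finrl`, or from the AI4Finance-Foundation/FinRL repository on GitHub), the `stock_trading` function in the appendix relies on imports roughly along the following lines; exact module paths differ between FinRL versions, so treat this as a sketch rather than the article's exact import cell:

```python
import itertools
import sys

import pandas as pd
from stable_baselines3.common.logger import configure

# FinRL helpers used by stock_trading() in the appendix (paths per recent FinRL releases)
from finrl.config import (
    DATA_SAVE_DIR,
    INDICATORS,
    RESULTS_DIR,
    TENSORBOARD_LOG_DIR,
    TRAINED_MODEL_DIR,
)
from finrl.config_tickers import DOW_30_TICKER
from finrl.main import check_and_make_directories
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, get_baseline
# plot_return is also used in the appendix; depending on the FinRL version it lives
# in finrl.plot or is defined locally alongside the script.
```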

### 3. Set parameters and run

We set parameters for the function `stock_trading` and run it. The parameter definitions at the top of the snippet are truncated in this excerpt; the call ends as follows:

```python
    # ... date-range, storage, and algorithm-selection arguments ...
    if_using_sac=if_using_sac,
    if_using_td3=if_using_td3,
)
```
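
For completeness, here is a minimal sketch of what the truncated parameter block and call typically look like, based on the `stock_trading` signature reproduced in the appendix. The dates and flags below are illustrative placeholders, not the values used in the original run:

```python
# Illustrative settings only; substitute your own date ranges.
train_start_date = "2014-01-01"
train_end_date = "2020-07-31"
trade_start_date = "2020-08-01"
trade_end_date = "2021-10-29"

stock_trading(
    train_start_date=train_start_date,
    train_end_date=train_end_date,
    trade_start_date=trade_start_date,
    trade_end_date=trade_end_date,
    if_store_actions=True,  # save each agent's daily actions to CSV
    if_store_result=True,   # save the merged account-value table to result.csv
    if_using_a2c=True,      # toggle individual algorithms on or off
    if_using_ddpg=True,
    if_using_ppo=True,
    if_using_sac=True,
    if_using_td3=True,
)
```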
### 4 Result

Figure: assets of the agents and DJI.

## Appendix

The imported function `stock_trading` is reproduced here for easy reference.

```python
def stock_trading(
    train_start_date: str,
    train_end_date: str,
    trade_start_date: str,
    trade_end_date: str,
    if_store_actions: bool = True,
    if_store_result: bool = True,
    if_using_a2c: bool = True,
    if_using_ddpg: bool = True,
    if_using_ppo: bool = True,
    if_using_sac: bool = True,
    if_using_td3: bool = True,
):
    sys.path.append("../FinRL")
    check_and_make_directories(
        [DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR]
    )
    date_col = "date"
    tic_col = "tic"

    # download raw OHLCV data for the Dow 30 tickers over the full train + trade window
    df = YahooDownloader(
        start_date=train_start_date, end_date=trade_end_date, ticker_list=DOW_30_TICKER
    ).fetch_data()

    # add technical indicators, VIX, and turbulence features
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=INDICATORS,
        use_vix=True,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # build a full (date, ticker) grid so every ticker has a row for every date
    list_ticker = processed[tic_col].unique().tolist()
    list_date = list(
        pd.date_range(processed[date_col].min(), processed[date_col].max()).astype(str)
    )
    combination = list(itertools.product(list_date, list_ticker))

    init_train_trade_data = pd.DataFrame(
        combination, columns=[date_col, tic_col]
    ).merge(processed, on=[date_col, tic_col], how="left")
    init_train_trade_data = init_train_trade_data[
        init_train_trade_data[date_col].isin(processed[date_col])
    ]
    init_train_trade_data = init_train_trade_data.sort_values([date_col, tic_col])
    init_train_trade_data = init_train_trade_data.fillna(0)

    # split into training and trading (out-of-sample) periods
    init_train_data = data_split(
        init_train_trade_data, train_start_date, train_end_date
    )
    init_trade_data = data_split(
        init_train_trade_data, trade_start_date, trade_end_date
    )

    stock_dimension = len(init_train_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(INDICATORS) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
    buy_cost_list = sell_cost_list = [0.001] * stock_dimension
    num_stock_shares = [0] * stock_dimension

    initial_amount = 1000000
    env_kwargs = {
        "hmax": 100,
        "initial_amount": initial_amount,
        "num_stock_shares": num_stock_shares,
        "buy_cost_pct": buy_cost_list,
        "sell_cost_pct": sell_cost_list,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": INDICATORS,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
    }

    e_train_gym = StockTradingEnv(df=init_train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    print(type(env_train))

    if if_using_a2c:
        agent = DRLAgent(env=env_train)
        model_a2c = agent.get_model("a2c")
        # set up logger
        tmp_path = RESULTS_DIR + "/a2c"
        new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_a2c.set_logger(new_logger_a2c)
        trained_a2c = agent.train_model(
            model=model_a2c, tb_log_name="a2c", total_timesteps=50000
        )

    if if_using_ddpg:
        agent = DRLAgent(env=env_train)
        model_ddpg = agent.get_model("ddpg")
        # set up logger
        tmp_path = RESULTS_DIR + "/ddpg"
        new_logger_ddpg = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_ddpg.set_logger(new_logger_ddpg)
        trained_ddpg = agent.train_model(
            model=model_ddpg, tb_log_name="ddpg", total_timesteps=50000
        )

    if if_using_ppo:
        agent = DRLAgent(env=env_train)
        PPO_PARAMS = {
            "n_steps": 2048,
            "ent_coef": 0.01,
            "learning_rate": 0.00025,
            "batch_size": 128,
        }
        model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)
        # set up logger
        tmp_path = RESULTS_DIR + "/ppo"
        new_logger_ppo = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_ppo.set_logger(new_logger_ppo)
        trained_ppo = agent.train_model(
            model=model_ppo, tb_log_name="ppo", total_timesteps=50000
        )

    if if_using_sac:
        agent = DRLAgent(env=env_train)
        SAC_PARAMS = {
            "batch_size": 128,
            "buffer_size": 100000,
            "learning_rate": 0.0001,
            "learning_starts": 100,
            "ent_coef": "auto_0.1",
        }
        model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)
        # set up logger
        tmp_path = RESULTS_DIR + "/sac"
        new_logger_sac = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_sac.set_logger(new_logger_sac)
        trained_sac = agent.train_model(
            model=model_sac, tb_log_name="sac", total_timesteps=50000
        )

    if if_using_td3:
        agent = DRLAgent(env=env_train)
        TD3_PARAMS = {"batch_size": 100, "buffer_size": 1000000, "learning_rate": 0.001}
        model_td3 = agent.get_model("td3", model_kwargs=TD3_PARAMS)
        # set up logger
        tmp_path = RESULTS_DIR + "/td3"
        new_logger_td3 = configure(tmp_path, ["stdout", "csv", "tensorboard"])
        # Set new logger
        model_td3.set_logger(new_logger_td3)
        trained_td3 = agent.train_model(
            model=model_td3, tb_log_name="td3", total_timesteps=50000
        )

    # trade
    e_trade_gym = StockTradingEnv(
        df=init_trade_data,
        turbulence_threshold=70,
        risk_indicator_col="vix",
        **env_kwargs,
    )
    # env_trade, obs_trade = e_trade_gym.get_sb_env()

    if if_using_a2c:
        result_a2c, actions_a2c = DRLAgent.DRL_prediction(
            model=trained_a2c, environment=e_trade_gym
        )

    if if_using_ddpg:
        result_ddpg, actions_ddpg = DRLAgent.DRL_prediction(
            model=trained_ddpg, environment=e_trade_gym
        )

    if if_using_ppo:
        result_ppo, actions_ppo = DRLAgent.DRL_prediction(
            model=trained_ppo, environment=e_trade_gym
        )

    if if_using_sac:
        result_sac, actions_sac = DRLAgent.DRL_prediction(
            model=trained_sac, environment=e_trade_gym
        )

    if if_using_td3:
        result_td3, actions_td3 = DRLAgent.DRL_prediction(
            model=trained_td3, environment=e_trade_gym
        )

    # in the python version, we should check isinstance; in the notebook version it is not necessary
    if if_using_a2c and isinstance(result_a2c, tuple):
        actions_a2c = result_a2c[1]
        result_a2c = result_a2c[0]
    if if_using_ddpg and isinstance(result_ddpg, tuple):
        actions_ddpg = result_ddpg[1]
        result_ddpg = result_ddpg[0]
    if if_using_ppo and isinstance(result_ppo, tuple):
        actions_ppo = result_ppo[1]
        result_ppo = result_ppo[0]
    if if_using_sac and isinstance(result_sac, tuple):
        actions_sac = result_sac[1]
        result_sac = result_sac[0]
    if if_using_td3 and isinstance(result_td3, tuple):
        actions_td3 = result_td3[1]
        result_td3 = result_td3[0]

    # store actions
    if if_store_actions:
        actions_a2c.to_csv("actions_a2c.csv") if if_using_a2c else None
        actions_ddpg.to_csv("actions_ddpg.csv") if if_using_ddpg else None
        actions_td3.to_csv("actions_td3.csv") if if_using_td3 else None
        actions_ppo.to_csv("actions_ppo.csv") if if_using_ppo else None
        actions_sac.to_csv("actions_sac.csv") if if_using_sac else None

    # dji baseline
    dji_ = get_baseline(ticker="^DJI", start=trade_start_date, end=trade_end_date)
    dji = pd.DataFrame()
    dji[date_col] = dji_[date_col]
    dji["DJI"] = dji_["close"]
    # select the rows between trade_start and trade_end (not included), since some values may not be in this region
    dji = dji.loc[
        (dji[date_col] >= trade_start_date) & (dji[date_col] < trade_end_date)
    ]

    result = dji

    if if_using_a2c:
        result_a2c.rename(columns={"account_value": "A2C"}, inplace=True)
        result = pd.merge(result, result_a2c, how="left")
    if if_using_ddpg:
        result_ddpg.rename(columns={"account_value": "DDPG"}, inplace=True)
        result = pd.merge(result, result_ddpg, how="left")
    if if_using_td3:
        result_td3.rename(columns={"account_value": "TD3"}, inplace=True)
        result = pd.merge(result, result_td3, how="left")
    if if_using_ppo:
        result_ppo.rename(columns={"account_value": "PPO"}, inplace=True)
        result = pd.merge(result, result_ppo, how="left")
    if if_using_sac:
        result_sac.rename(columns={"account_value": "SAC"}, inplace=True)
        result = pd.merge(result, result_sac, how="left")

    # remove the rows with nan
    result = result.dropna(axis=0, how="any")

    # collect the column names of the strategies, including DJI
    col_strategies = []
    for col in result.columns:
        if col != date_col and col != "" and "Unnamed" not in col:
            col_strategies.append(col)

    # make sure that the first row is initial_amount
    for col in col_strategies:
        if result[col].iloc[0] != initial_amount:
            result[col] = result[col] / result[col].iloc[0] * initial_amount
    result = result.reset_index(drop=True)

    # stats
    for col in col_strategies:
        stats = backtest_stats(result, value_col_name=col)
        print("\nstats of " + col + ": \n", stats)

    # print and save result
    print("result: ", result)
    if if_store_result:
        result.to_csv("result.csv")

    # plot fig
    plot_return(
        result=result,
        column_as_x=date_col,
        if_need_calc_return=True,
        savefig_filename="stock_trading.png",
        xlabel="Date",
        ylabel="Return",
        if_transfer_date=True,
        num_days_xticks=20,
    )
```
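
As a possible follow-up once `stock_trading` has been run with `if_store_result=True`, the saved `result.csv` (columns `date`, `DJI`, and one column per trained agent) can be reloaded to compare strategies. A minimal sketch, assuming the file layout produced by the function above:

```python
import pandas as pd

# Load the merged account-value table written by stock_trading()
result = pd.read_csv("result.csv", index_col=0)

# Cumulative return of each strategy relative to its first account value
strategy_cols = [c for c in result.columns if c != "date"]
cum_return = result[strategy_cols].iloc[-1] / result[strategy_cols].iloc[0] - 1.0

# Strategies ranked by cumulative return over the trade period
print(cum_return.sort_values(ascending=False))
```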
