Skip to content

Commit

Permalink
0.9.31 新增指数合成
Browse files Browse the repository at this point in the history
  • Loading branch information
zengbin93 committed Oct 13, 2023
1 parent 77fb860 commit 0c0b30a
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 0 deletions.
1 change: 1 addition & 0 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
get_dir_size,
empty_cache_path,
print_df_sample,
index_composition,
)

# 交易日历工具
Expand Down
1 change: 1 addition & 0 deletions czsc/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .stats import daily_performance, net_value_stats, subtract_fee
from .signal_analyzer import SignalAnalyzer, SignalPerformance
from .cache import home_path, get_dir_size, empty_cache_path
from .index_composition import index_composition


sorted_freqs = ['Tick', '1分钟', '2分钟', '3分钟', '4分钟', '5分钟', '6分钟', '10分钟', '12分钟',
Expand Down
92 changes: 92 additions & 0 deletions czsc/utils/index_composition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
"""
author: zengbin93
email: [email protected]
create_dt: 2023/10/13 15:01
describe: 因子(特征)处理
"""
import pandas as pd


def index_composition(klines, weights=None, base_point=1000, **kwagrs):
"""设置基点,按收益率加权合成指数
输入:
1. 成分标的K线行情
2. 每个时刻的成分权重
3. 基点
过程:
1. 计算成分标的每根K线的收益
2. 在每个时刻,按成分权重对标的收益加权计算,得到每个时刻的指数涨跌幅
3. 用基点和每个时刻的涨跌幅计算出每个时刻的指数价
输出:指数K线行情
:param klines: K线行情,样例如下:
========= =================== ======= ======= ======= ======= ========== ===========
symbol dt open close high low vol amount
========= =================== ======= ======= ======= ======= ========== ===========
000001.SH 2021-01-04 09:31:00 3473.82 3469.37 3474.33 3468.86 1041014208 13018854400
000001.SH 2021-01-04 09:32:00 3470.18 3467.58 3471.95 3467.58 570367232 7721827328
000001.SH 2021-01-04 09:33:00 3467.11 3466.98 3468.53 3466.94 660060480 8371049984
000001.SH 2021-01-04 09:34:00 3466.83 3463.5 3466.83 3463.4 563931392 7435783168
000001.SH 2021-01-04 09:35:00 3463.23 3460.33 3463.75 3460.33 504271904 6500695552
========= =================== ======= ======= ======= ======= ========== ===========
:param weights: 权重调整记录;索引为dt,columns为成分权重,每个时间截面的权重之和可以不为1,样例如下:
=================== =========== =========== =========== ===========
dt 000001.SH 000016.SH 000300.SH 000905.SH
=================== =========== =========== =========== ===========
2021-01-04 09:31:00 0.244275 0.236822 0.271415 0.204674
2021-01-04 10:10:00 0.250273 0.140398 0.151955 0.294418
2021-01-04 10:54:00 0.127531 0.123941 0.199969 0.19682
=================== =========== =========== =========== ===========
注意:权重调整记录的时间截面必须是K线行情的时间截面的子集,且必须包含K线行情的第一根K线的时间截面
:param base_point: 基点,默认为1000
:return: 指数K线行情,样例如下:
=================== ============ ======== ========== ===========
dt returns price vol amount
=================== ============ ======== ========== ===========
2021-01-04 09:31:00 0 1000 2195268112 31725149952
2021-01-04 09:32:00 -0.000230561 999.769 1187524832 18900989440
2021-01-04 09:33:00 -0.000165153 999.604 1330775568 19418287360
2021-01-04 09:34:00 -0.00103582 998.569 1133046300 17488768512
2021-01-04 09:35:00 -0.00067244 997.897 1025222108 15351070080
=================== ============ ======== ========== ===========
"""
klines["dt"] = pd.to_datetime(klines["dt"])

if 'returns' not in klines.columns.tolist():
data = []
for _, kline in klines.groupby("symbol"):
kline = kline.sort_values("dt", ascending=True).reset_index(drop=True)
kline["returns"] = kline["close"].pct_change()
kline["returns"] = kline["returns"].fillna(0)
data.append(kline)
klines = pd.concat(data, ignore_index=True)

returns = klines.pivot_table(index="dt", columns="symbol", values="returns")

if weights is None:
weights = returns.copy()
weights[:] = 1 / len(returns.columns.tolist())
else:
weights['dt'] = pd.to_datetime(weights['dt'])
assert weights['dt'].min() <= klines['dt'].min(), "权重调整记录的首个时刻必须小于等于成分K线首个时刻"
weights.set_index("dt", inplace=True)
weights = weights.reindex(returns.index, method="ffill", copy=True)

index_ = (returns * weights).sum(axis=1).to_frame("returns")
index_["price"] = base_point * (1 + index_["returns"]).cumprod()
index_['vol'] = klines.groupby("dt")["vol"].sum()
index_['amount'] = klines.groupby("dt")["amount"].sum()
index_ = index_.reset_index()
return index_

0 comments on commit 0c0b30a

Please sign in to comment.