# microsoft/qlib/main (758k tokens)
```
├── .commitlintrc.js (200 tokens)
├── .deepsource.toml
├── .dockerignore
├── .github/
   ├── ISSUE_TEMPLATE/
      ├── bug-report.md (200 tokens)
      ├── documentation.md
      ├── feature-request.md (100 tokens)
      ├── question.md (100 tokens)
   ├── PULL_REQUEST_TEMPLATE.md (500 tokens)
   ├── brew_install.sh (3.8k tokens)
   ├── release-drafter.yml (100 tokens)
   ├── workflows/
      ├── lint_title.yml (200 tokens)
      ├── release.yml (700 tokens)
      ├── stale.yml (200 tokens)
      ├── test_qlib_from_pip.yml (300 tokens)
      ├── test_qlib_from_source.yml (900 tokens)
      ├── test_qlib_from_source_slow.yml (300 tokens)
├── .gitignore (100 tokens)
├── .mypy.ini (100 tokens)
├── .pre-commit-config.yaml (100 tokens)
├── .pylintrc
├── .readthedocs.yaml (100 tokens)
├── CHANGELOG.md
├── CHANGES.rst (1500 tokens)
├── CODE_OF_CONDUCT.md (100 tokens)
├── Dockerfile (200 tokens)
├── LICENSE (omitted)
├── MANIFEST.in
├── Makefile (1800 tokens)
├── README.md (8.6k tokens)
├── SECURITY.md (600 tokens)
├── build_docker_image.sh (200 tokens)
├── docs/
   ├── FAQ/
      ├── FAQ.rst (1300 tokens)
   ├── Makefile (100 tokens)
   ├── _static/
      ├── demo.sh
      ├── img/
         ├── QlibRL_framework.png
         ├── RL_framework.png
         ├── Task-Gen-Recorder-Collector.svg (40.6k tokens)
         ├── analysis/
            ├── analysis_model_IC.png
            ├── analysis_model_NDQ.png
            ├── analysis_model_auto_correlation.png
            ├── analysis_model_cumulative_return.png
            ├── analysis_model_long_short.png
            ├── analysis_model_monthly_IC.png
            ├── cumulative_return_buy.png
            ├── cumulative_return_buy_minus_sell.png
            ├── cumulative_return_hold.png
            ├── cumulative_return_sell.png
            ├── rank_label_buy.png
            ├── rank_label_hold.png
            ├── rank_label_sell.png
            ├── report.png
            ├── risk_analysis_annualized_return.png
            ├── risk_analysis_bar.png
            ├── risk_analysis_information_ratio.png
            ├── risk_analysis_max_drawdown.png
            ├── risk_analysis_std.png
            ├── score_ic.png
         ├── change doc.gif
         ├── framework-abstract.jpg
         ├── framework.png
         ├── framework.svg (29.5k tokens)
         ├── logo/
            ├── 1.png
            ├── 2.png
            ├── 3.png
            ├── white_bg_rec+word.png
            ├── yel_bg_rec+word.png
            ├── yellow_bg_rec+word .png
            ├── yellow_bg_rec.png
         ├── online_serving.png
         ├── qrcode/
            ├── gitter_qr.png
         ├── rdagent_logo.png
         ├── topk_drop.png
   ├── advanced/
      ├── PIT.rst (1500 tokens)
      ├── alpha.rst (600 tokens)
      ├── serial.rst (400 tokens)
      ├── server.rst (200 tokens)
      ├── task_management.rst (1200 tokens)
   ├── changelog/
      ├── changelog.rst
   ├── component/
      ├── data.rst (5.6k tokens)
      ├── highfreq.rst (800 tokens)
      ├── meta.rst (600 tokens)
      ├── model.rst (1000 tokens)
      ├── online.rst (400 tokens)
      ├── recorder.rst (1400 tokens)
      ├── report.rst (2.4k tokens)
      ├── rl/
         ├── framework.rst (900 tokens)
         ├── guidance.rst (900 tokens)
         ├── overall.rst (1600 tokens)
         ├── quickstart.rst (1500 tokens)
         ├── toctree.rst (100 tokens)
      ├── strategy.rst (2.9k tokens)
      ├── workflow.rst (2.2k tokens)
   ├── conf.py (1300 tokens)
   ├── developer/
      ├── code_standard_and_dev_guide.rst (500 tokens)
      ├── how_to_build_image.rst (500 tokens)
   ├── hidden/
      ├── client.rst (1700 tokens)
      ├── online.rst (1700 tokens)
      ├── tuner.rst (2.9k tokens)
   ├── index.rst (400 tokens)
   ├── introduction/
      ├── introduction.rst (800 tokens)
      ├── quick.rst (800 tokens)
   ├── make.bat (200 tokens)
   ├── reference/
      ├── api.rst (900 tokens)
   ├── requirements.txt
   ├── start/
      ├── getdata.rst (1300 tokens)
      ├── initialization.rst (1100 tokens)
      ├── installation.rst (200 tokens)
      ├── integration.rst (1500 tokens)
├── examples/
   ├── README.md (100 tokens)
   ├── benchmarks/
      ├── ADARNN/
         ├── README.md (100 tokens)
         ├── requirements.txt
         ├── workflow_config_adarnn_Alpha360.yaml (500 tokens)
      ├── ADD/
         ├── README.md
         ├── requirements.txt
         ├── workflow_config_add_Alpha360.yaml (500 tokens)
      ├── ALSTM/
         ├── README.md (100 tokens)
         ├── requirements.txt
         ├── workflow_config_alstm_Alpha158.yaml (600 tokens)
         ├── workflow_config_alstm_Alpha360.yaml (500 tokens)
      ├── CatBoost/
         ├── README.md (100 tokens)
         ├── requirements.txt
         ├── workflow_config_catboost_Alpha158.yaml (400 tokens)
         ├── workflow_config_catboost_Alpha158_csi500.yaml (400 tokens)
         ├── workflow_config_catboost_Alpha360.yaml (500 tokens)
         ├── workflow_config_catboost_Alpha360_csi500.yaml (500 tokens)
      ├── DoubleEnsemble/
         ├── README.md (200 tokens)
         ├── requirements.txt
         ├── workflow_config_doubleensemble_Alpha158.yaml (500 tokens)
         ├── workflow_config_doubleensemble_Alpha158_csi500.yaml (500 tokens)
         ├── workflow_config_doubleensemble_Alpha360.yaml (600 tokens)
         ├── workflow_config_doubleensemble_Alpha360_csi500.yaml (600 tokens)
         ├── workflow_config_doubleensemble_early_stop_Alpha158.yaml (500 tokens)
      ├── GATs/
         ├── README.md (100 tokens)
         ├── requirements.txt
         ├── workflow_config_gats_Alpha158.yaml (600 tokens)
         ├── workflow_config_gats_Alpha360.yaml (500 tokens)
      ├── GRU/
         ├── README.md
         ├── csi300_gru_ts.pkl
         ├── model_gru_csi300.pkl
         ├── requirements.txt
         ├── workflow_config_gru_Alpha158.yaml (600 tokens)
         ├── workflow_config_gru_Alpha360.yaml (500 tokens)
      ├── GeneralPtNN/
         ├── README.md (200 tokens)
         ├── workflow_config_gru.yaml (600 tokens)
         ├── workflow_config_gru2mlp.yaml (600 tokens)
         ├── workflow_config_mlp.yaml (600 tokens)
      ├── HIST/
         ├── README.md (100 tokens)
         ├── qlib_csi300_stock_index.npy
         ├── requirements.txt
         ├── workflow_config_hist_Alpha360.yaml (500 tokens)
      ├── IGMTF/
         ├── README.md
         ├── requirements.txt
         ├── workflow_config_igmtf_Alpha360.yaml (500 tokens)
      ├── KRNN/
         ├── README.md (100 tokens)
         ├── requirements.txt
         ├── workflow_config_krnn_Alpha360.yaml (500 tokens)
      ├── LSTM/
         ├── README.md
         ├── csi300_lstm_ts.pkl
         ├── model_lstm_csi300.pkl
         ├── requirements.txt
         ├── workflow_config_lstm_Alpha158.yaml (600 tokens)
         ├── workflow_config_lstm_Alpha360.yaml (500 tokens)
      ├── LightGBM/
         ├── README.md (100 tokens)
         ├── features_resample_N.py (100 tokens)
         ├── features_sample.py (100 tokens)
         ├── multi_freq_handler.py (1400 tokens)
         ├── requirements.txt
         ├── workflow_config_lightgbm_Alpha158.yaml (400 tokens)
         ├── workflow_config_lightgbm_Alpha158_csi500.yaml (400 tokens)
         ├── workflow_config_lightgbm_Alpha158_multi_freq.yaml (500 tokens)
         ├── workflow_config_lightgbm_Alpha360.yaml (500 tokens)
         ├── workflow_config_lightgbm_Alpha360_csi500.yaml (500 tokens)
         ├── workflow_config_lightgbm_configurable_dataset.yaml (600 tokens)
         ├── workflow_config_lightgbm_multi_freq.yaml (500 tokens)
      ├── Linear/
         ├── requirements.txt
         ├── workflow_config_linear_Alpha158.yaml (400 tokens)
         ├── workflow_config_linear_Alpha158_csi500.yaml (400 tokens)
         ├── workflow_config_linear_Alpha158_multi_pass_bt.yaml (500 tokens)
      ├── Localformer/
         ├── README.md
         ├── requirements.txt
         ├── workflow_config_localformer_Alpha158.yaml (500 tokens)
         ├── workflow_config_localformer_Alpha360.yaml (400 tokens)
      ├── MLP/
         ├── README.md
         ├── requirements.txt
         ├── workflow_config_mlp_Alpha158.yaml (500 tokens)
         ├── workflow_config_mlp_Alpha158_csi500.yaml (500 tokens)
         ├── workflow_config_mlp_Alpha360.yaml (500 tokens)
         ├── workflow_config_mlp_Alpha360_csi500.yaml (500 tokens)
      ├── README.md (3.2k tokens)
      ├── SFM/
         ├── README.md (100 tokens)
         ├── requirements.txt
         ├── workflow_config_sfm_Alpha360.yaml (500 tokens)
      ├── Sandwich/
         ├── README.md (100 tokens)
         ├── requirements.txt
         ├── workflow_config_sandwich_Alpha360.yaml (500 tokens)
      ├── TCN/
         ├── README.md
         ├── requirements.txt
         ├── workflow_config_tcn_Alpha158.yaml (600 tokens)
         ├── workflow_config_tcn_Alpha360.yaml (500 tokens)
      ├── TCTS/
         ├── README.md (1100 tokens)
         ├── requirements.txt
         ├── workflow.png
         ├── workflow_config_tcts_Alpha360.yaml (600 tokens)
      ├── TFT/
         ├── README.md (200 tokens)
         ├── data_formatters/
            ├── __init__.py (100 tokens)
            ├── base.py (1600 tokens)
            ├── qlib_Alpha158.py (1700 tokens)
         ├── expt_settings/
            ├── __init__.py (100 tokens)
            ├── configs.py (700 tokens)
         ├── libs/
            ├── __init__.py (100 tokens)
            ├── hyperparam_opt.py (3.1k tokens)
            ├── tft_model.py (9.7k tokens)
            ├── utils.py (1400 tokens)
         ├── requirements.txt
         ├── tft.py (2.2k tokens)
         ├── workflow_config_tft_Alpha158.yaml (400 tokens)
      ├── TRA/
         ├── README.md (800 tokens)
         ├── Reports.ipynb (15.6k tokens)
         ├── configs/
            ├── config_alstm.yaml (300 tokens)
            ├── config_alstm_tra.yaml (300 tokens)
            ├── config_alstm_tra_init.yaml (300 tokens)
            ├── config_transformer.yaml (300 tokens)
            ├── config_transformer_tra.yaml (300 tokens)
            ├── config_transformer_tra_init.yaml (300 tokens)
         ├── data/
            ├── README.md
         ├── example.py (200 tokens)
         ├── requirements.txt
         ├── run.sh (200 tokens)
         ├── src/
            ├── dataset.py (1800 tokens)
            ├── model.py (3.9k tokens)
         ├── workflow_config_tra_Alpha158.yaml (700 tokens)
         ├── workflow_config_tra_Alpha158_full.yaml (600 tokens)
         ├── workflow_config_tra_Alpha360.yaml (600 tokens)
      ├── TabNet/
         ├── README.md
         ├── requirements.txt
         ├── workflow_config_TabNet_Alpha158.yaml (500 tokens)
         ├── workflow_config_TabNet_Alpha360.yaml (500 tokens)
      ├── Transformer/
         ├── README.md
         ├── requirements.txt
         ├── workflow_config_transformer_Alpha158.yaml (500 tokens)
         ├── workflow_config_transformer_Alpha360.yaml (500 tokens)
      ├── XGBoost/
         ├── README.md
         ├── requirements.txt
         ├── workflow_config_xgboost_Alpha158.yaml (400 tokens)
         ├── workflow_config_xgboost_Alpha360.yaml (400 tokens)
   ├── benchmarks_dynamic/
      ├── DDG-DA/
         ├── Makefile
         ├── README.md (500 tokens)
         ├── requirements.txt
         ├── vis_data.py (500 tokens)
         ├── workflow.py (300 tokens)
      ├── README.md (400 tokens)
      ├── baseline/
         ├── README.md (100 tokens)
         ├── rolling_benchmark.py (300 tokens)
         ├── workflow_config_lightgbm_Alpha158.yaml (400 tokens)
         ├── workflow_config_linear_Alpha158.yaml (400 tokens)
   ├── data_demo/
      ├── README.md
      ├── data_cache_demo.py (400 tokens)
      ├── data_mem_resuse_demo.py (400 tokens)
   ├── highfreq/
      ├── README.md (400 tokens)
      ├── highfreq_handler.py (1200 tokens)
      ├── highfreq_ops.py (900 tokens)
      ├── highfreq_processor.py (700 tokens)
      ├── workflow.py (1100 tokens)
      ├── workflow_config_High_Freq_Tree_Alpha158.yaml (400 tokens)
   ├── hyperparameter/
      ├── LightGBM/
         ├── Readme.md (100 tokens)
         ├── hyperparameter_158.py (400 tokens)
         ├── hyperparameter_360.py (400 tokens)
         ├── requirements.txt
   ├── model_interpreter/
      ├── feature.py (200 tokens)
   ├── model_rolling/
      ├── requirements.txt
      ├── task_manager_rolling.py (900 tokens)
   ├── nested_decision_execution/
      ├── README.md (200 tokens)
      ├── workflow.py (3.1k tokens)
   ├── online_srv/
      ├── online_management_simulate.py (1200 tokens)
      ├── rolling_online_management.py (1200 tokens)
      ├── update_online_pred.py (400 tokens)
   ├── orderbook_data/
      ├── README.md (400 tokens)
      ├── create_dataset.py (2.5k tokens)
      ├── example.py (2.5k tokens)
   ├── portfolio/
      ├── README.md (300 tokens)
      ├── config_enhanced_indexing.yaml (400 tokens)
      ├── prepare_riskdata.py (300 tokens)
   ├── rl/
      ├── simple_example.ipynb (14k tokens)
   ├── rl_order_execution/
      ├── README.md (1200 tokens)
      ├── exp_configs/
         ├── backtest_opds.yml (300 tokens)
         ├── backtest_ppo.yml (300 tokens)
         ├── backtest_twap.yml (100 tokens)
         ├── train_opds.yml (300 tokens)
         ├── train_ppo.yml (300 tokens)
      ├── scripts/
         ├── gen_pickle_data.py (400 tokens)
         ├── gen_training_orders.py (400 tokens)
         ├── merge_orders.py (100 tokens)
         ├── pickle_data_config.yml (600 tokens)
   ├── rolling_process_data/
      ├── README.md (100 tokens)
      ├── rolling_handler.py (200 tokens)
      ├── workflow.py (1100 tokens)
   ├── run_all_model.py (3.3k tokens)
   ├── tutorial/
      ├── detailed_workflow.ipynb (5.9k tokens)
   ├── workflow_by_code.ipynb (2.3k tokens)
   ├── workflow_by_code.py (600 tokens)
├── pyproject.toml (700 tokens)
├── qlib/
   ├── __init__.py (2.5k tokens)
   ├── backtest/
      ├── __init__.py (2.4k tokens)
      ├── account.py (3.6k tokens)
      ├── backtest.py (800 tokens)
      ├── decision.py (4.4k tokens)
      ├── exchange.py (8.8k tokens)
      ├── executor.py (5.3k tokens)
      ├── high_performance_ds.py (4.7k tokens)
      ├── position.py (4k tokens)
      ├── profit_attribution.py (3k tokens)
      ├── report.py (5.6k tokens)
      ├── signal.py (800 tokens)
      ├── utils.py (2.1k tokens)
   ├── cli/
      ├── __init__.py
      ├── data.py
      ├── run.py (1000 tokens)
   ├── config.py (3.8k tokens)
   ├── constant.py (100 tokens)
   ├── contrib/
      ├── __init__.py
      ├── data/
         ├── __init__.py
         ├── data.py (400 tokens)
         ├── dataset.py (2.8k tokens)
         ├── handler.py (1000 tokens)
         ├── highfreq_handler.py (3.7k tokens)
         ├── highfreq_processor.py (600 tokens)
         ├── highfreq_provider.py (2.5k tokens)
         ├── loader.py (3.2k tokens)
         ├── processor.py (1000 tokens)
         ├── utils/
            ├── __init__.py
            ├── sepdf.py (1400 tokens)
      ├── eva/
         ├── __init__.py
         ├── alpha.py (1400 tokens)
      ├── evaluate.py (3k tokens)
      ├── evaluate_portfolio.py (1300 tokens)
      ├── meta/
         ├── __init__.py
         ├── data_selection/
            ├── __init__.py
            ├── dataset.py (3.9k tokens)
            ├── model.py (1400 tokens)
            ├── net.py (600 tokens)
            ├── utils.py (800 tokens)
      ├── model/
         ├── __init__.py (300 tokens)
         ├── catboost_model.py (800 tokens)
         ├── double_ensemble.py (2.4k tokens)
         ├── gbdt.py (1000 tokens)
         ├── highfreq_gdbt_model.py (1400 tokens)
         ├── linear.py (800 tokens)
         ├── pytorch_adarnn.py (5.6k tokens)
         ├── pytorch_add.py (4.3k tokens)
         ├── pytorch_alstm.py (2.3k tokens)
         ├── pytorch_alstm_ts.py (2.3k tokens)
         ├── pytorch_gats.py (2.5k tokens)
         ├── pytorch_gats_ts.py (2.6k tokens)
         ├── pytorch_general_nn.py (2.4k tokens)
         ├── pytorch_gru.py (2.1k tokens)
         ├── pytorch_gru_ts.py (2000 tokens)
         ├── pytorch_hist.py (3.7k tokens)
         ├── pytorch_igmtf.py (3.2k tokens)
         ├── pytorch_krnn.py (3.2k tokens)
         ├── pytorch_localformer.py (2.2k tokens)
         ├── pytorch_localformer_ts.py (2.1k tokens)
         ├── pytorch_lstm.py (1900 tokens)
         ├── pytorch_lstm_ts.py (1900 tokens)
         ├── pytorch_nn.py (3.6k tokens)
         ├── pytorch_sandwich.py (2.3k tokens)
         ├── pytorch_sfm.py (3.2k tokens)
         ├── pytorch_tabnet.py (4.6k tokens)
         ├── pytorch_tcn.py (1900 tokens)
         ├── pytorch_tcn_ts.py (1800 tokens)
         ├── pytorch_tcts.py (2.9k tokens)
         ├── pytorch_tra.py (6.8k tokens)
         ├── pytorch_transformer.py (2000 tokens)
         ├── pytorch_transformer_ts.py (1800 tokens)
         ├── pytorch_utils.py (200 tokens)
         ├── tcn.py (500 tokens)
         ├── xgboost.py (600 tokens)
      ├── online/
         ├── __init__.py (100 tokens)
         ├── manager.py (1100 tokens)
         ├── online_model.py (200 tokens)
         ├── operator.py (2.6k tokens)
         ├── user.py (600 tokens)
         ├── utils.py (600 tokens)
      ├── ops/
         ├── __init__.py
         ├── high_freq.py (1600 tokens)
      ├── report/
         ├── __init__.py (100 tokens)
         ├── analysis_model/
            ├── __init__.py
            ├── analysis_model_performance.py (2.3k tokens)
         ├── analysis_position/
            ├── __init__.py (100 tokens)
            ├── cumulative_return.py (1900 tokens)
            ├── parse_position.py (1300 tokens)
            ├── rank_label.py (900 tokens)
            ├── report.py (1700 tokens)
            ├── risk_analysis.py (2.1k tokens)
            ├── score_ic.py (500 tokens)
         ├── data/
            ├── __init__.py
            ├── ana.py (1500 tokens)
            ├── base.py (300 tokens)
         ├── graph.py (2.2k tokens)
         ├── utils.py (500 tokens)
      ├── rolling/
         ├── __init__.py (100 tokens)
         ├── __main__.py (100 tokens)
         ├── base.py (2.3k tokens)
         ├── ddgda.py (3.1k tokens)
      ├── strategy/
         ├── __init__.py (100 tokens)
         ├── cost_control.py (800 tokens)
         ├── optimizer/
            ├── __init__.py (100 tokens)
            ├── base.py (100 tokens)
            ├── enhanced_indexing.py (1300 tokens)
            ├── optimizer.py (1700 tokens)
         ├── order_generator.py (1700 tokens)
         ├── rule_strategy.py (5.9k tokens)
         ├── signal_strategy.py (4.5k tokens)
      ├── torch.py (200 tokens)
      ├── tuner/
         ├── __init__.py
         ├── config.py (700 tokens)
         ├── launcher.py (200 tokens)
         ├── pipeline.py (700 tokens)
         ├── space.py (100 tokens)
         ├── tuner.py (1600 tokens)
      ├── workflow/
         ├── __init__.py
         ├── record_temp.py (700 tokens)
   ├── data/
      ├── __init__.py (300 tokens)
      ├── _libs/
         ├── __init__.py
         ├── expanding.pyx (800 tokens)
         ├── rolling.pyx (1200 tokens)
      ├── base.py (1700 tokens)
      ├── cache.py (9.5k tokens)
      ├── client.py (700 tokens)
      ├── data.py (9.9k tokens)
      ├── dataset/
         ├── __init__.py (5.5k tokens)
         ├── handler.py (5.7k tokens)
         ├── loader.py (3k tokens)
         ├── processor.py (2.9k tokens)
         ├── storage.py (1600 tokens)
         ├── utils.py (800 tokens)
         ├── weight.py (200 tokens)
      ├── filter.py (2.8k tokens)
      ├── inst_processor.py (100 tokens)
      ├── ops.py (9.1k tokens)
      ├── pit.py (600 tokens)
      ├── storage/
         ├── __init__.py (100 tokens)
         ├── file_storage.py (2.9k tokens)
         ├── storage.py (2.9k tokens)
   ├── log.py (1500 tokens)
   ├── model/
      ├── __init__.py
      ├── base.py (800 tokens)
      ├── ens/
         ├── __init__.py
         ├── ensemble.py (900 tokens)
         ├── group.py (800 tokens)
      ├── interpret/
         ├── __init__.py
         ├── base.py (200 tokens)
      ├── meta/
         ├── __init__.py
         ├── dataset.py (600 tokens)
         ├── model.py (500 tokens)
         ├── task.py (400 tokens)
      ├── riskmodel/
         ├── __init__.py (100 tokens)
         ├── base.py (1000 tokens)
         ├── poet.py (600 tokens)
         ├── shrink.py (2.1k tokens)
         ├── structured.py (800 tokens)
      ├── trainer.py (4.6k tokens)
      ├── utils.py (100 tokens)
   ├── rl/
      ├── __init__.py (100 tokens)
      ├── aux_info.py (200 tokens)
      ├── contrib/
         ├── __init__.py
         ├── backtest.py (2.6k tokens)
         ├── naive_config_parser.py (700 tokens)
         ├── train_onpolicy.py (2k tokens)
         ├── utils.py (200 tokens)
      ├── data/
         ├── __init__.py
         ├── base.py (400 tokens)
         ├── integration.py (600 tokens)
         ├── native.py (1500 tokens)
         ├── pickle_styled.py (2.1k tokens)
      ├── interpreter.py (1000 tokens)
      ├── order_execution/
         ├── __init__.py (200 tokens)
         ├── interpreter.py (1900 tokens)
         ├── network.py (1000 tokens)
         ├── policy.py (1400 tokens)
         ├── reward.py (700 tokens)
         ├── simulator_qlib.py (1000 tokens)
         ├── simulator_simple.py (2.9k tokens)
         ├── state.py (700 tokens)
         ├── strategy.py (4.2k tokens)
         ├── utils.py (300 tokens)
      ├── reward.py (500 tokens)
      ├── seed.py (100 tokens)
      ├── simulator.py (600 tokens)
      ├── strategy/
         ├── __init__.py
         ├── single_order.py (200 tokens)
      ├── trainer/
         ├── __init__.py (100 tokens)
         ├── api.py (700 tokens)
         ├── callbacks.py (2.3k tokens)
         ├── trainer.py (2.7k tokens)
         ├── vessel.py (2000 tokens)
      ├── utils/
         ├── __init__.py (100 tokens)
         ├── data_queue.py (1300 tokens)
         ├── env_wrapper.py (2000 tokens)
         ├── finite_env.py (2.7k tokens)
         ├── log.py (3.7k tokens)
   ├── strategy/
      ├── __init__.py
      ├── base.py (2.2k tokens)
   ├── tests/
      ├── __init__.py (2.4k tokens)
      ├── config.py (1000 tokens)
      ├── data.py (1700 tokens)
   ├── typehint.py (400 tokens)
   ├── utils/
      ├── __init__.py (6k tokens)
      ├── data.py (700 tokens)
      ├── exceptions.py (100 tokens)
      ├── file.py (1100 tokens)
      ├── index_data.py (4.6k tokens)
      ├── mod.py (1500 tokens)
      ├── objm.py (700 tokens)
      ├── paral.py (2k tokens)
      ├── resam.py (1900 tokens)
      ├── serial.py (1200 tokens)
      ├── time.py (2.4k tokens)
   ├── workflow/
      ├── __init__.py (5.1k tokens)
      ├── exp.py (3k tokens)
      ├── expm.py (3.6k tokens)
      ├── online/
         ├── __init__.py
         ├── manager.py (3.5k tokens)
         ├── strategy.py (1700 tokens)
         ├── update.py (2.1k tokens)
         ├── utils.py (1300 tokens)
      ├── record_temp.py (5.5k tokens)
      ├── recorder.py (3.7k tokens)
      ├── task/
         ├── __init__.py (100 tokens)
         ├── collect.py (2k tokens)
         ├── gen.py (2.4k tokens)
         ├── manage.py (3.7k tokens)
         ├── utils.py (2k tokens)
      ├── utils.py (300 tokens)
├── scripts/
   ├── README.md (400 tokens)
   ├── check_data_health.py (1800 tokens)
   ├── check_dump_bin.py (1100 tokens)
   ├── collect_info.py (400 tokens)
   ├── data_collector/
      ├── README.md (500 tokens)
      ├── baostock_5min/
         ├── README.md (900 tokens)
         ├── collector.py (2.4k tokens)
         ├── requirements.txt
      ├── base.py (3.2k tokens)
      ├── br_index/
         ├── README.md (600 tokens)
         ├── collector.py (2k tokens)
         ├── requirements.txt (100 tokens)
      ├── cn_index/
         ├── README.md (100 tokens)
         ├── collector.py (3.2k tokens)
         ├── requirements.txt
      ├── contrib/
         ├── fill_cn_1min_data/
            ├── README.md (100 tokens)
            ├── fill_cn_1min_data.py (700 tokens)
            ├── requirements.txt
         ├── future_trading_date_collector/
            ├── README.md (100 tokens)
            ├── future_trading_date_collector.py (600 tokens)
            ├── requirements.txt
      ├── crowd_source/
         ├── README.md (300 tokens)
      ├── crypto/
         ├── README.md (300 tokens)
         ├── collector.py (2k tokens)
         ├── requirement.txt
      ├── fund/
         ├── README.md (300 tokens)
         ├── collector.py (2000 tokens)
         ├── requirements.txt
      ├── future_calendar_collector.py (800 tokens)
      ├── index.py (1700 tokens)
      ├── pit/
         ├── README.md (300 tokens)
         ├── collector.py (2.1k tokens)
         ├── requirements.txt
      ├── us_index/
         ├── README.md (100 tokens)
         ├── collector.py (2000 tokens)
         ├── requirements.txt
      ├── utils.py (5.8k tokens)
      ├── yahoo/
         ├── README.md (2.7k tokens)
         ├── collector.py (7.5k tokens)
         ├── requirements.txt
   ├── dump_bin.py (4.4k tokens)
   ├── dump_pit.py (2.3k tokens)
   ├── get_data.py
├── setup.py (200 tokens)
├── tests/
   ├── backtest/
      ├── test_file_strategy.py (900 tokens)
      ├── test_high_freq_trading.py (1000 tokens)
   ├── conftest.py
   ├── data_mid_layer_tests/
      ├── README.md
      ├── test_dataloader.py (600 tokens)
      ├── test_dataset.py (1200 tokens)
      ├── test_handler.py (200 tokens)
      ├── test_handler_storage.py (800 tokens)
      ├── test_processor.py (600 tokens)
   ├── dataset_tests/
      ├── README.md
      ├── test_datalayer.py (400 tokens)
   ├── dependency_tests/
      ├── README.md
      ├── test_mlflow.py (200 tokens)
   ├── misc/
      ├── test_get_multi_proc.py (200 tokens)
      ├── test_index_data.py (1000 tokens)
      ├── test_sepdf.py (400 tokens)
      ├── test_utils.py (900 tokens)
   ├── model/
      ├── test_general_nn.py (500 tokens)
   ├── ops/
      ├── test_elem_operator.py (500 tokens)
      ├── test_special_ops.py (700 tokens)
   ├── pytest.ini
   ├── rl/
      ├── test_data_queue.py (500 tokens)
      ├── test_finite_env.py (1600 tokens)
      ├── test_logger.py (1100 tokens)
      ├── test_qlib_simulator.py (1500 tokens)
      ├── test_saoe_simple.py (2.6k tokens)
      ├── test_trainer.py (1300 tokens)
   ├── rolling_tests/
      ├── test_update_pred.py (900 tokens)
   ├── storage_tests/
      ├── test_storage.py (1300 tokens)
   ├── test_all_pipeline.py (1100 tokens)
   ├── test_contrib_model.py (100 tokens)
   ├── test_contrib_workflow.py (500 tokens)
   ├── test_dump_data.py (600 tokens)
   ├── test_get_data.py (300 tokens)
   ├── test_pit.py (3.2k tokens)
   ├── test_register_ops.py (400 tokens)
   ├── test_structured_cov_estimator.py (800 tokens)
   ├── test_workflow.py (200 tokens)
```


## /.commitlintrc.js

```js path="/.commitlintrc.js" 
module.exports = {
    extends: ["@commitlint/config-conventional"],
    rules: {
        // Configuration Format: [level, applicability, value]
        // level: Error level, usually expressed as a number:
        //     0 - disable rule
        //     1 - Warning (does not prevent commits)
        //     2 - Error (will block the commit)
        // applicability: the conditions under which the rule applies, commonly used values:
        //     “always” - always apply the rule
        //     “never” - never apply the rule
        // value: the specific value of the rule, e.g. a maximum length of 100.
        // Refs: https://commitlint.js.org/reference/rules-configuration.html
      "header-max-length": [2, "always", 100],
      "type-enum": [
        2,
        "always",
        ["build", "chore", "ci", "docs", "feat", "fix", "perf", "refactor", "revert", "style", "test", "Release-As"]
      ]
    }
  };

```
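To see these rules in action, here is a minimal sketch that pipes sample headers through commitlint exactly as the `lint_title.yml` workflow below does (the sample messages are hypothetical; commitlint must first be installed via `npm install --save-dev @commitlint/{config-conventional,cli}`):

```sh
# Passes: "feat" is in type-enum and the header is under 100 characters.
echo "feat(data): add csi500 benchmark configs" | npx commitlint --config .commitlintrc.js

# Fails with a level-2 error and a non-zero exit code: "typo" is not in type-enum.
echo "typo(data): misc changes" | npx commitlint --config .commitlintrc.js
```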

## /.deepsource.toml

```toml path="/.deepsource.toml" 
version = 1

test_patterns = ["tests/test_*.py"]

exclude_patterns = ["examples/**"]

[[analyzers]]
name = "python"
enabled = true

  [analyzers.meta]
  runtime_version = "3.x.x"

```

## /.dockerignore

```dockerignore path="/.dockerignore" 
__pycache__
*.pyc
*.pyo
*.pyd
.Python
.env
.git


```

## /.github/ISSUE_TEMPLATE/bug-report.md

---
name: "\U0001F41B Bug Report"
about: Submit a bug report to help us improve Qlib
labels: bug

---

## 🐛 Bug Description

<!-- A clear and concise description of what the bug is. -->

## To Reproduce

Steps to reproduce the behavior:

1.
1.
1.


## Expected Behavior

<!-- A clear and concise description of what you expected to happen. -->

## Screenshot

<!-- A screenshot of the error message or anything shouldn't appear-->

## Environment

**Note**: You can run `cd scripts && python collect_info.py all` under the project directory to collect system information and paste it here directly.

 - Qlib version:
 - Python version:
 - OS (`Windows`, `Linux`, `MacOS`):
 - Commit number (optional, please provide it if you are using the dev version):

## Additional Notes

<!-- Add any other information about the problem here. -->


## /.github/ISSUE_TEMPLATE/documentation.md

---
name: "\U0001F4D6 Documentation"
about: Report an issue related to documentation

---

## 📖 Documentation

<!-- Please specify whether the issue is in the tutorial or the API reference, and describe it. -->


## /.github/ISSUE_TEMPLATE/feature-request.md

---
name: "\U0001F31FFeature Request"
about: Request for a new Qlib feature
labels: enhancement

---

## 🌟 Feature Description
<!-- A clear and concise description of the feature proposal -->

## Motivation

1. Application scenario
2. Related works (Papers, GitHub repos, etc.):
3. Any other relevant and important information:

<!-- Please describe why the feature is important. -->

## Alternatives

<!-- A short description of any alternative solutions or features you've considered. -->

## Additional Notes

<!-- Add any other context or screenshots about the feature request here. -->

## /.github/ISSUE_TEMPLATE/question.md

---
name: "❓Questions & Help"
about: Have some questions? We can offer help.
labels: question

---

## ❓ Questions and Help

We sincerely suggest that you carefully read the [documentation](http://qlib.readthedocs.io/) of our library as well as the official [paper](https://arxiv.org/abs/2009.11189). After that, if you still feel puzzled, please describe your question clearly in this issue.

## /.github/PULL_REQUEST_TEMPLATE.md

<!--- Thank you for submitting a Pull Request! To make our work smoother, -->
<!--- please make sure your Pull Request meets the following requirements: -->
<!---   1. Provide a general summary of your changes in the Title above; -->
<!---   2. Add an appropriate prefix to the title, such as `build:`, `chore:`, `ci:`, `docs:`, `feat:`, `fix:`, `perf:`, `refactor:`, `revert:`, `style:`, `test:` (Ref: https://www.conventionalcommits.org/). -->
<!--- Categories: -->
<!--- Patch update (bug fix): `fix:` -->
<!---   Example: fix(auth): correct login validation issue -->
<!--- Minor update (introduces new functionality): `feat:` -->
<!---   Example: feat(parser): add ability to parse arrays -->
<!--- Major update (breaking change): include BREAKING CHANGE in the commit message footer, or append `!` after the type/scope to indicate a breaking change. -->
<!---   Example: feat(auth)!: remove support for old authentication method -->
<!--- Other updates: `build:`, `chore:`, `ci:`, `docs:`, `perf:`, `refactor:`, `revert:`, `style:`, `test:`. -->

<!--- Provide a general summary of your changes in the Title above -->

## Description
<!--- Describe your changes in detail -->

## Motivation and Context
<!--- Are there any related issues? If so, please put the link here. -->
<!--- Why is this change required? What problem does it solve? -->

## How Has This Been Tested?
<!---  Put an `x` in all the boxes that apply: --->
- [ ] Pass the tests by running `pytest qlib/tests/test_all_pipeline.py` in the parent directory of `qlib`.
- [ ] If you are adding a new feature, test it with your own test scripts.

<!--- **ATTENTION**: If you are adding a new feature, please make sure your code is **correctly tested**. If our test scripts do not cover your cases, please provide your own test scripts under the `tests` folder and run them. More information about test scripts can be found [here](https://docs.python.org/3/library/unittest.html#basic-example), or you can refer to those we provide under the `tests` folder. -->

## Screenshots of Test Results (if appropriate):
1. Pipeline test:
2. Your own tests:

## Types of changes
<!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
- [ ] Fix bugs
- [ ] Add new feature
- [ ] Update documentation
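
For reference, here are hypothetical commit messages that would satisfy the conventions described in the template comments above:

```sh
git commit -m "fix(backtest): correct position rounding on sell orders"  # patch
git commit -m "feat(data): add an orderbook loader"                      # minor
git commit -m "feat(data)!: drop support for the v1 cache format"        # major (breaking)
git commit -m "docs: clarify installation steps"                         # other
```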


## /.github/brew_install.sh

```sh path="/.github/brew_install.sh" 
#!/bin/bash
set -u

# First check if the OS is Linux.
if [[ "$(uname)" = "Linux" ]]; then
  HOMEBREW_ON_LINUX=1
fi

# On macOS, this script installs to /usr/local only.
# On Linux, it installs to /home/linuxbrew/.linuxbrew if you have sudo access
# and ~/.linuxbrew otherwise.
# To install elsewhere (which is unsupported)
# you can untar https://github.com/Homebrew/brew/tarball/master
# anywhere you like.
if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
  HOMEBREW_PREFIX="/usr/local"
  HOMEBREW_REPOSITORY="/usr/local/Homebrew"
  HOMEBREW_CACHE="${HOME}/Library/Caches/Homebrew"

  STAT="stat -f"
  CHOWN="/usr/sbin/chown"
  CHGRP="/usr/bin/chgrp"
  GROUP="admin"
  TOUCH="/usr/bin/touch"
else
  HOMEBREW_PREFIX_DEFAULT="/home/linuxbrew/.linuxbrew"
  HOMEBREW_CACHE="${HOME}/.cache/Homebrew"

  STAT="stat --printf"
  CHOWN="/bin/chown"
  CHGRP="/bin/chgrp"
  GROUP="$(id -gn)"
  TOUCH="/bin/touch"
fi
BREW_REPO="https://github.com/Homebrew/brew"

# TODO: bump version when new macOS is released
MACOS_LATEST_SUPPORTED="10.15"
# TODO: bump version when new macOS is released
MACOS_OLDEST_SUPPORTED="10.13"

# For Homebrew on Linux
REQUIRED_RUBY_VERSION=2.6  # https://github.com/Homebrew/brew/pull/6556
REQUIRED_GLIBC_VERSION=2.13  # https://docs.brew.sh/Homebrew-on-Linux#requirements

# no analytics during installation
export HOMEBREW_NO_ANALYTICS_THIS_RUN=1
export HOMEBREW_NO_ANALYTICS_MESSAGE_OUTPUT=1

# string formatters
if [[ -t 1 ]]; then
  tty_escape() { printf "\033[%sm" "$1"; }
else
  tty_escape() { :; }
fi
tty_mkbold() { tty_escape "1;$1"; }
tty_underline="$(tty_escape "4;39")"
tty_blue="$(tty_mkbold 34)"
tty_red="$(tty_mkbold 31)"
tty_bold="$(tty_mkbold 39)"
tty_reset="$(tty_escape 0)"

have_sudo_access() {
  local -a args
  if [[ -n "${SUDO_ASKPASS-}" ]]; then
    args=("-A")
  fi

  if [[ -z "${HAVE_SUDO_ACCESS-}" ]]; then
    if [[ -n "${args[*]-}" ]]; then
      /usr/bin/sudo "${args[@]}" -l mkdir &>/dev/null
    else
      /usr/bin/sudo -l mkdir &>/dev/null
    fi
    HAVE_SUDO_ACCESS="$?"
  fi

  if [[ -z "${HOMEBREW_ON_LINUX-}" ]] && [[ "$HAVE_SUDO_ACCESS" -ne 0 ]]; then
    abort "Need sudo access on macOS (e.g. the user $USER to be an Administrator)!"
  fi

  return "$HAVE_SUDO_ACCESS"
}

shell_join() {
  local arg
  printf "%s" "$1"
  shift
  for arg in "$@"; do
    printf " "
    printf "%s" "${arg// /\ }"
  done
}

chomp() {
  printf "%s" "${1/"{{contextString}}#39;\n'"/}"
}

ohai() {
  printf "${tty_blue}==>${tty_bold} %s${tty_reset}\n" "$(shell_join "$@")"
}

warn() {
  printf "${tty_red}Warning${tty_reset}: %s\n" "$(chomp "$1")"
}

abort() {
  printf "%s\n" "$1"
  exit 1
}

execute() {
  if ! "$@"; then
    abort "$(printf "Failed during: %s" "$(shell_join "$@")")"
  fi
}

execute_sudo() {
  local -a args=("$@")
  if [[ -n "${SUDO_ASKPASS-}" ]]; then
    args=("-A" "${args[@]}")
  fi
  if have_sudo_access; then
    ohai "/usr/bin/sudo" "${args[@]}"
    execute "/usr/bin/sudo" "${args[@]}"
  else
    ohai "${args[@]}"
    execute "${args[@]}"
  fi
}

getc() {
  local save_state
  save_state=$(/bin/stty -g)
  /bin/stty raw -echo
  IFS= read -r -n 1 -d '' "$@"
  /bin/stty "$save_state"
}

wait_for_user() {
  local c
  echo
  echo "Press RETURN to continue or any other key to abort"
  getc c
  # we test for \r and \n because some stuff does \r instead
  if ! [[ "$c" == {{contextString}}#39;\r' || "$c" == {{contextString}}#39;\n' ]]; then
    exit 1
  fi
}

major_minor() {
  echo "${1%%.*}.$(x="${1#*.}"; echo "${x%%.*}")"
}

if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
  macos_version="$(major_minor "$(/usr/bin/sw_vers -productVersion)")"
fi

version_gt() {
  [[ "${1%.*}" -gt "${2%.*}" ]] || [[ "${1%.*}" -eq "${2%.*}" && "${1#*.}" -gt "${2#*.}" ]]
}
version_ge() {
  [[ "${1%.*}" -gt "${2%.*}" ]] || [[ "${1%.*}" -eq "${2%.*}" && "${1#*.}" -ge "${2#*.}" ]]
}
version_lt() {
  [[ "${1%.*}" -lt "${2%.*}" ]] || [[ "${1%.*}" -eq "${2%.*}" && "${1#*.}" -lt "${2#*.}" ]]
}

should_install_command_line_tools() {
  if [[ -n "${HOMEBREW_ON_LINUX-}" ]]; then
    return 1
  fi

  if version_gt "$macos_version" "10.13"; then
    ! [[ -e "/Library/Developer/CommandLineTools/usr/bin/git" ]]
  else
    ! [[ -e "/Library/Developer/CommandLineTools/usr/bin/git" ]] ||
      ! [[ -e "/usr/include/iconv.h" ]]
  fi
}

get_permission() {
  $STAT "%A" "$1"
}

user_only_chmod() {
  [[ -d "$1" ]] && [[ "$(get_permission "$1")" != "755" ]]
}

exists_but_not_writable() {
  [[ -e "$1" ]] && ! [[ -r "$1" && -w "$1" && -x "$1" ]]
}

get_owner() {
  $STAT "%u" "$1"
}

file_not_owned() {
  [[ "$(get_owner "$1")" != "$(id -u)" ]]
}

get_group() {
  $STAT "%g" "$1"
}

file_not_grpowned() {
  [[ " $(id -G "$USER") " != *" $(get_group "$1") "*  ]]
}

# Please sync with 'test_ruby()' in 'Library/Homebrew/utils/ruby.sh' from Homebrew/brew repository.
test_ruby () {
  if [[ ! -x $1 ]]
  then
    return 1
  fi

  "$1" --enable-frozen-string-literal --disable=gems,did_you_mean,rubyopt -rrubygems -e \
    "abort if Gem::Version.new(RUBY_VERSION.to_s.dup).to_s.split('.').first(2) != \
              Gem::Version.new('$REQUIRED_RUBY_VERSION').to_s.split('.').first(2)" 2>/dev/null
}

no_usable_ruby() {
  local ruby_exec
  IFS=$'\n' # Do word splitting on new lines only
  for ruby_exec in $(which -a ruby); do
    if test_ruby "$ruby_exec"; then
      return 1
    fi
  done
  IFS=$' \t\n' # Restore IFS to its default value
  return 0
}

outdated_glibc() {
  local glibc_version
  glibc_version=$(ldd --version | head -n1 | grep -o '[0-9.]*$' | grep -o '^[0-9]\+\.[0-9]\+')
  version_lt "$glibc_version" "$REQUIRED_GLIBC_VERSION"
}

if [[ -n "${HOMEBREW_ON_LINUX-}" ]] && no_usable_ruby && outdated_glibc
then
    abort "$(cat <<-EOFABORT
	Homebrew requires Ruby $REQUIRED_RUBY_VERSION which was not found on your system.
	Homebrew portable Ruby requires Glibc version $REQUIRED_GLIBC_VERSION or newer,
	and your Glibc version is too old.
	See ${tty_underline}https://docs.brew.sh/Homebrew-on-Linux#requirements${tty_reset}
	Install Ruby $REQUIRED_RUBY_VERSION and add its location to your PATH.
	EOFABORT
    )"
fi

# USER isn't always set so provide a fall back for the installer and subprocesses.
if [[ -z "${USER-}" ]]; then
  USER="$(chomp "$(id -un)")"
  export USER
fi

# Invalidate sudo timestamp before exiting (if it wasn't active before).
if ! /usr/bin/sudo -n -v 2>/dev/null; then
  trap '/usr/bin/sudo -k' EXIT
fi

# Things can fail later if `pwd` doesn't exist.
# Also sudo prints a warning message for no good reason
cd "/usr" || exit 1

####################################################################### script
if ! command -v git >/dev/null; then
    abort "$(cat <<EOABORT
You must install Git before installing Homebrew. See:
  ${tty_underline}https://docs.brew.sh/Installation${tty_reset}
EOABORT
)"
fi

if ! command -v curl >/dev/null; then
    abort "$(cat <<EOABORT
You must install cURL before installing Homebrew. See:
  ${tty_underline}https://docs.brew.sh/Installation${tty_reset}
EOABORT
)"
fi

if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
 have_sudo_access
else
  if [[ -n "${CI-}" ]] || [[ -w "$HOMEBREW_PREFIX_DEFAULT" ]] || [[ -w "/home/linuxbrew" ]] || [[ -w "/home" ]]; then
    HOMEBREW_PREFIX="$HOMEBREW_PREFIX_DEFAULT"
  else
    trap exit SIGINT
    if [[ $(/usr/bin/sudo -n -l mkdir 2>&1) != *"mkdir"* ]]; then
      ohai "Select the Homebrew installation directory"
      echo "- ${tty_bold}Enter your password${tty_reset} to install to ${tty_underline}${HOMEBREW_PREFIX_DEFAULT}${tty_reset} (${tty_bold}recommended${tty_reset})"
      echo "- ${tty_bold}Press Control-D${tty_reset} to install to ${tty_underline}$HOME/.linuxbrew${tty_reset}"
      echo "- ${tty_bold}Press Control-C${tty_reset} to cancel installation"
    fi
    if have_sudo_access; then
      HOMEBREW_PREFIX="$HOMEBREW_PREFIX_DEFAULT"
    else
      HOMEBREW_PREFIX="$HOME/.linuxbrew"
    fi
    trap - SIGINT
  fi
  HOMEBREW_REPOSITORY="${HOMEBREW_PREFIX}/Homebrew"
fi

if [[ "$UID" == "0" ]]; then
  abort "Don't run this as root!"
elif [[ -d "$HOMEBREW_PREFIX" && ! -x "$HOMEBREW_PREFIX" ]]; then
  abort "$(cat <<EOABORT
The Homebrew prefix, ${HOMEBREW_PREFIX}, exists but is not searchable. If this is
not intentional, please restore the default permissions and try running the
installer again:
    sudo chmod 775 ${HOMEBREW_PREFIX}
EOABORT
)"
fi

if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
  if version_lt "$macos_version" "10.7"; then
    abort "$(cat <<EOABORT
Your Mac OS X version is too old. See:
  ${tty_underline}https://github.com/mistydemeo/tigerbrew${tty_reset}
EOABORT
)"
  elif version_lt "$macos_version" "10.10"; then
    abort "Your OS X version is too old"
  elif version_gt "$macos_version" "$MACOS_LATEST_SUPPORTED" || \
    version_lt "$macos_version" "$MACOS_OLDEST_SUPPORTED"; then
    who="We"
    what=""
    if version_gt "$macos_version" "$MACOS_LATEST_SUPPORTED"; then
      what="pre-release version"
    else
      who+=" (and Apple)"
      what="old version"
    fi
    ohai "You are using macOS ${macos_version}."
    ohai "${who} do not provide support for this ${what}."

    echo "$(cat <<EOS
This installation may not succeed.
After installation, you will encounter build failures with some formulae.
Please create pull requests instead of asking for help on Homebrew\'s GitHub,
Discourse, Twitter or IRC. You are responsible for resolving any issues you
experience while you are running this ${what}.
EOS
)
"
  fi
fi

ohai "This script will install:"
echo "${HOMEBREW_PREFIX}/bin/brew"
echo "${HOMEBREW_PREFIX}/share/doc/homebrew"
echo "${HOMEBREW_PREFIX}/share/man/man1/brew.1"
echo "${HOMEBREW_PREFIX}/share/zsh/site-functions/_brew"
echo "${HOMEBREW_PREFIX}/etc/bash_completion.d/brew"
echo "${HOMEBREW_REPOSITORY}"

# Keep relatively in sync with
# https://github.com/Homebrew/brew/blob/master/Library/Homebrew/keg.rb
directories=(bin etc include lib sbin share opt var
             Frameworks
             etc/bash_completion.d lib/pkgconfig
             share/aclocal share/doc share/info share/locale share/man
             share/man/man1 share/man/man2 share/man/man3 share/man/man4
             share/man/man5 share/man/man6 share/man/man7 share/man/man8
             var/log var/homebrew var/homebrew/linked
             bin/brew)
group_chmods=()
for dir in "${directories[@]}"; do
  if exists_but_not_writable "${HOMEBREW_PREFIX}/${dir}"; then
    group_chmods+=("${HOMEBREW_PREFIX}/${dir}")
  fi
done

# zsh refuses to read from these directories if group writable
directories=(share/zsh share/zsh/site-functions)
zsh_dirs=()
for dir in "${directories[@]}"; do
  zsh_dirs+=("${HOMEBREW_PREFIX}/${dir}")
done

directories=(bin etc include lib sbin share var opt
             share/zsh share/zsh/site-functions
             var/homebrew var/homebrew/linked
             Cellar Caskroom Homebrew Frameworks)
mkdirs=()
for dir in "${directories[@]}"; do
  if ! [[ -d "${HOMEBREW_PREFIX}/${dir}" ]]; then
    mkdirs+=("${HOMEBREW_PREFIX}/${dir}")
  fi
done

user_chmods=()
if [[ "${#zsh_dirs[@]}" -gt 0 ]]; then
  for dir in "${zsh_dirs[@]}"; do
    if user_only_chmod "${dir}"; then
      user_chmods+=("${dir}")
    fi
  done
fi

chmods=()
if [[ "${#group_chmods[@]}" -gt 0 ]]; then
  chmods+=("${group_chmods[@]}")
fi
if [[ "${#user_chmods[@]}" -gt 0 ]]; then
  chmods+=("${user_chmods[@]}")
fi

chowns=()
chgrps=()
if [[ "${#chmods[@]}" -gt 0 ]]; then
  for dir in "${chmods[@]}"; do
    if file_not_owned "${dir}"; then
      chowns+=("${dir}")
    fi
    if file_not_grpowned "${dir}"; then
      chgrps+=("${dir}")
    fi
  done
fi

if [[ "${#group_chmods[@]}" -gt 0 ]]; then
  ohai "The following existing directories will be made group writable:"
  printf "%s\n" "${group_chmods[@]}"
fi
if [[ "${#user_chmods[@]}" -gt 0 ]]; then
  ohai "The following existing directories will be made writable by user only:"
  printf "%s\n" "${user_chmods[@]}"
fi
if [[ "${#chowns[@]}" -gt 0 ]]; then
  ohai "The following existing directories will have their owner set to ${tty_underline}${USER}${tty_reset}:"
  printf "%s\n" "${chowns[@]}"
fi
if [[ "${#chgrps[@]}" -gt 0 ]]; then
  ohai "The following existing directories will have their group set to ${tty_underline}${GROUP}${tty_reset}:"
  printf "%s\n" "${chgrps[@]}"
fi
if [[ "${#mkdirs[@]}" -gt 0 ]]; then
  ohai "The following new directories will be created:"
  printf "%s\n" "${mkdirs[@]}"
fi

if should_install_command_line_tools; then
  ohai "The Xcode Command Line Tools will be installed."
fi

if [[ -t 0 && -z "${CI-}" ]]; then
  wait_for_user
fi

if [[ -d "${HOMEBREW_PREFIX}" ]]; then
  if [[ "${#chmods[@]}" -gt 0 ]]; then
    execute_sudo "/bin/chmod" "u+rwx" "${chmods[@]}"
  fi
  if [[ "${#group_chmods[@]}" -gt 0 ]]; then
    execute_sudo "/bin/chmod" "g+rwx" "${group_chmods[@]}"
  fi
  if [[ "${#user_chmods[@]}" -gt 0 ]]; then
    execute_sudo "/bin/chmod" "755" "${user_chmods[@]}"
  fi
  if [[ "${#chowns[@]}" -gt 0 ]]; then
    execute_sudo "$CHOWN" "$USER" "${chowns[@]}"
  fi
  if [[ "${#chgrps[@]}" -gt 0 ]]; then
    execute_sudo "$CHGRP" "$GROUP" "${chgrps[@]}"
  fi
else
  execute_sudo "/bin/mkdir" "-p" "${HOMEBREW_PREFIX}"
  if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
    execute_sudo "$CHOWN" "root:wheel" "${HOMEBREW_PREFIX}"
  else
    execute_sudo "$CHOWN" "$USER:$GROUP" "${HOMEBREW_PREFIX}"
  fi
fi

if [[ "${#mkdirs[@]}" -gt 0 ]]; then
  execute_sudo "/bin/mkdir" "-p" "${mkdirs[@]}"
  execute_sudo "/bin/chmod" "g+rwx" "${mkdirs[@]}"
  execute_sudo "$CHOWN" "$USER" "${mkdirs[@]}"
  execute_sudo "$CHGRP" "$GROUP" "${mkdirs[@]}"
fi

if ! [[ -d "${HOMEBREW_CACHE}" ]]; then
  if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
    execute_sudo "/bin/mkdir" "-p" "${HOMEBREW_CACHE}"
  else
    execute "/bin/mkdir" "-p" "${HOMEBREW_CACHE}"
  fi
fi
if exists_but_not_writable "${HOMEBREW_CACHE}"; then
  execute_sudo "/bin/chmod" "g+rwx" "${HOMEBREW_CACHE}"
fi
if file_not_owned "${HOMEBREW_CACHE}"; then
  execute_sudo "$CHOWN" "$USER" "${HOMEBREW_CACHE}"
fi
if file_not_grpowned "${HOMEBREW_CACHE}"; then
  execute_sudo "$CHGRP" "$GROUP" "${HOMEBREW_CACHE}"
fi
if [[ -d "${HOMEBREW_CACHE}" ]]; then
  execute "$TOUCH" "${HOMEBREW_CACHE}/.cleaned"
fi

if should_install_command_line_tools && version_ge "$macos_version" "10.13"; then
  ohai "Searching online for the Command Line Tools"
  # This temporary file prompts the 'softwareupdate' utility to list the Command Line Tools
  clt_placeholder="/tmp/.com.apple.dt.CommandLineTools.installondemand.in-progress"
  execute_sudo "$TOUCH" "$clt_placeholder"

  clt_label_command="/usr/sbin/softwareupdate -l |
                      grep -B 1 -E 'Command Line Tools' |
                      awk -F'*' '/^ *\\*/ {print \$2}' |
                      sed -e 's/^ *Label: //' -e 's/^ *//' |
                      sort -V |
                      tail -n1"
  clt_label="$(chomp "$(/bin/bash -c "$clt_label_command")")"

  if [[ -n "$clt_label" ]]; then
    ohai "Installing $clt_label"
    execute_sudo "/usr/sbin/softwareupdate" "-i" "$clt_label"
    execute_sudo "/bin/rm" "-f" "$clt_placeholder"
    execute_sudo "/usr/bin/xcode-select" "--switch" "/Library/Developer/CommandLineTools"
  fi
fi

# Headless install may have failed, so fallback to original 'xcode-select' method
if should_install_command_line_tools && test -t 0; then
  ohai "Installing the Command Line Tools (expect a GUI popup):"
  execute_sudo "/usr/bin/xcode-select" "--install"
  echo "Press any key when the installation has completed."
  getc
  execute_sudo "/usr/bin/xcode-select" "--switch" "/Library/Developer/CommandLineTools"
fi

if [[ -z "${HOMEBREW_ON_LINUX-}" ]] && ! output="$(/usr/bin/xcrun clang 2>&1)" && [[ "$output" == *"license"* ]]; then
  abort "$(cat <<EOABORT
You have not agreed to the Xcode license.
Before running the installer again please agree to the license by opening
Xcode.app or running:
    sudo xcodebuild -license
EOABORT
)"
fi

ohai "Downloading and installing Homebrew..."
(
  cd "${HOMEBREW_REPOSITORY}" >/dev/null || return

  # we do it in four steps to avoid merge errors when reinstalling
  execute "git" "init" "-q"

  # "git remote add" will fail if the remote is defined in the global config
  execute "git" "config" "remote.origin.url" "${BREW_REPO}"
  execute "git" "config" "remote.origin.fetch" "+refs/heads/*:refs/remotes/origin/*"

  # ensure we don't munge line endings on checkout
  execute "git" "config" "core.autocrlf" "false"

  execute "git" "fetch" "origin" "--force"
  execute "git" "fetch" "origin" "--tags" "--force"

  execute "git" "reset" "--hard" "origin/master"

  execute "ln" "-sf" "${HOMEBREW_REPOSITORY}/bin/brew" "${HOMEBREW_PREFIX}/bin/brew"

) || exit 1

if [[ ":${PATH}:" != *":${HOMEBREW_PREFIX}/bin:"* ]]; then
  warn "${HOMEBREW_PREFIX}/bin is not in your PATH."
fi

ohai "Installation successful!"
echo

# Use the shell's audible bell.
if [[ -t 1 ]]; then
  printf "\a"
fi

# Use an extra newline and bold to avoid this being missed.
ohai "Homebrew has enabled anonymous aggregate formulae and cask analytics."
echo "$(cat <<EOS
${tty_bold}Read the analytics documentation (and how to opt-out) here:
  ${tty_underline}https://docs.brew.sh/Analytics${tty_reset}
No analytics data has been sent yet (or will be during this \`install\` run).
EOS
)
"

ohai "Homebrew is run entirely by unpaid volunteers. Please consider donating:"
echo "$(cat <<EOS
  ${tty_underline}https://github.com/Homebrew/brew#donations${tty_reset}
EOS
)
"

(
  cd "${HOMEBREW_REPOSITORY}" >/dev/null || return
  execute "git" "config" "--replace-all" "homebrew.analyticsmessage" "true"
  execute "git" "config" "--replace-all" "homebrew.caskanalyticsmessage" "true"
) || exit 1

ohai "Next steps:"
echo "- Run \`brew help\` to get started"
echo "- Further documentation: "
echo "    ${tty_underline}https://docs.brew.sh${tty_reset}"

if [[ -n "${HOMEBREW_ON_LINUX-}" ]]; then
  case "$SHELL" in
    */bash*)
      if [[ -r "$HOME/.bash_profile" ]]; then
        shell_profile="$HOME/.bash_profile"
      else
        shell_profile="$HOME/.profile"
      fi
      ;;
    */zsh*)
      shell_profile="$HOME/.zprofile"
      ;;
    *)
      shell_profile="$HOME/.profile"
      ;;
  esac

  echo "- Install the Homebrew dependencies if you have sudo access:"

  if [[ $(command -v apt-get) ]]; then
    echo "    sudo apt-get install build-essential"
  elif [[ $(command -v yum) ]]; then
    echo "    sudo yum groupinstall 'Development Tools'"
  elif [[ $(command -v pacman) ]]; then
    echo "    sudo pacman -S base-devel"
  elif [[ $(command -v apk) ]]; then
    echo "    sudo apk add build-base"
  fi

  cat <<EOS
    See ${tty_underline}https://docs.brew.sh/linux${tty_reset} for more information
- Add Homebrew to your ${tty_bold}PATH${tty_reset} in ${tty_underline}${shell_profile}${tty_reset}:
    echo 'eval \$(${HOMEBREW_PREFIX}/bin/brew shellenv)' >> ${shell_profile}
    eval \$(${HOMEBREW_PREFIX}/bin/brew shellenv)
- We recommend that you install GCC:
    brew install gcc

EOS
fi
```
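As a usage sketch (assuming a Linux host with sudo access): setting `CI` skips the `wait_for_user` prompt, per the `[[ -t 0 && -z "${CI-}" ]]` check in the script, and the final `shellenv` line follows the script's own "Next steps" output:

```sh
# Non-interactive install; CI=1 bypasses the "Press RETURN to continue" prompt.
CI=1 /bin/bash .github/brew_install.sh

# Add brew to PATH (the default Linux prefix, assuming sudo access was available).
eval "$(/home/linuxbrew/.linuxbrew/bin/brew shellenv)"
brew help
```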

## /.github/release-drafter.yml

```yml path="/.github/release-drafter.yml" 
name-template: 'v$RESOLVED_VERSION 🌈'
tag-template: 'v$RESOLVED_VERSION'
categories:
  - title: '🌟 Features'
    labels:
      - 'feature'
      - 'enhancement'
  - title: '🐛 Bug Fixes'
    labels:
      - 'fix'
      - 'bugfix'
      - 'bug'
  - title: '📚 Documentation'
    labels:
      - 'doc'
      - 'documentation'
  - title: '🧹 Maintenance'
    labels:
      - 'maintenance'
change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
version-resolver:
  major:
    labels:
      - 'major'
  minor:
    labels:
      - 'minor'
  patch:
    labels:
      - 'patch'
  default: patch
template: |
  ## Changes

  $CHANGES

```
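As a usage note, the draft version is resolved from PR labels, so labeling a merged PR chooses both the version bump and the changelog category. A hypothetical sketch with the GitHub CLI (the PR number is illustrative):

```sh
# Hypothetical: bump the minor version and file the entry under "🌟 Features".
gh pr edit 1234 --add-label minor --add-label feature
```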

## /.github/workflows/lint_title.yml

```yml path="/.github/workflows/lint_title.yml" 
name: Lint pull request title

on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - edited

concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}

jobs:
  lint-title:
    runs-on: ubuntu-latest
    steps:
      # This step is necessary because the lint title uses the .commitlintrc.js file in the project root directory.
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '16'

      - name: Install commitlint
        run: npm install --save-dev @commitlint/{config-conventional,cli}

      - name: Validate PR Title with commitlint
        env:
          BODY: ${{ github.event.pull_request.title }}
        run: |
          echo "$BODY" | npx commitlint --config .commitlintrc.js

```

## /.github/workflows/release.yml

```yml path="/.github/workflows/release.yml" 
name: Release

on:
  push:
    branches:
      - main

permissions:
  contents: read

jobs:
  release:
    runs-on: ubuntu-latest
    outputs:
      release_created: ${{ steps.release_please.outputs.release_created }}

    steps:
      - name: Release please
        id: release_please
        uses: googleapis/release-please-action@v4
        with:
          token: ${{ secrets.PAT }}
          release-type: simple

  deploy_with_manylinux:
    needs: release
    permissions:
      contents: write
      pull-requests: read
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        if: needs.release.outputs.release_created == 'true'
        with:
          fetch-depth: 0

      - name: Set up Python ${{ matrix.python-version }}
        if: needs.release.outputs.release_created == 'true'
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheel on Linux
        if: needs.release.outputs.release_created == 'true'
        uses: RalfG/python-wheels-manylinux-build@v0.7.1-manylinux2014_x86_64
        with:
          python-versions: 'cp38-cp38 cp39-cp39 cp310-cp310 cp311-cp311 cp312-cp312'
          build-requirements: 'numpy cython'

      - name: Install dependencies
        if: needs.release.outputs.release_created == 'true'
        run: |
          python -m pip install twine

      - name: Upload to PyPi
        if: needs.release.outputs.release_created == 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TESTPYPI }}
        run: |
          twine check dist/pyqlib-*-manylinux*.whl
          twine upload --repository-url https://test.pypi.org/legacy/ dist/pyqlib-*-manylinux*.whl --verbose

  deploy_with_bdist_wheel:
    needs: release
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # After testing, the pyqlib wheels built on macos-14 and macos-15 for Python 3.8-3.12
        # have exactly duplicated filenames, and duplicated wheel files cannot be uploaded to PyPI,
        # so we keep only macos-latest (which currently points to macos-15).
        # Also, macos-13 will no longer be supported after 2025-11-14.
        # Refs: https://github.blog/changelog/2025-07-11-upcoming-changes-to-macos-hosted-runners-macos-latest-migration-and-xcode-support-policy-updates/
        os: [windows-latest, macos-latest]
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

    steps:
      - uses: actions/checkout@v4
        if: needs.release.outputs.release_created == 'true'
        with:
          fetch-depth: 0

      - name: Set up Python ${{ matrix.python-version }}
        if: needs.release.outputs.release_created == 'true'
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        if: needs.release.outputs.release_created == 'true'
        run: |
          make dev

      - name: Build wheel on ${{ matrix.os }}
        if: needs.release.outputs.release_created == 'true'
        run: |
          make build

      - name: Upload to PyPI
        if: needs.release.outputs.release_created == 'true'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TESTPYPI }}
        run: |
          twine check dist/*.whl
          twine upload --repository-url https://test.pypi.org/legacy/ dist/*.whl --verbose

```
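
When a release build fails, the `deploy_with_bdist_wheel` path can be approximated on a local machine. A rough sketch, assuming a full clone and the `make` targets defined in the Makefile later in this document; uploading targets TestPyPI, mirroring the workflow:

```bash
# Build a wheel the same way the workflow does
make dev     # install prerequisites and all development extras
make build   # runs `python -m build --wheel`

# Check and (optionally) upload to TestPyPI
python -m pip install twine   # in case the dev extras did not pull it in
twine check dist/*.whl
twine upload --repository-url https://test.pypi.org/legacy/ dist/*.whl --verbose
```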

## /.github/workflows/stale.yml

```yml path="/.github/workflows/stale.yml" 
name: Mark stale issues and pull requests

on:
  schedule:
  - cron: "0 0/3 * * *"

jobs:
  stale:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/stale@v3
      with:
        repo-token: ${{ secrets.GITHUB_TOKEN }}
        stale-issue-message: 'This issue is stale because it has been open for three months with no activity. Remove the stale label or comment on the issue, otherwise it will be closed in 5 days.'
        stale-pr-message: 'This PR is stale because it has been open for a year with no activity. Remove the stale label or comment on the PR, otherwise it will be closed in 5 days.'
        stale-issue-label: 'stale'
        stale-pr-label: 'stale'
        days-before-stale: 90
        days-before-pr-stale: 365
        days-before-close: 5
        operations-per-run: 100
        exempt-issue-labels: 'bug,enhancement'
        remove-stale-when-updated: true

```

## /.github/workflows/test_qlib_from_pip.yml

```yml path="/.github/workflows/test_qlib_from_pip.yml" 
name: Test qlib from pip

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:
    timeout-minutes: 120

    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [windows-latest, ubuntu-24.04, ubuntu-22.04, macos-14, macos-15]
        # On GitHub Actions, pip cannot match the latest version of the package under Python 3.7.
        # Also, Python 3.7 is no longer supported from macos-14 onward and will be phased out of macos-13 in the near future.
        # All things considered, we have removed Python 3.7.
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

    steps:
    - name: Test qlib from pip
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    - name: Update pip to the latest version
      run: |
        python -m pip install --upgrade pip
      
    - name: Qlib installation test
      run: |
        python -m pip install pyqlib

    - name: Install LightGBM for macOS
      if: ${{ matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
      run: |
        brew update
        brew install libomp || brew reinstall libomp
        python -m pip install --no-binary=:all: lightgbm

    - name: Download dependency data
      run: |
        cd ..
        python -m qlib.cli.data qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
        cd qlib

    - name: Test workflow by config
      run: |
        qrun examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml

```
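
This smoke test can be replayed locally. A minimal sketch following the same steps as the workflow, assuming a Linux or Windows environment (on macOS the workflow adds an extra LightGBM installation step first):

```bash
python -m pip install --upgrade pip
python -m pip install pyqlib

# Download the dataset used by the workflow
python -m qlib.cli.data qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

# Run the LightGBM benchmark workflow end to end
qrun examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
```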

## /.github/workflows/test_qlib_from_source.yml

```yml path="/.github/workflows/test_qlib_from_source.yml" 
name: Test qlib from source

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:
    timeout-minutes: 180
    # we may retry up to 3 times for `Unit tests with Pytest`

    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [windows-latest, ubuntu-24.04, ubuntu-22.04, macos-14, macos-15]
        # On GitHub Actions, pip cannot match the latest version of the package under Python 3.7.
        # Also, Python 3.7 is no longer supported from macos-14 onward and will be phased out of macos-13 in the near future.
        # All things considered, we have removed Python 3.7.
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

    steps:
    - name: Test qlib from source
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    - name: Update pip to the latest version
      run: |
        python -m pip install --upgrade pip

    - name: Install PyTorch for macOS
      if: ${{ matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
      run: |
        python -m pip install torch torchvision torchaudio

    - name: Install PyTorch for Ubuntu
      if: ${{ matrix.os == 'ubuntu-24.04' || matrix.os == 'ubuntu-22.04' }}
      run: |
        python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu

    - name: Install PyTorch for Windows
      if: ${{ matrix.os == 'windows-latest' }}
      run: |
        python -m pip install torch torchvision torchaudio

    - name: Set up Python tools
      run: |
        make dev

    - name: Lint with Black
      run: |
        make black

    - name: Make html with sphinx
      # Since read the docs builds on ubuntu 22.04, we only need to test that the build passes on ubuntu 22.04.
      if: ${{ matrix.os == 'ubuntu-22.04' }}
      run: |
        make docs-gen

    - name: Check Qlib with pylint
      run: |
        make pylint

    - name: Check Qlib with flake8
      run: |
        make flake8

    - name: Check Qlib with mypy
      run: |
        make mypy
    
    - name: Check Qlib ipynb with nbqa
      run: |
        make nbqa

    - name: Test data downloads
      run: |
        python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
        python scripts/get_data.py download_data --file_name rl_data.zip --target_dir tests/.data/rl

    - name: Install LightGBM for macOS
      if: ${{ matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
      run: |
        brew update
        brew install libomp || brew reinstall libomp
        python -m pip install --no-binary=:all: lightgbm

    - name: Check Qlib ipynb with nbconvert
      run: |
        make nbconvert

    - name: Test workflow by config (install from source)
      run: |
        python -m pip install numba
        python qlib/cli/run.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml

    - name: Unit tests with Pytest (MacOS)
      if: ${{ matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
      uses: nick-fields/retry@v2
      with:
        timeout_minutes: 60
        max_attempts: 3
        command: |
          # Limit the number of threads in various libraries to prevent Segmentation faults caused by OpenMP multithreading conflicts under macOS.
          export OMP_NUM_THREADS=1  # Limit the number of OpenMP threads
          export MKL_NUM_THREADS=1  # Limit the number of Intel MKL threads
          export NUMEXPR_NUM_THREADS=1  # Limit the number of NumExpr threads
          export OPENBLAS_NUM_THREADS=1  # Limit the number of OpenBLAS threads
          export VECLIB_MAXIMUM_THREADS=1  # Limit the number of macOS Accelerate/vecLib threads
          cd tests
          python -m pytest . -m "not slow" --durations=0

    - name: Unit tests with Pytest (Ubuntu and Windows)
      if: ${{ matrix.os != 'macos-13' && matrix.os != 'macos-14' && matrix.os != 'macos-15' }}
      uses: nick-fields/retry@v2
      with:
        timeout_minutes: 60
        max_attempts: 3
        command: |
          cd tests
          python -m pytest . -m "not slow" --durations=0

```

## /.github/workflows/test_qlib_from_source_slow.yml

```yml path="/.github/workflows/test_qlib_from_source_slow.yml" 
name: Test qlib from source slow

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:
    timeout-minutes: 720
    # we may retry up to 3 times for `Unit tests with Pytest`

    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [windows-latest, ubuntu-24.04, ubuntu-22.04, macos-14, macos-15]
        # On GitHub Actions, pip cannot match the latest version of the package under Python 3.7.
        # Also, Python 3.7 is no longer supported from macos-14 onward and will be phased out of macos-13 in the near future.
        # All things considered, we have removed Python 3.7.
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

    steps:
    - name: Test qlib from source slow
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    - name: Set up Python tools
      run: |
        make dev

    - name: Download dependency data
      run: |
        python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn

    - name: Install LightGBM for macOS
      if: ${{ matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
      run: |
        brew update
        brew install libomp || brew reinstall libomp
        python -m pip install --no-binary=:all: lightgbm

    - name: Unit tests with Pytest
      uses: nick-fields/retry@v2
      with:
        timeout_minutes: 240
        max_attempts: 3
        command: |
          cd tests
          python -m pytest . -m "slow" --durations=0

```

## /.gitignore

```gitignore path="/.gitignore" 
# https://github.com/github/gitignore/blob/master/Python.gitignore
__pycache__/

*.pyc
*.pyd
*.so
*.ipynb
.ipynb_checkpoints
_build
build/
dist/

*.pkl
*.hd5
*.csv

.env
.vim
.nvimrc
.vscode

qlib/VERSION.txt
qlib/data/_libs/expanding.cpp
qlib/data/_libs/rolling.cpp
examples/estimator/estimator_example/
examples/rl/data/
examples/rl/checkpoints/
examples/rl/outputs/
examples/rl_order_execution/data/
examples/rl_order_execution/outputs/

*.egg-info/

# test related
test-output.xml
.output
.data

# special software
mlruns/

tags

.pytest_cache/
.mypy_cache/
.vscode/

*.swp

./pretrain
.idea/
.aider*

```

## /.mypy.ini

```ini path="/.mypy.ini" 
[mypy]
exclude = (?x)(
    ^qlib/backtest/high_performance_ds\.py$
    | ^qlib/contrib
    | ^qlib/data
    | ^qlib/model
    | ^qlib/strategy
    | ^qlib/tests
    | ^qlib/utils
    | ^qlib/workflow
    | ^qlib/config\.py$
    | ^qlib/log\.py$
    | ^qlib/__init__\.py$
  )
ignore_missing_imports = true
disallow_incomplete_defs = true
follow_imports = skip

```
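
mypy discovers `.mypy.ini` automatically when run from the repository root, so the type check can be reproduced locally with the same invocation the Makefile's `mypy` target uses later in this document:

```bash
python -m pip install mypy
# --install-types fetches missing type stubs non-interactively
mypy qlib --install-types --non-interactive
```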

## /.pre-commit-config.yaml

```yaml path="/.pre-commit-config.yaml" 
repos:
-   repo: https://github.com/psf/black
    rev: 23.7.0
    hooks:
    -   id: black
        args: ["qlib", "-l 120"]

-   repo: https://github.com/PyCQA/flake8
    rev: 4.0.1
    hooks:
        - id: flake8
          args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"]

```
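
These hooks can be exercised locally without waiting for CI. A minimal sketch using the standard pre-commit CLI; the exact invocation is an assumption, as the repository does not prescribe one:

```bash
python -m pip install pre-commit
pre-commit install           # register the git hooks defined above
pre-commit run --all-files   # run black and flake8 over the whole repo once
```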

## /.pylintrc

```pylintrc path="/.pylintrc" 
[TYPECHECK]
# https://stackoverflow.com/a/53572939 
# List of members which are set dynamically and missed by Pylint inference
# system, and so shouldn't trigger E1101 when accessed.
generated-members=numpy.*, torch.*

```

## /.readthedocs.yaml

```yaml path="/.readthedocs.yaml" 
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the version of Python and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.8"

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py

# Build all formats
formats: all

# Optionally set the version of Python and requirements required to build your docs
python:
  install:
    - requirements: docs/requirements.txt
    - method: pip
      path: .

```
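
The Read the Docs build can be approximated locally to catch Sphinx errors early. A sketch that mirrors what this config installs, using the same sphinx invocation as the Makefile's `docs-gen` target later in this document:

```bash
python -m pip install -r docs/requirements.txt
python -m pip install .

# Build the HTML docs with warnings treated as errors
python -m sphinx.cmd.build -W docs public
```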

## /CHANGELOG.md



## /CHANGES.rst

```rst path="/CHANGES.rst" 
Changelog
=========
Here you can see the full list of changes between each QLib release.

Version 0.1.0
-------------
This is the initial release of QLib library.

Version 0.1.1
-------------
Performance optimize. Add more features and operators.

Version 0.1.2
-------------
- Support operator syntax. Now ``High() - Low()`` is equivalent to ``Sub(High(), Low())``.
- Add more technical indicators.

Version 0.1.3
-------------
Bug fix and add instruments filtering mechanism.

Version 0.2.0
-------------
- Redesign ``LocalProvider`` database format for performance improvement.
- Support load features as string fields.
- Add scripts for database construction.
- More operators and technical indicators.

Version 0.2.1
-------------
- Support registering user-defined ``Provider``.
- Support using operators in string format, e.g. ``['Ref($close, 1)']`` is a valid field format.
- Support dynamic fields in ``$some_field`` format. Existing fields like ``Close()`` may be deprecated in the future.

Version 0.2.2
-------------
- Add ``disk_cache`` for reusing features (enabled by default).
- Add ``qlib.contrib`` for experimental model construction and evaluation.


Version 0.2.3
-------------
- Add ``backtest`` module
- Decouple Strategy, Account, Position and Exchange from the backtest module

Version 0.2.4
-------------
- Add ``profit attribution`` module
- Add ``risk_control`` and ``cost_control`` strategies

Version 0.3.0
-------------
- Add ``estimator`` module

Version 0.3.1
-------------
- Add ``filter`` module

Version 0.3.2
-------------
- Add real price trading; if the ``factor`` field in the dataset is incomplete, use ``adj_price`` trading
- Refactor ``handler``, ``launcher`` and ``trainer`` code
- Support ``backtest`` configuration parameters in the configuration file
- Fix bug when position ``amount`` is 0
- Fix bug of ``filter`` module

Version 0.3.3
-------------
- Fix bug of ``filter`` module

Version 0.3.4
-------------
- Support for ``finetune model``
- Refactor ``fetcher`` code

Version 0.3.5
-------------
- Support multi-label training; you can provide multiple labels in ``handler``. (LightGBM doesn't support this due to the algorithm itself)
- Refactor ``handler`` code: dataset.py is no longer used, and you can deploy your own labels and features in ``feature_label_config``
- Handler only offers DataFrame. Also, ``trainer`` and model.py only receive DataFrame
- Change ``split_rolling_data``: we now roll the data on the market calendar, not on the normal date
- Move some date config from ``handler`` to ``trainer``

Version 0.4.0
-------------
- Add `data` package that holds all data-related codes
- Reform the data provider structure
- Create a server for data centralized management `qlib-server <https://amc-msra.visualstudio.com/trading-algo/_git/qlib-server>`_
- Add a `ClientProvider` to work with server
- Add a pluggable cache mechanism
- Add a recursive backtracking algorithm to inspect the furthest reference date for an expression

.. note::
    The ``D.instruments`` function does not support the ``start_time``, ``end_time``, and ``as_list`` parameters. If you want to get the results of previous versions of ``D.instruments``, you can do this:


    >>> from qlib.data import D
    >>> instruments = D.instruments(market='csi500')
    >>> D.list_instruments(instruments=instruments, start_time='2015-01-01', end_time='2016-02-15', as_list=True)


Version 0.4.1
-------------
- Add support for Windows
- Fix ``instruments`` type bug
- Fix bug where ``features`` is empty (it caused failures when updating)
- Fix ``cache`` lock and update bug
- Fix to use the same cache for the same field (the original version would add a new cache)
- Change "logger handler" from config
- Change model load to support 0.4.0 and later
- The default value of the ``method`` parameter of the ``risk_analysis`` function is changed from **ci** to **si**


Version 0.4.2
-------------
- Refactor DataHandler
- Add ``Alpha360`` DataHandler


Version 0.4.3
-------------
- Implementing Online Inference and Trading Framework
- Refactoring The interfaces of backtest and strategy module.


Version 0.4.4
-------------
- Optimize cache generation performance
- Add report module
- Fix bug when using ``ServerDatasetCache`` offline.
- In the previous version of ``long_short_backtest``, there was a case of ``np.nan`` in long_short. This has been fixed in ``0.4.4``, so the results of ``long_short_backtest`` will differ from previous versions.
- In the ``0.4.2`` version of the ``risk_analysis`` function ``N`` is ``250``; from ``0.4.3`` onward ``N`` is ``252``, so ``0.4.2`` results are ``0.002122`` smaller than ``0.4.3`` results and the backtest results differ slightly between the two versions.
- Refactor the arguments of the backtest function.
    - **NOTE**:
      - The default arguments of the topk margin strategy are changed. Please pass the arguments explicitly if you want to get the same backtest result as the previous version.
      - The TopkWeightStrategy is changed slightly. It will try to sell the stocks exceeding ``topk``. (The backtest result of TopkAmountStrategy remains the same)
- The margin ratio mechanism is supported in the Topk Margin strategies.


Version 0.4.5
-------------
- Add multi-kernel implementation for both client and server.
    - Support a new way to load data from client which skips dataset cache.
    - Change the default dataset method from single kernel implementation to multi kernel implementation.
- Accelerate the high frequency data reading by optimizing the relative modules.
- Support a new method to write config file by using dict.

Version 0.4.6
-------------
- Some bugs are fixed
    - The default config in `Version 0.4.5` is not friendly to daily frequency data.
    - Backtest error in TopkWeightStrategy when `WithInteract=True`.


Version 0.5.0
-------------
- First open-source version
    - Refine the docs and code
    - Add baselines
    - Public data crawler


Version 0.8.0
-------------
- The backtest is greatly refactored.
    - Nested decision execution framework is supported
    - There are lots of changes for daily trading; it is hard to list all of them, but a few important ones are worth noting:
        - The trading limitation is more accurate;
            - In the `previous version <https://github.com/microsoft/qlib/blob/v0.7.2/qlib/contrib/backtest/exchange.py#L160>`__, long and short actions share the same trading limitation.
            - In the `current version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/backtest/exchange.py#L304>`__, the trading limitations differ between long and short actions.
        - The constant is different when calculating annualized metrics.
            - The `current version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/contrib/evaluate.py#L42>`_ uses a more accurate constant than the `previous version <https://github.com/microsoft/qlib/blob/v0.7.2/qlib/contrib/evaluate.py#L22>`__
        - A `new version <https://github.com/microsoft/qlib/blob/7c31012b507a3823117bddcc693fc64899460b2a/qlib/tests/data.py#L17>`__ of data is released. Due to the instability of the Yahoo data source, the data may differ after downloading it again.
        - Users can compare the backtest results between the `current version <https://github.com/microsoft/qlib/tree/7c31012b507a3823117bddcc693fc64899460b2a/examples/benchmarks>`__ and the `previous version <https://github.com/microsoft/qlib/tree/v0.7.2/examples/benchmarks>`__


Other Versions
--------------
Please refer to the `GitHub Release Notes <https://github.com/microsoft/qlib/releases>`_

```

## /CODE_OF_CONDUCT.md

# Microsoft Open Source Code of Conduct

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).

Resources:

- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns


## /Dockerfile

``` path="/Dockerfile" 
FROM continuumio/miniconda3:latest

WORKDIR /qlib

COPY . .

RUN apt-get update && \
    apt-get install -y build-essential

RUN conda create --name qlib_env python=3.8 -y
RUN echo "conda activate qlib_env" >> ~/.bashrc
ENV PATH /opt/conda/envs/qlib_env/bin:$PATH

RUN python -m pip install --upgrade pip

RUN python -m pip install numpy==1.23.5
RUN python -m pip install pandas==1.5.3
RUN python -m pip install importlib-metadata==5.2.0
RUN python -m pip install "cloudpickle<3"
RUN python -m pip install scikit-learn==1.3.2

RUN python -m pip install cython packaging tables matplotlib statsmodels
RUN python -m pip install pybind11 cvxpy

ARG IS_STABLE="yes"

RUN if [ "$IS_STABLE" = "yes" ]; then \
        python -m pip install pyqlib; \
    else \
        python setup.py install; \
    fi

```
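
The `IS_STABLE` build argument selects between installing pyqlib from PyPI and installing from the checked-out source. A minimal sketch of both builds; the image tags are hypothetical:

```bash
# Stable image: installs pyqlib from PyPI (IS_STABLE defaults to "yes")
docker build -t qlib:stable .

# Nightly image: installs qlib from the local source tree
docker build --build-arg IS_STABLE=no -t qlib:nightly .
```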

## /MANIFEST.in

```in path="/MANIFEST.in" 
exclude tests/*
include qlib/*
include qlib/*/*
include qlib/*/*/*
include qlib/*/*/*/*
include qlib/*/*/*/*/*

```

## /Makefile

``` path="/Makefile" 
.PHONY: clean deepclean prerequisite dependencies lightgbm rl develop lint docs package test analysis all install dev black pylint flake8 mypy nbqa nbconvert build upload docs-gen
# You can modify it according to your terminal
SHELL := /bin/bash

########################################################################################
# Variables
########################################################################################

# Documentation target directory, will be adapted to specific folder for readthedocs.
PUBLIC_DIR := $(shell [ "$$READTHEDOCS" = "True" ] && echo "$$READTHEDOCS_OUTPUT/html" || echo "public")

SO_DIR := qlib/data/_libs
SO_FILES := $(wildcard $(SO_DIR)/*.so)

ifeq ($(OS),Windows_NT)
    IS_WINDOWS = true
else
    IS_WINDOWS = false
endif

########################################################################################
# Development Environment Management
########################################################################################
# Remove common intermediate files.
clean:
	-rm -rf \
		$(PUBLIC_DIR) \
		qlib/data/_libs/*.cpp \
		qlib/data/_libs/*.so \
		mlruns \
		public \
		build \
		.coverage \
		.mypy_cache \
		.pytest_cache \
		.ruff_cache \
		Pipfile* \
		coverage.xml \
		dist \
		release-notes.md

	find . -name '*.egg-info' -print0 | xargs -0 rm -rf
	find . -name '*.pyc' -print0 | xargs -0 rm -f
	find . -name '*.swp' -print0 | xargs -0 rm -f
	find . -name '.DS_Store' -print0 | xargs -0 rm -f
	find . -name '__pycache__' -print0 | xargs -0 rm -rf

# Remove pre-commit hook and virtual environment along with intermediate files.
deepclean: clean
	if command -v pre-commit > /dev/null 2>&1; then pre-commit uninstall --hook-type pre-push; fi
	if command -v pipenv >/dev/null 2>&1 && pipenv --venv >/dev/null 2>&1; then pipenv --rm; fi

# Prerequisite section
# This target compiles two Cython modules, rolling and expanding, using setuptools and Cython,
# and builds them as binary extension modules that can be imported directly into Python.
# Since pyproject.toml can't do that, we compile them here.

# pywinpty is a dependency of jupyter on Windows. Installing it with a plain `pip install pywinpty`
# first downloads the tar.gz file and then compiles and installs it locally,
# which can lead to unnecessary trouble, so we install the pre-compiled whl file instead.
prerequisite:
	@if [ -n "$(SO_FILES)" ]; then \
		echo "Shared library files exist, skipping build."; \
	else \
		echo "No shared library files found, building..."; \
		pip install --upgrade setuptools wheel; \
		python -m pip install cython numpy; \
		python -c "from setuptools import setup, Extension; from Cython.Build import cythonize; import numpy; extensions = [Extension('qlib.data._libs.rolling', ['qlib/data/_libs/rolling.pyx'], language='c++', include_dirs=[numpy.get_include()]), Extension('qlib.data._libs.expanding', ['qlib/data/_libs/expanding.pyx'], language='c++', include_dirs=[numpy.get_include()])]; setup(ext_modules=cythonize(extensions, language_level='3'), script_args=['build_ext', '--inplace'])"; \
	fi

	@if [ "$(IS_WINDOWS)" = "true" ]; then \
		python -m pip install pywinpty --only-binary=:all:; \
	fi

# Install the package in editable mode.
dependencies:
	python -m pip install -e .

lightgbm:
	python -m pip install lightgbm --prefer-binary

rl:
	python -m pip install -e .[rl]

develop:
	python -m pip install -e .[dev]

lint:
	python -m pip install -e .[lint]

docs:
	python -m pip install -e .[docs]

package:
	python -m pip install -e .[package]

test:
	python -m pip install -e .[test]

analysis:
	python -m pip install -e .[analysis]

all:
	python -m pip install -e .[pywinpty,dev,lint,docs,package,test,analysis,rl]

install: prerequisite dependencies

dev: prerequisite all

########################################################################################
# Lint and pre-commit
########################################################################################

# Check lint with black.
black:
	black . -l 120 --check --diff

# Check code folder with pylint.
# TODO: We will solve these problems in the future. The important ones among them are: W0221, W0223, W0237, E1102
# 	C0103: invalid-name
# 	C0209: consider-using-f-string
# 	R0402: consider-using-from-import
# 	R1705: no-else-return
# 	R1710: inconsistent-return-statements
# 	R1725: super-with-arguments
# 	R1735: use-dict-literal
# 	W0102: dangerous-default-value
# 	W0212: protected-access
# 	W0221: arguments-differ
# 	W0223: abstract-method
# 	W0231: super-init-not-called
# 	W0237: arguments-renamed
# 	W0612: unused-variable
# 	W0621: redefined-outer-name
# 	W0622: redefined-builtin
# 	FIXME: specify exception type
# 	W0703: broad-except
# 	W1309: f-string-without-interpolation
# 	E1102: not-callable
# 	E1136: unsubscriptable-object
# 	W4904: deprecated-class
# 	R0917: too-many-positional-arguments
# 	E1123: unexpected-keyword-arg
# References for disable error: https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html
# We use sys.setrecursionlimit(2000) to make the recursion depth larger to ensure that pylint works properly (the default recursion depth is 1000).
# References for parameters: https://github.com/PyCQA/pylint/issues/4577#issuecomment-1000245962
pylint:
	pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,W4904,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1730,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}' qlib --init-hook="import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
	pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,E1123,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0246,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}' scripts --init-hook="import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"

# Check code with flake8.
# The following flake8 error codes were ignored:
# E501 line too long
# 	Description: We have used black to limit the length of each line to 120.
# F541 f-string is missing placeholders
# 	Description: The same thing is done when using pylint for detection.
# E266 too many leading '#' for block comment
# 	Description: To make the code more readable, a lot of "#" is used.
#         This error code appears centrally in:
# 			qlib/backtest/executor.py
# 			qlib/data/ops.py
# 			qlib/utils/__init__.py
# E402 module level import not at top of file
# 	Description: There are times when module level import is not available at the top of the file.
# W503 line break before binary operator
# 	Description: Since black formats the length of each line of code, it has to perform a line break when a line of arithmetic is too long.
# E731 do not assign a lambda expression, use a def
# 	Description: Restricts the use of lambda expressions, but at some point lambda expressions are required.
# E203 whitespace before ':'
# 	Description: If there is whitespace before ":", it cannot pass the black check.
flake8:
	flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 --per-file-ignores="__init__.py:F401,F403" qlib

# Check code with mypy.
# https://github.com/python/mypy/issues/10600
mypy:
	mypy qlib --install-types --non-interactive
	mypy qlib --verbose

# Check ipynb with nbqa.
nbqa:
	nbqa black . -l 120 --check --diff
	nbqa pylint . --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136,W0719,W0104,W0404,C0412,W0611,C0410 --const-rgx='[a-z_][a-z0-9_]{2,30}'

# Check ipynb with nbconvert. (Run after data downloads.)
# TODO: Add more ipynb files in future
nbconvert:
	jupyter nbconvert --to notebook --execute examples/workflow_by_code.ipynb

lint: black pylint flake8 mypy nbqa

########################################################################################
# Package
########################################################################################

# Build the package.
build:
	python -m build --wheel

# Upload the package.
upload:
	python -m twine upload dist/*

########################################################################################
# Documentation
########################################################################################

docs-gen:
	python -m sphinx.cmd.build -W docs $(PUBLIC_DIR)

```
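
A typical development session built from these targets might look as follows; this is a sketch of one possible flow, not a prescribed workflow:

```bash
make dev      # prerequisite (Cython extensions) plus all optional extras
make lint     # black, pylint, flake8, mypy and nbqa in one pass
make test     # installs the test extras; run pytest manually afterwards
make build    # produce a wheel under dist/
make clean    # remove build artifacts and caches
```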

## /README.md

[![Python Versions](https://img.shields.io/pypi/pyversions/pyqlib.svg?logo=python&logoColor=white)](https://pypi.org/project/pyqlib/#files)
[![Platform](https://img.shields.io/badge/platform-linux%20%7C%20windows%20%7C%20macos-lightgrey)](https://pypi.org/project/pyqlib/#files)
[![PypI Versions](https://img.shields.io/pypi/v/pyqlib)](https://pypi.org/project/pyqlib/#history)
[![Upload Python Package](https://github.com/microsoft/qlib/workflows/Upload%20Python%20Package/badge.svg)](https://pypi.org/project/pyqlib/)
[![Github Actions Test Status](https://github.com/microsoft/qlib/workflows/Test/badge.svg?branch=main)](https://github.com/microsoft/qlib/actions)
[![Documentation Status](https://readthedocs.org/projects/qlib/badge/?version=latest)](https://qlib.readthedocs.io/en/latest/?badge=latest)
[![License](https://img.shields.io/pypi/l/pyqlib)](LICENSE)
[![Join the chat at https://gitter.im/Microsoft/qlib](https://badges.gitter.im/Microsoft/qlib.svg)](https://gitter.im/Microsoft/qlib?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

## :newspaper: **What's NEW!** &nbsp;   :sparkling_heart: 

Recently released features

### Introducing <a href="https://github.com/microsoft/RD-Agent"><img src="docs/_static/img/rdagent_logo.png" alt="RD_Agent" style="height: 2em"></a>: LLM-Based Autonomous Evolving Agents for Industrial Data-Driven R&D

We are excited to announce the release of **RD-Agent**📢, a powerful tool that supports automated factor mining and model optimization in quant investment R&D.

RD-Agent is now available on [GitHub](https://github.com/microsoft/RD-Agent), and we welcome your star🌟!

To learn more, please visit our [♾️Demo page](https://rdagent.azurewebsites.net/). Here, you will find demo videos in both English and Chinese to help you better understand the scenario and usage of RD-Agent.

We have prepared several demo videos for you:
| Scenario | Demo video (English) | Demo video (中文) |
| --                      | ------    | ------    |
| Quant Factor Mining | [Link](https://rdagent.azurewebsites.net/factor_loop?lang=en) | [Link](https://rdagent.azurewebsites.net/factor_loop?lang=zh) |
| Quant Factor Mining from reports | [Link](https://rdagent.azurewebsites.net/report_factor?lang=en) | [Link](https://rdagent.azurewebsites.net/report_factor?lang=zh) |
| Quant Model Optimization | [Link](https://rdagent.azurewebsites.net/model_loop?lang=en) | [Link](https://rdagent.azurewebsites.net/model_loop?lang=zh) |

- 📃**Paper**: [R&D-Agent-Quant: A Multi-Agent Framework for Data-Centric Factors and Model Joint Optimization](https://arxiv.org/abs/2505.15155)
- 👾**Code**: https://github.com/microsoft/RD-Agent/
```BibTeX
@misc{li2025rdagentquant,
    title={R\&D-Agent-Quant: A Multi-Agent Framework for Data-Centric Factors and Model Joint Optimization},
    author={Yuante Li and Xu Yang and Xiao Yang and Minrui Xu and Xisen Wang and Weiqing Liu and Jiang Bian},
    year={2025},
    eprint={2505.15155},
    archivePrefix={arXiv},
    primaryClass={cs.AI}
}
```
![image](https://github.com/user-attachments/assets/3198bc10-47ba-4ee0-8a8e-46d5ce44f45d)

***

| Feature | Status |
| --                      | ------    |
| [R&D-Agent-Quant](https://arxiv.org/abs/2505.15155) Published | Apply R&D-Agent to Qlib for quant trading | 
| BPQP for End-to-end learning | 📈Coming soon!([Under review](https://github.com/microsoft/qlib/pull/1863)) |
| 🔥LLM-driven Auto Quant Factory🔥 | 🚀 Released in [♾️RD-Agent](https://github.com/microsoft/RD-Agent) on Aug 8, 2024 |
| KRNN and Sandwich models | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/1414/) on May 26, 2023 |
| Release Qlib v0.9.0 | :octocat: [Released](https://github.com/microsoft/qlib/releases/tag/v0.9.0) on Dec 9, 2022 |
| RL Learning Framework | :hammer: :chart_with_upwards_trend: Released on Nov 10, 2022. [#1332](https://github.com/microsoft/qlib/pull/1332), [#1322](https://github.com/microsoft/qlib/pull/1322), [#1316](https://github.com/microsoft/qlib/pull/1316),[#1299](https://github.com/microsoft/qlib/pull/1299),[#1263](https://github.com/microsoft/qlib/pull/1263), [#1244](https://github.com/microsoft/qlib/pull/1244), [#1169](https://github.com/microsoft/qlib/pull/1169), [#1125](https://github.com/microsoft/qlib/pull/1125), [#1076](https://github.com/microsoft/qlib/pull/1076)|
| HIST and IGMTF models | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/1040) on Apr 10, 2022 |
| Qlib [notebook tutorial](https://github.com/microsoft/qlib/tree/main/examples/tutorial) | 📖 [Released](https://github.com/microsoft/qlib/pull/1037) on Apr 7, 2022 | 
| Ibovespa index data | :rice: [Released](https://github.com/microsoft/qlib/pull/990) on Apr 6, 2022 |
| Point-in-Time database | :hammer: [Released](https://github.com/microsoft/qlib/pull/343) on Mar 10, 2022 |
| Arctic Provider Backend & Orderbook data example | :hammer: [Released](https://github.com/microsoft/qlib/pull/744) on Jan 17, 2022 |
| Meta-Learning-based framework & DDG-DA  | :chart_with_upwards_trend:  :hammer: [Released](https://github.com/microsoft/qlib/pull/743) on Jan 10, 2022 | 
| Planning-based portfolio optimization | :hammer: [Released](https://github.com/microsoft/qlib/pull/754) on Dec 28, 2021 | 
| Release Qlib v0.8.0 | :octocat: [Released](https://github.com/microsoft/qlib/releases/tag/v0.8.0) on Dec 8, 2021 |
| ADD model | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/704) on Nov 22, 2021 |
| ADARNN  model | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/689) on Nov 14, 2021 |
| TCN  model | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/668) on Nov 4, 2021 |
| Nested Decision Framework | :hammer: [Released](https://github.com/microsoft/qlib/pull/438) on Oct 1, 2021. [Example](https://github.com/microsoft/qlib/blob/main/examples/nested_decision_execution/workflow.py) and [Doc](https://qlib.readthedocs.io/en/latest/component/highfreq.html) |
| Temporal Routing Adaptor (TRA) | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/531) on July 30, 2021 |
| Transformer & Localformer | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/508) on July 22, 2021 |
| Release Qlib v0.7.0 | :octocat: [Released](https://github.com/microsoft/qlib/releases/tag/v0.7.0) on July 12, 2021 |
| TCTS Model | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/491) on July 1, 2021 |
| Online serving and automatic model rolling | :hammer:  [Released](https://github.com/microsoft/qlib/pull/290) on May 17, 2021 | 
| DoubleEnsemble Model | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/286) on Mar 2, 2021 | 
| High-frequency data processing example | :hammer: [Released](https://github.com/microsoft/qlib/pull/257) on Feb 5, 2021  |
| High-frequency trading example | :chart_with_upwards_trend: [Part of code released](https://github.com/microsoft/qlib/pull/227) on Jan 28, 2021  | 
| High-frequency data(1min) | :rice: [Released](https://github.com/microsoft/qlib/pull/221) on Jan 27, 2021 |
| Tabnet Model | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/205) on Jan 22, 2021 |

Features released before 2021 are not listed here.

<p align="center">
  <img src="docs/_static/img/logo/1.png" />
</p>

Qlib is an open-source, AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing productions. Qlib supports diverse machine learning modeling paradigms, including supervised learning, market dynamics modeling, and reinforcement learning.

An increasing number of SOTA Quant research works/papers in diverse paradigms are being released in Qlib to collaboratively solve key challenges in quantitative investment. For example, 1) using supervised learning to mine the market's complex non-linear patterns from rich and heterogeneous financial data, 2) modeling the dynamic nature of the financial market using adaptive concept drift technology, and 3) using reinforcement learning to model continuous investment decisions and assist investors in optimizing their trading strategies.

It contains the full ML pipeline of data processing, model training, back-testing; and covers the entire chain of quantitative investment: alpha seeking, risk modeling, portfolio optimization, and order execution. 
For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative Investment Platform"](https://arxiv.org/abs/2009.11189).


<table>
  <tbody>
    <tr>
      <th>Frameworks, Tutorial, Data & DevOps</th>
      <th>Main Challenges & Solutions in Quant Research</th>
    </tr>
    <tr>
      <td>
        <li><a href="#plans"><strong>Plans</strong></a></li>
        <li><a href="#framework-of-qlib">Framework of Qlib</a></li>
        <li><a href="#quick-start">Quick Start</a></li>
          <ul dir="auto">
            <li type="circle"><a href="#installation">Installation</a> </li>
            <li type="circle"><a href="#data-preparation">Data Preparation</a></li>
            <li type="circle"><a href="#auto-quant-research-workflow">Auto Quant Research Workflow</a></li>
            <li type="circle"><a href="#building-customized-quant-research-workflow-by-code">Building Customized Quant Research Workflow by Code</a></li></ul>
        <li><a href="#quant-dataset-zoo"><strong>Quant Dataset Zoo</strong></a></li>
        <li><a href="#learning-framework">Learning Framework</a></li>
        <li><a href="#more-about-qlib">More About Qlib</a></li>
        <li><a href="#offline-mode-and-online-mode">Offline Mode and Online Mode</a>
        <ul>
          <li type="circle"><a href="#performance-of-qlib-data-server">Performance of Qlib Data Server</a></li></ul>
        <li><a href="#related-reports">Related Reports</a></li>
        <li><a href="#contact-us">Contact Us</a></li>
        <li><a href="#contributing">Contributing</a></li>
      </td>
      <td valign="baseline">
        <li><a href="#main-challenges--solutions-in-quant-research">Main Challenges &amp; Solutions in Quant Research</a>
          <ul>
            <li type="circle"><a href="#forecasting-finding-valuable-signalspatterns">Forecasting: Finding Valuable Signals/Patterns</a>
              <ul>
                <li type="disc"><a href="#quant-model-paper-zoo"><strong>Quant Model (Paper) Zoo</strong></a>
                  <ul>
                    <li type="circle"><a href="#run-a-single-model">Run a Single Model</a></li>
                    <li type="circle"><a href="#run-multiple-models">Run Multiple Models</a></li>
                  </ul>
                </li>
              </ul>
            </li>
          <li type="circle"><a href="#adapting-to-market-dynamics">Adapting to Market Dynamics</a></li>
          <li type="circle"><a href="#reinforcement-learning-modeling-continuous-decisions">Reinforcement Learning: modeling continuous decisions</a></li>
          </ul>
        </li>
      </td>
    </tr>
  </tbody>
</table>

# Plans
New features under development (ordered by estimated release time).
Your feedback about these features is very important.
<!-- | Feature                        | Status      | -->
<!-- | --                      | ------    | -->

# Framework of Qlib

<div style="align: center">
<img src="docs/_static/img/framework-abstract.jpg" />
</div>

The high-level framework of Qlib is shown above (users can find the [detailed framework](https://qlib.readthedocs.io/en/latest/introduction/introduction.html#framework) of Qlib's design when getting into the nitty-gritty).
The components are designed as loosely coupled modules, and each component can be used stand-alone.

Qlib provides a strong infrastructure to support Quant research. [Data](https://qlib.readthedocs.io/en/latest/component/data.html) is always an important part.
A strong learning framework is designed to support diverse learning paradigms (e.g. [reinforcement learning](https://qlib.readthedocs.io/en/latest/component/rl.html), [supervised learning](https://qlib.readthedocs.io/en/latest/component/workflow.html#model-section)) and patterns at different levels (e.g. [market dynamic modeling](https://qlib.readthedocs.io/en/latest/component/meta.html)).
By modeling the market, [trading strategies](https://qlib.readthedocs.io/en/latest/component/strategy.html) generate trade decisions that will be executed. Multiple trading strategies and executors at different levels or granularities can be [nested to be optimized and run together](https://qlib.readthedocs.io/en/latest/component/highfreq.html).
Finally, a comprehensive [analysis](https://qlib.readthedocs.io/en/latest/component/report.html) is provided, and the model can be [served online](https://qlib.readthedocs.io/en/latest/component/online.html) at low cost.


# Quick Start

This quick start guide tries to demonstrate
1. It's very easy to build a complete Quant research workflow and try your ideas with _Qlib_.
2. Even with *public data* and *simple models*, machine learning technologies **work very well** in practical Quant investment.

Here is a quick **[demo](https://terminalizer.com/view/3f24561a4470)** showing how to install ``Qlib`` and run LightGBM with ``qrun``. **But** please make sure you have already prepared the data following the [instructions](#data-preparation).


## Installation

This table demonstrates the supported Python version of `Qlib`:
|               | install with pip      | install from source  |        plot        |
| ------------- |:---------------------:|:--------------------:|:------------------:|
| Python 3.8    | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
| Python 3.9    | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
| Python 3.10   | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
| Python 3.11   | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
| Python 3.12   | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |

**Note**: 
1. **Conda** is suggested for managing your Python environment. In some cases, using Python outside of a `conda` environment may result in missing header files, causing the installation failure of certain packages.
2. Please note that installing cython under Python 3.6 will raise errors when installing ``Qlib`` from source. If you use Python 3.6 on your machine, it is recommended to *upgrade* Python to version 3.8 or higher, or use `conda`'s Python to install ``Qlib`` from source.

### Install with pip
Users can easily install ``Qlib`` with pip using the following command.

```bash
  pip install pyqlib
```

**Note**: pip will install the latest stable qlib. However, the main branch of qlib is under active development. If you want to test the latest scripts or functions on the main branch, please install qlib with the methods below.

### Install from source
Also, users can install the latest development version of ``Qlib`` from the source code with the following steps:

* Before installing ``Qlib`` from source, users need to install some dependencies:

  ```bash
  pip install numpy
  pip install --upgrade cython
  ```

* Clone the repository and install ``Qlib`` as follows.
    ```bash
    git clone https://github.com/microsoft/qlib.git && cd qlib
    pip install .  # `pip install -e .[dev]` is recommended for development. check details in docs/developer/code_standard_and_dev_guide.rst
    ```

**Tips**: If you fail to install `Qlib` or run the examples in your environment, comparing your steps with the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem.

**Tips for Mac**: If you are using a Mac with M1, you might encounter issues building the wheel for LightGBM due to missing OpenMP dependencies. To solve the problem, install OpenMP first with ``brew install libomp`` and then run ``pip install .`` to build it successfully.

## Data Preparation
❗ Due to a stricter data security policy, the official dataset is temporarily disabled. You can try [this data source](https://github.com/chenditc/investment_data/releases) contributed by the community.
Here is an example to download the latest data.
```bash
wget https://github.com/chenditc/investment_data/releases/latest/download/qlib_bin.tar.gz
mkdir -p ~/.qlib/qlib_data/cn_data
tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=1
rm -f qlib_bin.tar.gz
```

The official dataset below will resume in the near future.


----

Load and prepare data by running the following code:

### Get with module
  ```bash
  # get 1d data
  python -m qlib.cli.data qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

  # get 1min data
  python -m qlib.cli.data qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min

  ```

### Get from source

  ```bash
  # get 1d data
  python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

  # get 1min data
  python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min

  ```

This dataset is created by public data collected by [crawler scripts](scripts/data_collector/), which have been released in
the same repository.
Users could create the same dataset with it. [Description of dataset](https://github.com/microsoft/qlib/tree/main/scripts/data_collector#description-of-dataset)

*Please pay **ATTENTION** that the data is collected from [Yahoo Finance](https://finance.yahoo.com/lookup) and might not be perfect.
We recommend that users prepare their own data if they have a high-quality dataset. For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)*.

### Automatic update of daily frequency data (from yahoo finance)
  > This step is *Optional* if users only want to try their models and strategies on historical data.
  > 
  > It is recommended that users update the data manually once (--trading_date 2021-05-25) and then set it to update automatically.
  >
  > **NOTE**: Users can't incrementally  update data based on the offline data provided by Qlib(some fields are removed to reduce the data size). Users should use [yahoo collector](https://github.com/microsoft/qlib/tree/main/scripts/data_collector/yahoo#automatic-update-of-daily-frequency-datafrom-yahoo-finance) to download Yahoo data from scratch and then incrementally update it.
  > 
  > For more information, please refer to: [yahoo collector](https://github.com/microsoft/qlib/tree/main/scripts/data_collector/yahoo#automatic-update-of-daily-frequency-datafrom-yahoo-finance)

  * Automatic update of data to the "qlib" directory each trading day (Linux)
      * use *crontab*: `crontab -e`
      * set up timed tasks:

        ```
        * * * * 1-5 python <script path> update_data_to_bin --qlib_data_1d_dir <user data dir>
        ```
        * **script path**: *scripts/data_collector/yahoo/collector.py*

  * Manual update of data
      ```
      python scripts/data_collector/yahoo/collector.py update_data_to_bin --qlib_data_1d_dir <user data dir> --trading_date <start date> --end_date <end date>
      ```
      * *trading_date*: start of trading day
      * *end_date*: end of trading day(not included)

### Checking the health of the data
  * We provide a script to check the health of the data, you can run the following commands to check whether the data is healthy or not.
    ```
    python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data
    ```
  * Of course, you can also add some parameters to adjust the test results, such as this.
    ```
    python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data --missing_data_num 30055 --large_step_threshold_volume 94485 --large_step_threshold_price 20
    ```
  * If you want more information about `check_data_health`, please refer to the [documentation](https://qlib.readthedocs.io/en/latest/component/data.html#checking-the-health-of-the-data).

<!-- 
- Run the initialization code and get stock data:

  ```python
  import qlib
  from qlib.data import D
  from qlib.constant import REG_CN

  # Initialization
  mount_path = "~/.qlib/qlib_data/cn_data"  # target_dir
  qlib.init(mount_path=mount_path, region=REG_CN)

  # Get stock data by Qlib
  # Load trading calendar with the given time range and frequency
  print(D.calendar(start_time='2010-01-01', end_time='2017-12-31', freq='day')[:2])

  # Parse a given market name into a stockpool config
  instruments = D.instruments('csi500')
  print(D.list_instruments(instruments=instruments, start_time='2010-01-01', end_time='2017-12-31', as_list=True)[:6])

  # Load features of certain instruments in given time range
  instruments = ['SH600000']
  fields = ['$close', '$volume', 'Ref($close, 1)', 'Mean($close, 3)', '$high-$low']
  print(D.features(instruments, fields, start_time='2010-01-01', end_time='2017-12-31', freq='day').head())
  ```
 -->

## Docker images
1. Pull a Docker image from the Docker Hub repository
    ```bash
    docker pull pyqlib/qlib_image_stable:stable
    ```
2. Start a new Docker container
    ```bash
    docker run -it --name <container name> -v <Mounted local directory>:/app pyqlib/qlib_image_stable:stable
    ```
3. At this point you are in the docker environment and can run the qlib scripts. An example:
    ```bash
    >>> python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
    >>> python qlib/cli/run.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
    ```
4. Exit the container
    ```bash
    >>> exit
    ```
5. Restart the container
    ```bash
    docker start -i -a <container name>
    ```
6. Stop the container
    ```bash
    docker stop <container name>
    ```
7. Delete the container
    ```bash
    docker rm <container name>
    ```
8. If you want to know more information, please refer to the [documentation](https://qlib.readthedocs.io/en/latest/developer/how_to_build_image.html).

## Auto Quant Research Workflow
Qlib provides a tool named `qrun` to run the whole workflow automatically (including building the dataset, training models, backtesting and evaluation). You can start an auto quant research workflow and get graphical report analysis with the following steps:

1. Quant Research Workflow: Run `qrun` with the lightgbm workflow config ([workflow_config_lightgbm_Alpha158.yaml](examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml)) as follows.
    ```bash
      cd examples  # Avoid running the program under a directory that contains `qlib`
      qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
    ```
    If users want to use `qrun` under debug mode, please use the following command:
    ```bash
    python -m pdb qlib/cli/run.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
    ```
    The result of `qrun` is as follows; please refer to the [docs](https://qlib.readthedocs.io/en/latest/component/strategy.html#result) for more explanations about the result.

    ```bash

    'The following are analysis results of the excess return without cost.'
                           risk
    mean               0.000708
    std                0.005626
    annualized_return  0.178316
    information_ratio  1.996555
    max_drawdown      -0.081806
    'The following are analysis results of the excess return with cost.'
                           risk
    mean               0.000512
    std                0.005626
    annualized_return  0.128982
    information_ratio  1.444287
    max_drawdown      -0.091078
    ```
    Here are detailed documents for `qrun` and [workflow](https://qlib.readthedocs.io/en/latest/component/workflow.html).

2. Graphical Reports Analysis: First, run `python -m pip install .[analysis]` to install the required dependencies. Then run `examples/workflow_by_code.ipynb` with `jupyter notebook` to get graphical reports. 
    - Forecasting signal (model prediction) analysis
      - Cumulative Return of groups
      ![Cumulative Return](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_cumulative_return.png)
      - Return distribution
      ![long_short](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_long_short.png)
      - Information Coefficient (IC)
      ![Information Coefficient](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_IC.png)
      ![Monthly IC](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_monthly_IC.png)
      ![IC](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_NDQ.png)
      - Auto Correlation of forecasting signal (model prediction)
      ![Auto Correlation](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_auto_correlation.png)

    - Portfolio analysis
      - Backtest return
      ![Report](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/report.png)
      <!-- 
      - Score IC
      ![Score IC](docs/_static/img/score_ic.png)
      - Cumulative Return
      ![Cumulative Return](docs/_static/img/cumulative_return.png)
      - Risk Analysis
      ![Risk Analysis](docs/_static/img/risk_analysis.png)
      - Rank Label
      ![Rank Label](docs/_static/img/rank_label.png)
      -->
   - [Explanation](https://qlib.readthedocs.io/en/latest/component/report.html) of above results

## Building Customized Quant Research Workflow by Code
The automatic workflow may not suit the research workflow of all Quant researchers. To support a flexible Quant research workflow, Qlib also provides a modularized interface to allow researchers to build their own workflow by code. [Here](examples/workflow_by_code.ipynb) is a demo for customized Quant research workflow by code.

# Main Challenges & Solutions in Quant Research
Quant investment is a unique scenario with many key challenges to be solved.
Currently, Qlib provides solutions for several of them.

## Forecasting: Finding Valuable Signals/Patterns
Accurate forecasting of stock price trends is a very important part of constructing profitable portfolios.
However, the huge amount of data in various formats in the financial market makes it challenging to build forecasting models.

An increasing number of SOTA Quant research works/papers, which focus on building forecasting models to mine valuable signals/patterns in complex financial data, are released in `Qlib`.


### [Quant Model (Paper) Zoo](examples/benchmarks)

Here is a list of models built on `Qlib`.
- [GBDT based on XGBoost (Tianqi Chen, et al. KDD 2016)](examples/benchmarks/XGBoost/)
- [GBDT based on LightGBM (Guolin Ke, et al. NIPS 2017)](examples/benchmarks/LightGBM/)
- [GBDT based on Catboost (Liudmila Prokhorenkova, et al. NIPS 2018)](examples/benchmarks/CatBoost/)
- [MLP based on pytorch](examples/benchmarks/MLP/)
- [LSTM based on pytorch (Sepp Hochreiter, et al. Neural computation 1997)](examples/benchmarks/LSTM/)
- [GRU based on pytorch (Kyunghyun Cho, et al. 2014)](examples/benchmarks/GRU/)
- [ALSTM based on pytorch (Yao Qin, et al. IJCAI 2017)](examples/benchmarks/ALSTM)
- [GATs based on pytorch (Petar Velickovic, et al. 2017)](examples/benchmarks/GATs/)
- [SFM based on pytorch (Liheng Zhang, et al. KDD 2017)](examples/benchmarks/SFM/)
- [TFT based on tensorflow (Bryan Lim, et al. International Journal of Forecasting 2019)](examples/benchmarks/TFT/)
- [TabNet based on pytorch (Sercan O. Arik, et al. AAAI 2019)](examples/benchmarks/TabNet/)
- [DoubleEnsemble based on LightGBM (Chuheng Zhang, et al. ICDM 2020)](examples/benchmarks/DoubleEnsemble/)
- [TCTS based on pytorch (Xueqing Wu, et al. ICML 2021)](examples/benchmarks/TCTS/)
- [Transformer based on pytorch (Ashish Vaswani, et al. NeurIPS 2017)](examples/benchmarks/Transformer/)
- [Localformer based on pytorch (Juyong Jiang, et al.)](examples/benchmarks/Localformer/)
- [TRA based on pytorch (Hengxu, Dong, et al. KDD 2021)](examples/benchmarks/TRA/)
- [TCN based on pytorch (Shaojie Bai, et al. 2018)](examples/benchmarks/TCN/)
- [ADARNN based on pytorch (YunTao Du, et al. 2021)](examples/benchmarks/ADARNN/)
- [ADD based on pytorch (Hongshun Tang, et al.2020)](examples/benchmarks/ADD/)
- [IGMTF based on pytorch (Wentao Xu, et al.2021)](examples/benchmarks/IGMTF/)
- [HIST based on pytorch (Wentao Xu, et al.2021)](examples/benchmarks/HIST/)
- [KRNN based on pytorch](examples/benchmarks/KRNN/)
- [Sandwich based on pytorch](examples/benchmarks/Sandwich/)

PRs contributing new Quant models are highly welcome.

The performance of each model on the `Alpha158` and `Alpha360` datasets can be found [here](examples/benchmarks/README.md).

### Run a single model
All the models listed above are runnable with ``Qlib``. Users can find the config files we provide and some details about the models in the [benchmarks](examples/benchmarks) folder. More information can be found in the model links listed above.

`Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best:
- Users can use the tool `qrun` mentioned above to run a model's workflow based on a config file.
- Users can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder (see the sketch after this list).
- Users can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py run --models=lightgbm`, where the `--models` argument can take any number of models listed above (the available models can be found in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
    - **NOTE**: Each baseline has different environment dependencies; please make sure that your python version aligns with the requirements (e.g. TFT only supports Python 3.6~3.7 due to the limitation of `tensorflow==1.15.0`).
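
For the second approach, here is a minimal sketch of a `workflow_by_code`-style script (see [`workflow_by_code.py`](examples/workflow_by_code.py) for the full version); it assumes the China-Stock data has been downloaded to the default location:

```python
import qlib
from qlib.utils import init_instance_by_config

# Assumes the CN dataset has been downloaded to ~/.qlib/qlib_data/cn_data
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", region="cn")

# Instantiate the model and dataset from config dicts, as qrun does internally
model = init_instance_by_config({
    "class": "LGBModel",
    "module_path": "qlib.contrib.model.gbdt",
})
dataset = init_instance_by_config({
    "class": "DatasetH",
    "module_path": "qlib.data.dataset",
    "kwargs": {
        "handler": {
            "class": "Alpha158",
            "module_path": "qlib.contrib.data.handler",
            "kwargs": {
                "instruments": "csi300",
                "start_time": "2008-01-01",
                "end_time": "2020-08-01",
                "fit_start_time": "2008-01-01",
                "fit_end_time": "2014-12-31",
            },
        },
        "segments": {
            "train": ("2008-01-01", "2014-12-31"),
            "valid": ("2015-01-01", "2016-12-31"),
            "test": ("2017-01-01", "2020-08-01"),
        },
    },
})

model.fit(dataset)             # train on the "train"/"valid" segments
pred = model.predict(dataset)  # predict scores for the "test" segment
print(pred.head())
```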

### Run multiple models
`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supports *Linux* for now; other OSes will be supported in the future. It also does not yet support running the same model multiple times in parallel; this will be addressed in future development.)

The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored.

Here is an example of running all the models for 10 iterations:
```bash
python run_all_model.py run 10
```

It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). 

### Breaking change
In `pandas`, `group_keys` is one of the parameters of the `groupby` method. From version 1.5 to 2.0 of `pandas`, the default value of `group_keys` changed from `no_default` to `True`, which causes qlib to raise errors at runtime. We therefore set `group_keys=False`, but this does not guarantee that every program runs correctly, including:
* qlib\examples\rl_order_execution\scripts\gen_training_orders.py
* qlib\examples\benchmarks\TRA\src\dataset.MTSDatasetH.py
* qlib\examples\benchmarks\TFT\tft.py
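
As a minimal illustration of the behavior change (a sketch, assuming `pandas >= 2.0`):

```python
import pandas as pd

df = pd.DataFrame({"g": ["a", "a", "b"], "x": [1, 2, 3]})

# With the pandas >= 2.0 default (group_keys=True), apply() prepends the group
# label as an extra index level:
print(df.groupby("g").apply(lambda d: d["x"].cumsum()).index.nlevels)  # 2

# group_keys=False keeps the original flat index, which is what qlib expects:
print(df.groupby("g", group_keys=False).apply(lambda d: d["x"].cumsum()).index.nlevels)  # 1
```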



## [Adapting to Market Dynamics](examples/benchmarks_dynamic)

Due to the non-stationary nature of the financial market environment, the data distribution may change across periods, which makes the performance of models built on training data decay on future test data.
Adapting the forecasting models/strategies to market dynamics is therefore very important for the models'/strategies' performance.

Here is a list of solutions built on `Qlib`.
- [Rolling Retraining](examples/benchmarks_dynamic/baseline/)
- [DDG-DA on pytorch (Wendi, et al. AAAI 2022)](examples/benchmarks_dynamic/DDG-DA/)

## Reinforcement Learning: Modeling Continuous Decisions
Qlib now supports reinforcement learning, a feature designed to model continuous investment decisions. This functionality assists investors in optimizing their trading strategies by learning from interactions with the environment to maximize some notion of cumulative reward.

Here is a list of solutions built on `Qlib` categorized by scenarios.

### [RL for order execution](examples/rl_order_execution)
[Here](https://qlib.readthedocs.io/en/latest/component/rl/overall.html#order-execution) is the introduction of this scenario.  All the methods below are compared [here](examples/rl_order_execution).
- [TWAP](examples/rl_order_execution/exp_configs/backtest_twap.yml)
- [PPO: "An End-to-End Optimal Trade Execution Framework based on Proximal Policy Optimization", IJCAI 2020](examples/rl_order_execution/exp_configs/backtest_ppo.yml)
- [OPDS: "Universal Trading for Order Execution with Oracle Policy Distillation", AAAI 2021](examples/rl_order_execution/exp_configs/backtest_opds.yml)

# Quant Dataset Zoo
Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`:

| Dataset                                    | US Market | China Market |
| --                                         | --        | --           |
| [Alpha360](./qlib/contrib/data/handler.py) |  √        |  √           |
| [Alpha158](./qlib/contrib/data/handler.py) |  √        |  √           |

[Here](https://qlib.readthedocs.io/en/latest/advanced/alpha.html) is a tutorial on building a dataset with `Qlib`.
PRs contributing new Quant datasets are highly welcome.
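
For example, after initializing `Qlib`, the `Alpha158` handler can be loaded directly to inspect the dataset (a sketch; the exact values depend on the data you downloaded):

```python
import qlib
from qlib.contrib.data.handler import Alpha158

qlib.init(provider_uri="~/.qlib/qlib_data/cn_data", region="cn")

handler = Alpha158(
    instruments="csi300",
    start_time="2018-01-01",
    end_time="2019-12-31",
    fit_start_time="2018-01-01",
    fit_end_time="2018-12-31",
)
df = handler.fetch()  # DataFrame indexed by (datetime, instrument), one column per factor
print(df.shape)
```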


# Learning Framework
Qlib is highly customizable and a lot of its components are learnable.
The learnable components are instances of `Forecast Model` and `Trading Agent`. They are learned based on the `Learning Framework` layer and then applied to multiple scenarios in the `Workflow` layer.
The learning framework leverages the `Workflow` layer as well (e.g. sharing the `Information Extractor`, creating environments based on `Execution Env`).

Based on learning paradigms, they can be categorized into reinforcement learning and supervised learning.
- For supervised learning, the detailed docs can be found [here](https://qlib.readthedocs.io/en/latest/component/model.html).
- For reinforcement learning, the detailed docs can be found [here](https://qlib.readthedocs.io/en/latest/component/rl.html). Qlib's RL learning framework leverages `Execution Env` in the `Workflow` layer to create environments. It's worth noting that `NestedExecutor` is supported as well. This empowers users to optimize different levels of strategies/models/agents together (e.g. optimizing an order execution strategy for a specific portfolio management strategy).


# More About Qlib
If you want to have a quick glance at the most frequently used components of qlib, you can try notebooks [here](examples/tutorial/).

The detailed documents are organized in [docs](docs/).
[Sphinx](http://www.sphinx-doc.org) and the readthedocs theme are required to build the documentation in HTML format.
```bash
cd docs/
conda install sphinx sphinx_rtd_theme -y
# Otherwise, you can install them with pip
# pip install sphinx sphinx_rtd_theme
make html
```
You can also view the [latest document](http://qlib.readthedocs.io/) online directly.

Qlib is under active and continuous development. Our roadmap is managed as a [github project](https://github.com/microsoft/qlib/projects/1).



# Offline Mode and Online Mode
The data server of Qlib can be deployed in either `Offline` mode or `Online` mode. The default is `Offline` mode.

Under `Offline` mode, the data will be deployed locally. 

Under `Online` mode, the data will be deployed as a shared data service. The data and their cache will be shared by all the clients. The data retrieval performance is expected to be improved due to a higher rate of cache hits. It will consume less disk space, too. The documents of the online mode can be found in [Qlib-Server](https://qlib-server.readthedocs.io/). The online mode can be deployed automatically with [Azure CLI based scripts](https://qlib-server.readthedocs.io/en/latest/build.html#one-click-deployment-in-azure). The source code of online data server can be found in [Qlib-Server repository](https://github.com/microsoft/qlib-server).

## Performance of Qlib Data Server
The performance of data processing is important to data-driven methods like AI technologies. As an AI-oriented platform, Qlib provides a solution for data storage and data processing. To demonstrate the performance of Qlib data server, we
compare it with several other data storage solutions. 

We evaluate the performance of several storage solutions by finishing the same task,
which creates a dataset (14 features/factors) from the basic OHLCV daily data of a stock market (800 stocks each day from 2007 to 2020). The task involves data queries and processing.

|                         | HDF5      | MySQL     | MongoDB   | InfluxDB  | Qlib -E -D  | Qlib +E -D   | Qlib +E +D  |
| --                      | ------    | ------    | --------  | --------- | ----------- | ------------ | ----------- |
| Total (1CPU) (seconds)  | 184.4±3.7 | 365.3±7.5 | 253.6±6.7 | 368.2±3.6 | 147.0±8.8   | 47.6±1.0     | **7.4±0.3** |
| Total (64CPU) (seconds) |           |           |           |           | 8.8±0.6     | **4.2±0.2**  |             |
* `+(-)E` indicates with (out) `ExpressionCache`
* `+(-)D` indicates with (out) `DatasetCache`

Most general-purpose databases take too much time to load data. After looking into the underlying implementation, we found that in general-purpose database solutions, data go through too many layers of interfaces and unnecessary format transformations.
Such overheads greatly slow down the data loading process.
Qlib data are stored in a compact format that is efficient to combine into arrays for scientific computation.

# Related Reports
- [Guide To Qlib: Microsoft’s AI Investment Platform](https://analyticsindiamag.com/qlib/)
- [Microsoft is also building an AI quant platform? And it's open source! (in Chinese)](https://mp.weixin.qq.com/s/47bP5YwxfTp2uTHjUBzJQQ)
- [微矿 Qlib: the industry's first open-source AI quant investment platform (in Chinese)](https://mp.weixin.qq.com/s/vsJv7lsgjEi-ALYUz4CvtQ)

# Contact Us
- If you have any issues, please create an issue [here](https://github.com/microsoft/qlib/issues/new/choose) or send messages in [gitter](https://gitter.im/Microsoft/qlib).
- If you want to make contributions to `Qlib`, please [create pull requests](https://github.com/microsoft/qlib/compare).
- For other reasons, you are welcome to contact us by email ([qlib@microsoft.com](mailto:qlib@microsoft.com)).
  - We are recruiting new members (both FTEs and interns); your resumes are welcome!

Join IM discussion groups:
|[Gitter](https://gitter.im/Microsoft/qlib)|
|----|
|![image](https://github.com/microsoft/qlib/blob/main/docs/_static/img/qrcode/gitter_qr.png)|

# Contributing
We appreciate all contributions and thank all the contributors!
<a href="https://github.com/microsoft/qlib/graphs/contributors"><img src="https://contrib.rocks/image?repo=microsoft/qlib" /></a>

Before we released Qlib as an open-source project on GitHub in Sep 2020, Qlib was an internal project in our group. Unfortunately, the internal commit history was not kept. Many members of our group also contributed a lot to Qlib, including Ruihua Wang, Yinda Zhang, Haisu Yu, Shuyu Wang, Bochen Pang, and [Dong Zhou](https://github.com/evanzd/evanzd). Special thanks to [Dong Zhou](https://github.com/evanzd/evanzd) for his initial version of Qlib.

## Guidance

This project welcomes contributions and suggestions.
**Here are some [code standards and development guidance](docs/developer/code_standard_and_dev_guide.rst) for submitting a pull request.**

Making contributions is not a hard thing. Solving an issue (maybe just answering a question raised in the [issues list](https://github.com/microsoft/qlib/issues) or [gitter](https://gitter.im/Microsoft/qlib)), reporting or fixing a bug, improving the documents, and even fixing a typo are all important contributions to Qlib.

For example, if you want to contribute to Qlib's document/code, you can follow the steps in the figure below.
<p align="center">
  <img src="https://github.com/demon143/qlib/blob/main/docs/_static/img/change%20doc.gif" />
</p>

If you don't know how to start to contribute, you can refer to the following examples.
| Type | Examples |
| -- | -- |
| Solving issues | [Answer a question](https://github.com/microsoft/qlib/issues/749);  [issuing](https://github.com/microsoft/qlib/issues/765) or [fixing](https://github.com/microsoft/qlib/pull/792) a bug |
| Docs | [Improve docs quality](https://github.com/microsoft/qlib/pull/797/files) ;  [Fix a typo](https://github.com/microsoft/qlib/pull/774) | 
| Feature |  Implement a [requested feature](https://github.com/microsoft/qlib/projects) like [this](https://github.com/microsoft/qlib/pull/754); [Refactor interfaces](https://github.com/microsoft/qlib/pull/539/files) |
| Dataset | [Add a dataset](https://github.com/microsoft/qlib/pull/733) | 
| Models |  [Implement a new model](https://github.com/microsoft/qlib/pull/689), [some instructions to contribute models](https://github.com/microsoft/qlib/tree/main/examples/benchmarks#contributing) |

[Good first issues](https://github.com/microsoft/qlib/labels/good%20first%20issue) are labelled to indicate easy starting points for new contributors.

You can find some imperfect implementations in Qlib with `rg 'TODO|FIXME' qlib`.

If you would like to become one of Qlib's maintainers and contribute more (e.g. helping merge PRs, triaging issues), please contact us by email ([qlib@microsoft.com](mailto:qlib@microsoft.com)). We would be glad to help upgrade your permissions.

## License
Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
the right to use your contribution. For details, visit https://cla.opensource.microsoft.com.

When you submit a pull request, a CLA bot will automatically determine whether you need to provide
a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
provided by the bot. You will only need to do this once across all repos using our CLA.

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.


## /SECURITY.md

<!-- BEGIN MICROSOFT SECURITY.MD V0.0.5 BLOCK -->

## Security

Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).

If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.

## Reporting Security Issues

**Please do not report security vulnerabilities through public GitHub issues.**

Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).

If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).

You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

  * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
  * Full paths of source file(s) related to the manifestation of the issue
  * The location of the affected source code (tag/branch/commit or direct URL)
  * Any special configuration required to reproduce the issue
  * Step-by-step instructions to reproduce the issue
  * Proof-of-concept or exploit code (if possible)
  * Impact of the issue, including how an attacker might exploit the issue

This information will help us triage your report more quickly.

If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.

## Preferred Languages

We prefer all communications to be in English.

## Policy

Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).

<!-- END MICROSOFT SECURITY.MD BLOCK -->

## /build_docker_image.sh

```sh path="/build_docker_image.sh" 
#!/bin/bash

docker_user="your_dockerhub_username"

read -p "Do you want to build the nightly version of the qlib image? (default is stable) (yes/no): " answer;
answer=$(echo "$answer" | tr '[:upper:]' '[:lower:]')

if [ "$answer" = "yes" ]; then
    # Build the nightly version of the qlib image
    docker build --build-arg IS_STABLE=no -t qlib_image -f ./Dockerfile .
    image_tag="nightly"
else
    # Build the stable version of the qlib image
    docker build -t qlib_image -f ./Dockerfile .
    image_tag="stable"
fi

read -p "Is it uploaded to docker hub? (default is no) (yes/no): " answer;
answer=$(echo "$answer" | tr '[:upper:]' '[:lower:]')

if [ "$answer" = "yes" ]; then
    # Log in to Docker Hub
    # If you are a new docker hub user, please verify your email address before proceeding with this step.
    docker login
    # Tag the Docker image
    docker tag qlib_image "$docker_user/qlib_image:$image_tag"
    # Push the Docker image to Docker Hub
    docker push "$docker_user/qlib_image:$image_tag"
else
    echo "Not uploaded to docker hub."
fi

```

## /docs/FAQ/FAQ.rst

```rst path="/docs/FAQ/FAQ.rst" 

Qlib FAQ
############

Qlib Frequently Asked Questions
===============================
.. contents::
    :depth: 1
    :local:
    :backlinks: none

------


1. RuntimeError: An attempt has been made to start a new process before the current process has finished its bootstrapping phase...
-----------------------------------------------------------------------------------------------------------------------------------

.. code-block:: console

    RuntimeError:
            An attempt has been made to start a new process before the
            current process has finished its bootstrapping phase.

            This probably means that you are not using fork to start your
            child processes and you have forgotten to use the proper idiom
            in the main module:

                if __name__ == '__main__':
                    freeze_support()
                    ...

            The "freeze_support()" line can be omitted if the program
            is not going to be frozen to produce an executable.

This is caused by the limitation of multiprocessing under the Windows OS. Please refer to `here <https://stackoverflow.com/a/24374798>`_ for more info.

**Solution**: Call ``D.features`` inside the ``if __name__ == '__main__'`` clause of the main module. For example:

.. code-block:: python

    import qlib
    from qlib.data import D


    if __name__ == "__main__":
        qlib.init()
        instruments = ["SH600000"]
        fields = ["$close", "$change"]
        df = D.features(instruments, fields, start_time='2010-01-01', end_time='2012-12-31')
        print(df.head())



2. qlib.data.cache.QlibCacheException: It sees the key(...) of the redis lock has existed in your redis db now.
---------------------------------------------------------------------------------------------------------------

This means the key of the redis lock already exists in your redis db. You can use the following commands to clear your redis keys and rerun your command:

.. code-block:: console

    $ redis-cli
    > select 1
    > flushdb

If the issue is not resolved, use ``keys *`` to find if multiple keys exist. If so, try using ``flushall`` to clear all the keys.

.. note::

    ``qlib.config.redis_task_db`` defaults to ``1``; users can select another db with ``qlib.init(redis_task_db=<other_db>)``.


Also, feel free to post a new issue in our GitHub repository. We always check each issue carefully and try our best to solve them.

3. ModuleNotFoundError: No module named 'qlib.data._libs.rolling'
-----------------------------------------------------------------

.. code-block:: python

    #### Do not import qlib package in the repository directory in case of importing qlib from . without compiling #####
    Traceback (most recent call last):
    File "<stdin>", line 1, in <module>
    File "qlib/qlib/__init__.py", line 19, in init
        from .data.cache import H
    File "qlib/qlib/data/__init__.py", line 8, in <module>
        from .data import (
    File "qlib/qlib/data/data.py", line 20, in <module>
        from .cache import H
    File "qlib/qlib/data/cache.py", line 36, in <module>
        from .ops import Operators
    File "qlib/qlib/data/ops.py", line 19, in <module>
        from ._libs.rolling import rolling_slope, rolling_rsquare, rolling_resi
    ModuleNotFoundError: No module named 'qlib.data._libs.rolling'

- If the error occurs when importing ``qlib`` package with ``PyCharm`` IDE, users can execute the following command in the project root folder to compile Cython files and generate executable files:

    .. code-block:: bash

        python setup.py build_ext --inplace

- If the error occurs when importing ``qlib`` package with command ``python`` , users need to change the running directory to ensure that the script does not run in the project directory.


4. BadNamespaceError: / is not a connected namespace
----------------------------------------------------

.. code-block:: python

      File "qlib_online.py", line 35, in <module>
        cal = D.calendar()
      File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\data.py", line 973, in calendar
        return Cal.calendar(start_time, end_time, freq, future=future)
      File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\data.py", line 798, in calendar
        self.conn.send_request(
      File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\client.py", line 101, in send_request
        self.sio.emit(request_type + "_request", request_content)
      File "G:\apps\miniconda\envs\qlib\lib\site-packages\python_socketio-5.3.0-py3.8.egg\socketio\client.py", line 369, in emit
        raise exceptions.BadNamespaceError(
      BadNamespaceError: / is not a connected namespace.

- The version of ``python-socketio`` in qlib needs to be the same as the version of ``python-socketio`` in qlib-server:

    .. code-block:: bash

        pip install -U python-socketio==<qlib-server python-socketio version>


5. TypeError: send() got an unexpected keyword argument 'binary'
----------------------------------------------------------------

.. code-block:: python

      File "qlib_online.py", line 35, in <module>
        cal = D.calendar()
      File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\data.py", line 973, in calendar
        return Cal.calendar(start_time, end_time, freq, future=future)
      File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\data.py", line 798, in calendar
        self.conn.send_request(
      File "e:\code\python\microsoft\qlib_latest\qlib\qlib\data\client.py", line 101, in send_request
        self.sio.emit(request_type + "_request", request_content)
      File "G:\apps\miniconda\envs\qlib\lib\site-packages\socketio\client.py", line 263, in emit
        self._send_packet(packet.Packet(packet.EVENT, namespace=namespace,
      File "G:\apps\miniconda\envs\qlib\lib\site-packages\socketio\client.py", line 339, in _send_packet
        self.eio.send(ep, binary=binary)
      TypeError: send() got an unexpected keyword argument 'binary'


- The ``python-engineio`` version needs to be compatible with the ``python-socketio`` version, reference: https://github.com/miguelgrinberg/python-socketio#version-compatibility

    .. code-block:: bash

        pip install -U python-engineio==<compatible python-socketio version>
        # or
        pip install -U python-socketio==3.1.2 python-engineio==3.13.2

```

## /docs/Makefile

``` path="/docs/Makefile" 
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = python3 -msphinx
SPHINXPROJ    = Quantlab
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	pip install -r requirements.txt
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

```

## /docs/_static/demo.sh

```sh path="/docs/_static/demo.sh" 
#!/bin/sh
git clone https://github.com/microsoft/qlib.git
cd qlib
ls
pip install pyqlib
# or
# pip install numpy
# pip install --upgrade cython
# python setup.py install
cd examples
ls
qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
```

## /docs/_static/img/QlibRL_framework.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/QlibRL_framework.png

## /docs/_static/img/RL_framework.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/RL_framework.png

## /docs/_static/img/analysis/analysis_model_IC.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/analysis_model_IC.png

## /docs/_static/img/analysis/analysis_model_NDQ.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/analysis_model_NDQ.png

## /docs/_static/img/analysis/analysis_model_auto_correlation.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/analysis_model_auto_correlation.png

## /docs/_static/img/analysis/analysis_model_cumulative_return.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/analysis_model_cumulative_return.png

## /docs/_static/img/analysis/analysis_model_long_short.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/analysis_model_long_short.png

## /docs/_static/img/analysis/analysis_model_monthly_IC.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/analysis_model_monthly_IC.png

## /docs/_static/img/analysis/cumulative_return_buy.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/cumulative_return_buy.png

## /docs/_static/img/analysis/cumulative_return_buy_minus_sell.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/cumulative_return_buy_minus_sell.png

## /docs/_static/img/analysis/cumulative_return_hold.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/cumulative_return_hold.png

## /docs/_static/img/analysis/cumulative_return_sell.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/cumulative_return_sell.png

## /docs/_static/img/analysis/rank_label_buy.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/rank_label_buy.png

## /docs/_static/img/analysis/rank_label_hold.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/rank_label_hold.png

## /docs/_static/img/analysis/rank_label_sell.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/rank_label_sell.png

## /docs/_static/img/analysis/report.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/report.png

## /docs/_static/img/analysis/risk_analysis_annualized_return.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/risk_analysis_annualized_return.png

## /docs/_static/img/analysis/risk_analysis_bar.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/risk_analysis_bar.png

## /docs/_static/img/analysis/risk_analysis_information_ratio.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/risk_analysis_information_ratio.png

## /docs/_static/img/analysis/risk_analysis_max_drawdown.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/risk_analysis_max_drawdown.png

## /docs/_static/img/analysis/risk_analysis_std.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/risk_analysis_std.png

## /docs/_static/img/analysis/score_ic.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/analysis/score_ic.png

## /docs/_static/img/change doc.gif

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/change doc.gif

## /docs/_static/img/framework-abstract.jpg

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/framework-abstract.jpg

## /docs/_static/img/framework.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/framework.png

## /docs/_static/img/logo/1.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/logo/1.png

## /docs/_static/img/logo/2.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/logo/2.png

## /docs/_static/img/logo/3.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/logo/3.png

## /docs/_static/img/logo/white_bg_rec+word.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/logo/white_bg_rec+word.png

## /docs/_static/img/logo/yel_bg_rec+word.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/logo/yel_bg_rec+word.png

## /docs/_static/img/logo/yellow_bg_rec+word .png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/logo/yellow_bg_rec+word .png

## /docs/_static/img/logo/yellow_bg_rec.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/logo/yellow_bg_rec.png

## /docs/_static/img/online_serving.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/online_serving.png

## /docs/_static/img/qrcode/gitter_qr.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/qrcode/gitter_qr.png

## /docs/_static/img/rdagent_logo.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/rdagent_logo.png

## /docs/_static/img/topk_drop.png

Binary file available at https://raw.githubusercontent.com/microsoft/qlib/refs/heads/main/docs/_static/img/topk_drop.png

## /docs/advanced/PIT.rst

```rst path="/docs/advanced/PIT.rst" 
.. _pit:

============================
(P)oint-(I)n-(T)ime Database
============================
.. currentmodule:: qlib


Introduction
------------
Point-in-time data is a very important consideration when performing any sort of historical market analysis.

For example, let’s say we are backtesting a trading strategy and we are using the past five years of historical data as our input.
Our model is assumed to trade once a day, at the market close, and we’ll say we are calculating the trading signal for 1 January 2020 in our backtest. At that point, we should only have data for 1 January 2020, 31 December 2019, 30 December 2019 etc.

In financial data (especially financial reports), the same piece of data may be amended multiple times over time. If we only use the latest version for historical backtesting, data leakage will happen.
A point-in-time database is designed to solve this problem and make sure users get the right version of data at any historical timestamp. It keeps the behavior of online trading and historical backtesting consistent.



Data Preparation
----------------

Qlib provides a crawler to help users to download financial data and then a converter to dump the data in Qlib format.
Please follow `scripts/data_collector/pit/README.md <https://github.com/microsoft/qlib/tree/main/scripts/data_collector/pit/>`_ to download and convert data.
Besides, you can find some additional usage examples there.
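
Once the PIT data is dumped, it can be queried through Qlib's expression engine with the ``P`` operator. Below is a minimal sketch (it assumes the quarterly field ``$$roewa_q`` produced by the collector above is available under the default data directory):

.. code-block:: python

    import qlib
    from qlib.data import D

    qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")  # assumes PIT data was dumped here

    # P(...) returns, for each trading day, the latest value of the PIT
    # feature that was publicly known at that time (no look-ahead).
    df = D.features(
        ["sh600519"],
        ["P($$roewa_q)"],
        start_time="2019-01-01",
        end_time="2019-07-19",
        freq="day",
    )
    print(df.head())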


File-based design for PIT data
------------------------------

Qlib provides a file-based storage for PIT data.

For each feature, it contains 4 columns, i.e. date, period, value, _next.
Each row corresponds to a statement.

The meaning of each column in a file with a name like `XXX_a.data`:

- `date`: the statement's date of publication.
- `period`: the period of the statement. (e.g. it will be quarterly frequency in most of the markets)
    - If it is an annual period, it will be an integer corresponding to the year.
    - If it is a quarterly period, it will be an integer like `<year><index of quarter>`. The last two decimal digits represent the index of the quarter; the others represent the year.
- `value`: the described value
- `_next`: the byte index of the next occurrence of the field.

Besides the feature data, an index file `XXX_a.index` is included to speed up query performance.

The statements are sorted by `date` in ascending order from the beginning of the file.

.. code-block:: python

    # the data format from XXXX.data
    array([(20070428, 200701, 0.090219  , 4294967295),
           (20070817, 200702, 0.13933   , 4294967295),
           (20071023, 200703, 0.24586301, 4294967295),
           (20080301, 200704, 0.3479    ,         80),
           (20080313, 200704, 0.395989  , 4294967295),
           (20080422, 200801, 0.100724  , 4294967295),
           (20080828, 200802, 0.24996801, 4294967295),
           (20081027, 200803, 0.33412001, 4294967295),
           (20090325, 200804, 0.39011699, 4294967295),
           (20090421, 200901, 0.102675  , 4294967295),
           (20090807, 200902, 0.230712  , 4294967295),
           (20091024, 200903, 0.30072999, 4294967295),
           (20100402, 200904, 0.33546099, 4294967295),
           (20100426, 201001, 0.083825  , 4294967295),
           (20100812, 201002, 0.200545  , 4294967295),
           (20101029, 201003, 0.260986  , 4294967295),
           (20110321, 201004, 0.30739301, 4294967295),
           (20110423, 201101, 0.097411  , 4294967295),
           (20110831, 201102, 0.24825101, 4294967295),
           (20111018, 201103, 0.318919  , 4294967295),
           (20120323, 201104, 0.4039    ,        420),
           (20120411, 201104, 0.403925  , 4294967295),
           (20120426, 201201, 0.112148  , 4294967295),
           (20120810, 201202, 0.26484701, 4294967295),
           (20121026, 201203, 0.370487  , 4294967295),
           (20130329, 201204, 0.45004699, 4294967295),
           (20130418, 201301, 0.099958  , 4294967295),
           (20130831, 201302, 0.21044201, 4294967295),
           (20131016, 201303, 0.30454299, 4294967295),
           (20140325, 201304, 0.394328  , 4294967295),
           (20140425, 201401, 0.083217  , 4294967295),
           (20140829, 201402, 0.16450299, 4294967295),
           (20141030, 201403, 0.23408499, 4294967295),
           (20150421, 201404, 0.319612  , 4294967295),
           (20150421, 201501, 0.078494  , 4294967295),
           (20150828, 201502, 0.137504  , 4294967295),
           (20151023, 201503, 0.201709  , 4294967295),
           (20160324, 201504, 0.26420501, 4294967295),
           (20160421, 201601, 0.073664  , 4294967295),
           (20160827, 201602, 0.136576  , 4294967295),
           (20161029, 201603, 0.188062  , 4294967295),
           (20170415, 201604, 0.244385  , 4294967295),
           (20170425, 201701, 0.080614  , 4294967295),
           (20170728, 201702, 0.15151   , 4294967295),
           (20171026, 201703, 0.25416601, 4294967295),
           (20180328, 201704, 0.32954201, 4294967295),
           (20180428, 201801, 0.088887  , 4294967295),
           (20180802, 201802, 0.170563  , 4294967295),
           (20181029, 201803, 0.25522   , 4294967295),
           (20190329, 201804, 0.34464401, 4294967295),
           (20190425, 201901, 0.094737  , 4294967295),
           (20190713, 201902, 0.        ,       1040),
           (20190718, 201902, 0.175322  , 4294967295),
           (20191016, 201903, 0.25581899, 4294967295)],
          dtype=[('date', '<u4'), ('period', '<u4'), ('value', '<f8'), ('_next', '<u4')])
    # - each row contains 20 bytes


    # The data format from XXXX.index.  It consists of two parts
    # 1) the start index of the data. So the first part of the info will be like
    2007
    # 2) the remain index data will be like information below
    #    - The data indicate the **byte index** of the first occurrence of each period.
    #    - e.g. Because the info at both byte 60 and byte 80 corresponds to period 200704,
    #      only the byte index of its first occurrence (i.e. 60) is recorded in the index;
    #      the following entry (100) is the start of the next period, 200801.
    array([         0,         20,         40,         60,        100,
                  120,        140,        160,        180,        200,
                  220,        240,        260,        280,        300,
                  320,        340,        360,        380,        400,
                  440,        460,        480,        500,        520,
                  540,        560,        580,        600,        620,
                  640,        660,        680,        700,        720,
                  740,        760,        780,        800,        820,
                  840,        860,        880,        900,        920,
                  940,        960,        980,       1000,       1020,
                 1060, 4294967295], dtype=uint32)




Known limitations:

- Currently, the PIT database is designed for quarterly or annual factors, which can handle the fundamental data of financial reports in most markets.
- Qlib leverages the file name to identify the type of the data. A file named like `XXX_q.data` corresponds to quarterly data, and a file named like `XXX_a.data` corresponds to annual data.
- The calculation of PIT data is not performed in the optimal way; there is great potential to improve its performance.

```

## /docs/advanced/alpha.rst

```rst path="/docs/advanced/alpha.rst" 
.. _alpha:

=========================
Building Formulaic Alphas
=========================
.. currentmodule:: qlib

Introduction
============

In quantitative trading practice, designing novel factors that can explain and predict future asset returns is of vital importance to the profitability of a strategy. Such factors are usually called alpha factors, or alphas in short.


A formulaic alpha, as the name suggests, is a kind of alpha that can be presented as a formula or a mathematical expression.


Building Formulaic Alphas in ``Qlib``
=====================================

In ``Qlib``, users can easily build formulaic alphas.

Example
-------

`MACD`, short for moving average convergence/divergence, is a formulaic alpha used in technical analysis of stock prices. It is designed to reveal changes in the strength, direction, momentum, and duration of a trend in a stock's price.

`MACD` can be presented as the following formula:

.. math::

    MACD = 2\times (DIF-DEA)

.. note::

    `DIF` means Differential value, which is 12-period EMA minus 26-period EMA.

    .. math::

        DIF = \frac{EMA(CLOSE, 12) - EMA(CLOSE, 26)}{CLOSE}

    `DEA` means a 9-period EMA of the DIF.

    .. math::

        DEA = EMA(DIF, 9)

Users can use ``Data Handler`` to build formulaic alphas `MACD` in qlib:

.. note:: Users need to initialize ``Qlib`` with `qlib.init` first.  Please refer to `initialization <../start/initialization.html>`_.

.. code-block:: python

    >> from qlib.data.dataset.loader import QlibDataLoader
    >> MACD_EXP = '2 * ((EMA($close, 12) - EMA($close, 26))/$close - EMA((EMA($close, 12) - EMA($close, 26))/$close, 9))'
    >> fields = [MACD_EXP] # MACD
    >> names = ['MACD']
    >> labels = ['Ref($close, -2)/Ref($close, -1) - 1'] # label
    >> label_names = ['LABEL']
    >> data_loader_config = {
    ..     "feature": (fields, names),
    ..     "label": (labels, label_names)
    .. }
    >> data_loader = QlibDataLoader(config=data_loader_config)
    >> df = data_loader.load(instruments='csi300', start_time='2010-01-01', end_time='2017-12-31')
    >> print(df)
                            feature     label
                               MACD     LABEL
    datetime   instrument
    2010-01-04 SH600000    0.008781 -0.019672
               SH600004    0.006699 -0.014721
               SH600006    0.005714  0.002911
               SH600008    0.000798  0.009818
               SH600009    0.017015 -0.017758
    ...                         ...       ...
    2017-12-29 SZ300124    0.015071 -0.005074
               SZ300136   -0.015466  0.056352
               SZ300144    0.013082  0.011853
               SZ300251   -0.001026  0.021739
               SZ300315   -0.007559  0.012455

Reference
=========

To learn more about ``Data Loader``, please refer to `Data Loader <../component/data.html#data-loader>`_

To learn more about ``Data API``, please refer to `Data API <../component/data.html>`_

```

## /docs/advanced/serial.rst

```rst path="/docs/advanced/serial.rst" 
.. _serial:

=============
Serialization
=============
.. currentmodule:: qlib

Introduction
============
``Qlib`` supports dumping the state of ``DataHandler``, ``DataSet``, ``Processor``, ``Model``, etc. to disk and reloading them.

Serializable Class
==================

``Qlib`` provides a base class ``qlib.utils.serial.Serializable``, whose state can be dumped into or loaded from disk in `pickle` format.
When users dump the state of a ``Serializable`` instance, the attributes of the instance whose names **do not** start with `_` will be saved on disk.
However, users can use the ``config`` method or override the ``default_dump_all`` attribute to change this behavior.

Users can also override the ``pickle_backend`` attribute to choose a pickle backend. The supported values are "pickle" (default and common) and "dill" (which can dump more things, such as functions; more information `here <https://pypi.org/project/dill/>`_).
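
A minimal sketch (``MyObj`` is a hypothetical user-defined class):

.. code-block:: python

    from qlib.utils.serial import Serializable

    class MyObj(Serializable):
        # "dill" can serialize objects that the standard pickle cannot, e.g. lambdas
        pickle_backend = "dill"

    obj = MyObj()
    obj.score_fn = lambda x: x + 1  # attributes without a leading "_" are dumped
    obj.to_pickle("my_obj.pkl")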

Example
=======
``Qlib``'s serializable class includes  ``DataHandler``, ``DataSet``, ``Processor`` and ``Model``, etc., which are subclass of  ``qlib.utils.serial.Serializable``.
Specifically, ``qlib.data.dataset.DatasetH`` is one of them. Users can serialize ``DatasetH`` as follows.

.. code-block:: Python

    import pickle

    ##=============dump dataset=============
    dataset.to_pickle(path="dataset.pkl") # dataset is an instance of qlib.data.dataset.DatasetH

    ##=============reload dataset=============
    with open("dataset.pkl", "rb") as file_dataset:
        dataset = pickle.load(file_dataset)

.. note::
    Only state of ``DatasetH`` should be saved on the disk, such as some `mean` and `variance` used for data normalization, etc.

    After reloading the ``DatasetH``, users need to reinitialize it. It means that users can reset some states of ``DatasetH`` or ``QlibDataHandler`` such as `instruments`, `start_time`, `end_time` and `segments`, etc.,  and generate new data according to the states (data is not state and should not be saved on the disk).

A more detailed example is in this `link <https://github.com/microsoft/qlib/tree/main/examples/highfreq>`_.


API
===
Please refer to `Serializable API <../reference/api.html#module-qlib.utils.serial.Serializable>`_.

```

## /docs/advanced/server.rst

```rst path="/docs/advanced/server.rst" 
.. _server:

=============================
``Online`` & ``Offline`` mode
=============================
.. currentmodule:: qlib


Introduction
============

``Qlib`` supports ``Online`` mode and ``Offline`` mode. Only the ``Offline`` mode is introduced in this document.

The ``Online`` mode is designed to solve the following problems:

- Manage the data in a centralized way. Users don't have to manage data of different versions.
- Reduce the amount of cache to be generated.
- Make the data accessible remotely.

Qlib-Server
===========

``Qlib-Server`` is the companion server system for ``Qlib``, which utilizes ``Qlib`` for basic calculations and provides an extensive server system and cache mechanism. With ``Qlib-Server``, the data provided for ``Qlib`` can be managed in a centralized manner, and users can use ``Qlib`` in ``Online`` mode.



Reference
=========
If users are interested in ``Qlib-Server`` and ``Online`` mode, please refer to `Qlib-Server Project <https://github.com/microsoft/qlib-server>`_ and `Qlib-Server Document <https://qlib-server.readthedocs.io/en/latest/>`_.

```

## /docs/advanced/task_management.rst

```rst path="/docs/advanced/task_management.rst" 
.. _task_management:

===============
Task Management
===============
.. currentmodule:: qlib


Introduction
============

The `Workflow <../component/introduction.html>`_ part introduces how to run the research workflow in a loosely-coupled way, but it can only execute one ``task`` when you use ``qrun``.
To automatically generate and execute different tasks, ``Task Management`` provides a whole process including `Task Generating`_, `Task Storing`_, `Task Training`_ and `Task Collecting`_.
With this module, users can run their ``task`` automatically at different periods, with different losses, or even with different models. The processes of task generation, model training, and result collection are shown in the following figure.

.. image:: ../_static/img/Task-Gen-Recorder-Collector.svg
    :align: center

This whole process can be used in `Online Serving <../component/online.html>`_.

An example of the entire process is shown `here <https://github.com/microsoft/qlib/tree/main/examples/model_rolling/task_manager_rolling.py>`__.

Task Generating
===============
A ``task`` consists of `Model`, `Dataset`, `Record`, or anything added by users. 
The specific task template can be viewed in 
`Task Section <../component/workflow.html#task-section>`_.
Even though the task template is fixed, users can customize their ``TaskGen`` to generate different ``task`` by task template.

Here is the base class of ``TaskGen``:

.. autoclass:: qlib.workflow.task.gen.TaskGen
    :members:
    :noindex:

``Qlib`` provides a class `RollingGen <https://github.com/microsoft/qlib/tree/main/qlib/workflow/task/gen.py>`_ to generate a list of ``task`` of the dataset in different date segments.
This class allows users to verify the effect of data from different periods on the model in one experiment. More information is `here <../reference/api.html#TaskGen>`__.

Task Storing
============
To achieve higher efficiency and the possibility of cluster operation, ``Task Manager`` will store all tasks in `MongoDB <https://www.mongodb.com/>`_.
``TaskManager`` can fetch undone tasks automatically and manage the lifecycle of a set of tasks with error handling.
Users **MUST** finish the configuration of `MongoDB <https://www.mongodb.com/>`_ when using this module.

Users need to provide the MongoDB URL and database name for using ``TaskManager`` in `initialization <../start/initialization.html#Parameters>`_ or make a statement like this.

    .. code-block:: python

        from qlib.config import C
        C["mongo"] = {
            "task_url" : "mongodb://localhost:27017/", # your MongoDB url
            "task_db_name" : "rolling_db" # database name
        }

.. autoclass:: qlib.workflow.task.manage.TaskManager
    :members:
    :noindex:

More information of ``Task Manager`` can be found in `here <../reference/api.html#TaskManager>`__.

Task Training
=============
After generating and storing those ``task``, it's time to run the ``task`` which is in the *WAITING* status.
``Qlib`` provides a method called ``run_task`` to run those ``task`` in the task pool; however, users can also customize how tasks are executed.
An easy way to get the ``task_func`` is to use ``qlib.model.trainer.task_train`` directly.
It will run the whole workflow defined by ``task``, which includes *Model*, *Dataset*, *Record*.

.. autofunction:: qlib.workflow.task.manage.run_task
    :noindex:

Meanwhile, ``Qlib`` provides a module called ``Trainer``. 

.. autoclass:: qlib.model.trainer.Trainer
    :members:
    :noindex:

``Trainer`` will train a list of tasks and return a list of model recorders.
``Qlib`` offers two kinds of ``Trainer``: ``TrainerR`` is the simplest one, while ``TrainerRM`` is based on ``TaskManager`` and helps manage the task lifecycle automatically.
If you do not want to use ``Task Manager`` to manage tasks, using ``TrainerR`` to train a list of tasks generated by ``TaskGen`` is enough.
`Here <../reference/api.html#Trainer>`_ are the details about different ``Trainer``.

Task Collecting
===============
Before collecting model training results, you need to use the ``qlib.init`` to specify the path of mlruns.

To collect the results of ``task`` after training, ``Qlib`` provides `Collector <../reference/api.html#Collector>`_, `Group <../reference/api.html#Group>`_ and `Ensemble <../reference/api.html#Ensemble>`_ to collect the results in a readable, expandable and loosely-coupled way.

`Collector <../reference/api.html#Collector>`_ can collect objects from everywhere and process them, such as merging, grouping, averaging and so on. It has two steps: ``collect`` (collect anything into a dict) and ``process_collect`` (process the collected dict).

`Group <../reference/api.html#Group>`_ also has two steps: ``group`` (group a set of objects based on `group_func`, turning them into a dict) and ``reduce`` (turn a dict into an ensemble based on some rule).
For example: {(A,B,C1): object, (A,B,C2): object} ---``group``---> {(A,B): {C1: object, C2: object}} ---``reduce``---> {(A,B): object}

`Ensemble <../reference/api.html#Ensemble>`_ can merge the objects in an ensemble. 
For example: {C1: object, C2: object} ---``Ensemble``---> object.
You can set the ensembles you want in the ``Collector``'s process_list.
Common ensembles include ``AverageEnsemble`` and ``RollingEnsemble``. ``AverageEnsemble`` is used to combine the results of different models over the same time period, while ``RollingEnsemble`` is used to combine the results of models over different (rolling) time periods.

So in the hierarchy, the second step of ``Collector`` corresponds to ``Group``, and the second step of ``Group`` corresponds to ``Ensemble``.
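
The following plain-Python sketch (not the actual qlib API) illustrates the ``group`` → ``reduce``/``Ensemble`` flow described above:

.. code-block:: python

    results = {("A", "B", "C1"): 1.0, ("A", "B", "C2"): 3.0}

    # group: split each key into a prefix and a last element
    grouped = {}
    for (*prefix, last), obj in results.items():
        grouped.setdefault(tuple(prefix), {})[last] = obj
    # grouped == {("A", "B"): {"C1": 1.0, "C2": 3.0}}

    # reduce / ensemble: merge each inner dict into one object (here, an average)
    ensembled = {k: sum(v.values()) / len(v) for k, v in grouped.items()}
    # ensembled == {("A", "B"): 2.0}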

For more information, please see `Collector <../reference/api.html#Collector>`_, `Group <../reference/api.html#Group>`_ and `Ensemble <../reference/api.html#Ensemble>`_, or the `example <https://github.com/microsoft/qlib/tree/main/examples/model_rolling/task_manager_rolling.py>`_.

```

## /docs/changelog/changelog.rst

```rst path="/docs/changelog/changelog.rst" 
.. include:: ../../CHANGES.rst

```

## /docs/component/data.rst

```rst path="/docs/component/data.rst" 
.. _data:

==================================
Data Layer: Data Framework & Usage
==================================

Introduction
============

``Data Layer`` provides user-friendly APIs to manage and retrieve data. It provides high-performance data infrastructure.

It is designed for quantitative investment. For example, users could build formulaic alphas with ``Data Layer`` easily. Please refer to `Building Formulaic Alphas <../advanced/alpha.html>`_ for more details.

The introduction of ``Data Layer`` includes the following parts.

- Data Preparation
- Data API
- Data Loader
- Data Handler
- Dataset
- Cache
- Data and Cache File Structure

Here is a typical example of Qlib data workflow

- Users download data and convert it into Qlib format (with filename suffix `.bin`). In this step, typically only some basic data (such as OHLCV) are stored on disk.
- Creating some basic features based on Qlib's expression engine (e.g. "Ref($close, 60) / $close", the return of the last 60 trading days). Supported operators in the expression engine can be found `here <https://github.com/microsoft/qlib/blob/main/qlib/data/ops.py>`__. This step is typically implemented in Qlib's `Data Loader <https://qlib.readthedocs.io/en/latest/component/data.html#data-loader>`_, which is a component of `Data Handler <https://qlib.readthedocs.io/en/latest/component/data.html#data-handler>`_ (see the sketch after this list).
- If users require more complicated data processing (e.g. data normalization), `Data Handler <https://qlib.readthedocs.io/en/latest/component/data.html#data-handler>`_ supports user-customized processors (some predefined processors can be found `here <https://github.com/microsoft/qlib/blob/main/qlib/data/dataset/processor.py>`__). The processors are different from the operators of the expression engine; they are designed for complicated data processing methods which are hard to support with expression operators.
- At last, `Dataset <https://qlib.readthedocs.io/en/latest/component/data.html#dataset>`_ is responsible for preparing a model-specific dataset from the processed data of the Data Handler.
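
For instance, a basic feature can be computed on the fly by the expression engine through the Data API (a sketch, assuming the CN data has been downloaded):

.. code-block:: python

    import qlib
    from qlib.data import D

    qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")

    # The expression below is evaluated by the expression engine; no precomputation needed
    df = D.features(
        ["SH600000"],
        ["Ref($close, 60) / $close"],
        start_time="2020-01-01",
        end_time="2020-12-31",
    )
    print(df.head())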

Data Preparation
================

Qlib Format Data
----------------

We've specially designed a data structure to manage financial data, please refer to the `File storage design section in Qlib paper <https://arxiv.org/abs/2009.11189>`_ for detailed information.
Such data will be stored with filename suffix `.bin` (We'll call them `.bin` file, `.bin` format, or qlib format). `.bin` file is designed for scientific computing on finance data.

``Qlib`` provides two different off-the-shelf datasets, which can be accessed through this `link <https://github.com/microsoft/qlib/blob/main/qlib/contrib/data/handler.py>`__:

========================  =================  ================
Dataset                   US Market          China Market
========================  =================  ================
Alpha360                  √                  √

Alpha158                  √                  √
========================  =================  ================

Also, ``Qlib`` provides a high-frequency dataset. Users can run a high-frequency dataset example through this `link <https://github.com/microsoft/qlib/tree/main/examples/highfreq>`__.

Qlib Format Dataset
-------------------
``Qlib`` has provided an off-the-shelf dataset in `.bin` format; users can use the script ``scripts/get_data.py`` to download the China-Stock dataset as follows. Users can also use numpy to load `.bin` files to validate the data.
The price/volume data look different from the actual dealing prices because they are **adjusted** (`adjusted price <https://www.investopedia.com/terms/a/adjusted_closing_price.asp>`_). You may also find that the adjusted prices differ across data sources, because data sources may vary in the way they adjust prices. Qlib normalizes the price of each stock to 1 on its first trading day when adjusting prices.
Users can leverage `$factor` to get the original trading price (e.g. `$close / $factor` to get the original close price).

Here are some discussions about price adjustment in Qlib.

- https://github.com/microsoft/qlib/issues/991#issuecomment-1075252402


.. code-block:: bash

    # download 1d
    python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

    # download 1min
    python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/qlib_cn_1min --region cn --interval 1min

In addition to China-Stock data, ``Qlib`` also includes a US-Stock dataset, which can be downloaded with the following command:

.. code-block:: bash

    python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us

After running the above commands, users can find the China-Stock and US-Stock data in ``Qlib`` format in the ``~/.qlib/qlib_data/cn_data`` directory and ``~/.qlib/qlib_data/us_data`` directory respectively.

``Qlib`` also provides the scripts in ``scripts/data_collector`` to help users crawl the latest data on the Internet and convert it to qlib format.

When ``Qlib`` is initialized with this dataset, users could build and evaluate their own models with it.  Please refer to `Initialization <../start/initialization.html>`_ for more details.

Automatic update of daily frequency data
----------------------------------------

  **It is recommended that users update the data manually once (\-\-trading_date 2021-05-25) and then set it to update automatically.**

  For more information refer to: `yahoo collector <https://github.com/microsoft/qlib/tree/main/scripts/data_collector/yahoo#Automatic-update-of-daily-frequency-data>`_

  - Automatic update of data to the "qlib" directory each trading day (Linux)
      - use *crontab*: `crontab -e`
      - set up timed tasks:

        .. code-block:: bash

            * * * * 1-5 python <script path> update_data_to_bin --qlib_data_1d_dir <user data dir>

        - **script path**: *scripts/data_collector/yahoo/collector.py*

  - Manual update of data

      .. code-block:: bash

        python scripts/data_collector/yahoo/collector.py update_data_to_bin --qlib_data_1d_dir <user data dir> --trading_date <start date> --end_date <end date>

      - *trading_date*: start of trading day
      - *end_date*: end of trading day (not included)



Converting CSV and Parquet Format into Qlib Format
--------------------------------------------------

``Qlib`` has provided the script ``scripts/dump_bin.py`` to convert **any** data in CSV or Parquet format into `.bin` files (``Qlib`` format) as long as they are in the correct format.

Besides downloading the prepared demo data, users can download demo data directly from the Collector as follows to see the expected CSV format.
Here are some examples:

for daily data:
  .. code-block:: bash

    python scripts/get_data.py download_data --file_name csv_data_cn.zip --target_dir ~/.qlib/csv_data/cn_data

for 1min data:
  .. code-block:: bash

    python scripts/data_collector/yahoo/collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1min --region CN --start 2021-05-20 --end 2021-05-23 --delay 0.1 --interval 1min --limit_nums 10

Users can also provide their own data in CSV or Parquet format. However, the data **must satisfy** the following criteria:

- CSV or Parquet file is named after a specific stock *or* the CSV or Parquet file includes a column of the stock name

    - Name the CSV or Parquet file after a stock: `SH600000.csv`, `AAPL.csv` or `SH600000.parquet`, `AAPL.parquet` (not case sensitive).

    - CSV or Parquet file includes a column of the stock name. User **must** specify the column name when dumping the data. Here is an example:

        .. code-block:: bash

            python scripts/dump_bin.py dump_all ... --symbol_field_name symbol --file_suffix <.csv or .parquet>

        where the data are in the following format:

            +-----------+-------+
            | symbol    | close |
            +===========+=======+
            | SH600000  | 120   |
            +-----------+-------+

- CSV or Parquet file **must** include a column for the date, and when dumping the data, user must specify the date column name. Here is an example:

    .. code-block:: bash

        python scripts/dump_bin.py dump_all ... --date_field_name date --file_suffix <.csv or .parquet>

    where the data are in the following format:

        +---------+------------+-------+------+----------+
        | symbol  | date       | close | open | volume   |
        +=========+============+=======+======+==========+
        | SH600000| 2020-11-01 | 120   | 121  | 12300000 |
        +---------+------------+-------+------+----------+
        | SH600000| 2020-11-02 | 123   | 120  | 12300000 |
        +---------+------------+-------+------+----------+


Suppose users have prepared their CSV or Parquet format data in the directory ``~/.qlib/my_data``; they can then run the following command to start the conversion.

.. code-block:: bash

    python scripts/dump_bin.py dump_all --data_path  ~/.qlib/my_data --qlib_dir ~/.qlib/qlib_data/ --include_fields open,close,high,low,volume,factor --file_suffix <.csv or .parquet>

For other supported parameters when dumping the data into `.bin` file, users can refer to the information by running the following commands:

.. code-block:: bash

    python scripts/dump_bin.py dump_all --help

After conversion, users can find their Qlib format data in the directory `~/.qlib/qlib_data/`.

.. note::

    The argument of `--include_fields` should correspond to the column names of the CSV or Parquet files. The column names of the dataset provided by ``Qlib`` should at least include open, close, high, low, volume, and factor.

    - `open`
        The adjusted opening price
    - `close`
        The adjusted closing price
    - `high`
        The adjusted highest price
    - `low`
        The adjusted lowest price
    - `volume`
        The adjusted trading volume
    - `factor`
        The restoration factor. Normally, ``factor = adjusted_price / original_price``; for `adjusted price`, see `split adjusted <https://www.investopedia.com/terms/s/splitadjusted.asp>`_

    In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended.
    If you want to use your own alpha factors which can't be calculated from OHLCV, such as PE and EPS, you can add them to the CSV or Parquet files together with OHLCV and then dump them to the Qlib format data.

Checking the health of the data
-------------------------------

``Qlib`` provides a script to check the health of the data.

- The main points to check are as follows

    - Check if any data is missing in the DataFrame.

    - Check if there are any large step changes above the threshold in the OHLCV columns.

    - Check if any of the required columns (OHLCV) are missing in the DataFrame.

    - Check if the 'factor' column is missing in the DataFrame.

- You can run the following commands to check whether the data is healthy or not.

    for daily data:
        .. code-block:: bash

            python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data

    for 1min data:
        .. code-block:: bash

            python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data_1min --freq 1min

- You can also pass some parameters to adjust the checks.

    - The available parameters are as follows.

        - freq: Frequency of the data.

        - large_step_threshold_price: Maximum permitted price change.

        - large_step_threshold_volume: Maximum permitted volume change.

        - missing_data_num: Maximum number of null values allowed in the data.

- For example, you can run the following commands with custom parameters:

    for daily data:
        .. code-block:: bash

            python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data --missing_data_num 30055 --large_step_threshold_volume 94485 --large_step_threshold_price 20

    for 1min data:
        .. code-block:: bash

            python scripts/check_data_health.py check_data --qlib_dir ~/.qlib/qlib_data/cn_data --freq 1min --missing_data_num 35806 --large_step_threshold_volume 3205452000000 --large_step_threshold_price 0.91

Stock Pool (Market)
-------------------

``Qlib`` defines a `stock pool <https://github.com/microsoft/qlib/blob/main/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml#L4>`_ as a stock list together with its date ranges. Predefined stock pools (e.g. csi300) can be imported as follows.

.. code-block:: bash

    python collector.py --index_name CSI300 --qlib_dir <user qlib data dir> --method parse_instruments


Multiple Stock Modes
--------------------

``Qlib`` now provides two different stock modes for users: China-Stock Mode & US-Stock Mode. Here are the settings that differ between the two modes:

==============  =================  ================
Region          Trade Unit         Limit Threshold
==============  =================  ================
China           100                0.099

US              1                  None
==============  =================  ================

The `trade unit` defines the number of shares used as the unit of a trade, and the `limit threshold` defines the bound on the daily percentage of ups and downs of a stock.

- If users use ``Qlib`` in china-stock mode, china-stock data is required. Users can use ``Qlib`` in china-stock mode according to the following steps:
    - Download china-stock in qlib format, please refer to section `Qlib Format Dataset <#qlib-format-dataset>`_.
    - Initialize ``Qlib`` in china-stock mode
        Suppose users have downloaded their Qlib format data to the directory ``~/.qlib/qlib_data/cn_data``. They only need to initialize ``Qlib`` as follows.

        .. code-block:: python

            import qlib
            from qlib.constant import REG_CN

            qlib.init(provider_uri='~/.qlib/qlib_data/cn_data', region=REG_CN)


- If users use ``Qlib`` in US-stock mode, US-stock data is required. ``Qlib`` also provides a script to download US-stock data. Users can use ``Qlib`` in US-stock mode according to the following steps:
    - Download us-stock in qlib format, please refer to section `Qlib Format Dataset <#qlib-format-dataset>`_.
    - Initialize ``Qlib`` in US-stock mode
        Suppose users have prepared their Qlib format data in the directory ``~/.qlib/qlib_data/us_data``. They only need to initialize ``Qlib`` as follows.

        .. code-block:: python

            import qlib
            from qlib.config import REG_US

            qlib.init(provider_uri='~/.qlib/qlib_data/us_data', region=REG_US)


.. note::

    PRs for new data sources are highly welcome! Users can submit the code that crawls data as a PR, like `the examples here <https://github.com/microsoft/qlib/tree/main/scripts>`_. We will then use the code to create a data cache on our server, which other users can use directly.


Data API
========

Data Retrieval
--------------
Users can use the APIs in ``qlib.data`` to retrieve data; please refer to `Data Retrieval <../start/getdata.html>`_.
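
For instance, the following sketch fetches a few fields and derived expressions with the ``D`` API (assuming ``Qlib`` has been initialized with the ``cn_data`` dataset; the instrument and dates are illustrative):

.. code-block:: python

    from qlib.data import D

    df = D.features(
        instruments=["SH600000"],
        fields=["$close", "$volume", "Ref($close, 1)", "Mean($close, 3)"],
        start_time="2010-01-01",
        end_time="2010-01-30",
    )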

Feature
-------

``Qlib`` provides `Feature` and `ExpressionOps` to fetch the features according to users' needs.

- `Feature`
    Load data from the data provider. Users can get features like `$high`, `$low`, `$open`, `$close`, etc., which should correspond to the arguments of `--include_fields`; please refer to section `Converting CSV and Parquet Format into Qlib Format <#converting-csv-and-parquet-format-into-qlib-format>`_.

- `ExpressionOps`
    `ExpressionOps` uses operators for feature construction.
    To know more about ``Operator``, please refer to `Operator API <../reference/api.html#module-qlib.data.ops>`_.
    Also, ``Qlib`` supports users to define their own custom ``Operator``, an example has been given in ``tests/test_register_ops.py``.

To know more about  ``Feature``, please refer to `Feature API <../reference/api.html#module-qlib.data.base>`_.

Filter
------
``Qlib`` provides `NameDFilter` and `ExpressionDFilter` to filter the instruments according to users' needs.

- `NameDFilter`
    Name dynamic instrument filter. Filters the instruments based on a name format; a regular expression for the name rule is required.

- `ExpressionDFilter`
    Expression dynamic instrument filter. Filters the instruments based on a certain expression; an expression rule involving certain feature fields is required.

    - `basic features filter`: rule_expression = '$close/$open>5'
    - `cross-sectional features filter`: rule_expression = 'Rank($close)<10'
    - `time-sequence features filter`: rule_expression = 'Ref($close, 3)>100'

Here is a simple example showing how to use filter in a basic ``Qlib`` workflow configuration file:

.. code-block:: yaml

    filter: &filter
        filter_type: ExpressionDFilter
        rule_expression: "Ref($close, -2) / Ref($close, -1) > 1"
        filter_start_time: 2010-01-01
        filter_end_time: 2010-01-07
        keep: False

    data_handler_config: &data_handler_config
        start_time: 2010-01-01
        end_time: 2021-01-22
        fit_start_time: 2010-01-01
        fit_end_time: 2015-12-31
        instruments: *market
        filter_pipe: [*filter]
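
Filters can also be constructed programmatically and passed to the data APIs. A minimal sketch (assuming ``Qlib`` is initialized; the regular expression and threshold are illustrative):

.. code-block:: python

    from qlib.data import D
    from qlib.data.filter import ExpressionDFilter, NameDFilter

    name_filter = NameDFilter(name_rule_re="SH[0-9]{4}55")
    expr_filter = ExpressionDFilter(rule_expression="$close > 2000")

    # build an instrument config with the filter pipeline and list the survivors
    instruments = D.instruments(market="csi300", filter_pipe=[name_filter, expr_filter])
    print(D.list_instruments(instruments=instruments, start_time="2015-01-01", end_time="2016-02-15", as_list=True))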

To know more about ``Filter``, please refer to `Filter API <../reference/api.html#module-qlib.data.filter>`_.

Reference
---------

To know more about ``Data API``, please refer to `Data API <../reference/api.html#data>`_.


Data Loader
===========

``Data Loader`` in ``Qlib`` is designed to load raw data from the original data source; the loaded data will be used in the ``Data Handler`` module.

QlibDataLoader
--------------

The ``QlibDataLoader`` class is an interface that allows users to load raw data from the ``Qlib`` data source.

StaticDataLoader
----------------

The ``StaticDataLoader`` class is an interface that allows users to load raw data from a file or from directly provided data.
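
As a sketch of the difference between the two loaders (the expression, field name, and pickle path are illustrative; ``StaticDataLoader`` is typically given pre-computed data such as a pickled DataFrame):

.. code-block:: python

    from qlib.data.dataset.loader import QlibDataLoader, StaticDataLoader

    # load data computed on the fly by the expression engine
    qdl = QlibDataLoader(config={"feature": (["Ref($close, 1)/$close - 1"], ["RET1"])})
    df = qdl.load(instruments="csi300", start_time="2010-01-01", end_time="2010-12-31")

    # load data that was prepared ahead of time
    sdl = StaticDataLoader(config="my_features.pkl")  # hypothetical file path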


Interface
---------

Here are some interfaces of the ``DataLoader`` base class:

.. autoclass:: qlib.data.dataset.loader.DataLoader
    :members:
    :noindex:

API
---

To know more about ``Data Loader``, please refer to `Data Loader API <../reference/api.html#module-qlib.data.dataset.loader>`_.


Data Handler
============

The ``Data Handler`` module in ``Qlib`` is designed to handle the common data processing steps that are shared by most models.

Users can use ``Data Handler`` in an automatic workflow by ``qrun``, refer to `Workflow: Workflow Management <workflow.html>`_ for more details.

DataHandlerLP
-------------

In addition to its use in an automatic workflow with ``qrun``, ``Data Handler`` can be used as an independent module, through which users can easily preprocess data (standardization, removing NaN, etc.) and build datasets.

To achieve this, ``Qlib`` provides a base class `qlib.data.dataset.DataHandlerLP <../reference/api.html#qlib.data.dataset.handler.DataHandlerLP>`_. The core idea of this class is that we have some learnable ``Processors`` which can learn the parameters of data processing (e.g., parameters for z-score normalization). When new data comes in, these `trained` ``Processors`` can process it, making efficient processing of real-time data possible. More information about ``Processors`` is given in the next subsection.


Interface
---------

Here are some important interfaces that ``DataHandlerLP`` provides:

.. autoclass:: qlib.data.dataset.handler.DataHandlerLP
    :members: __init__, fetch, get_cols
    :noindex:


If users want to load features and labels by config, they can define a new handler and call the static method `parse_config_to_fields` of ``qlib.contrib.data.handler.Alpha158``.

Also, users can pass ``qlib.contrib.data.processor.ConfigSectionProcessor``, which provides some preprocessing methods for features defined by config, into the new handler.


Processor
---------

The ``Processor`` module in ``Qlib`` is designed to be learnable; it is responsible for data processing tasks such as `normalization` and `dropping none/nan features/labels`.

``Qlib`` provides the following ``Processors``:

- ``DropnaProcessor``: `processor` that drops N/A features.
- ``DropnaLabel``: `processor` that drops N/A labels.
- ``TanhProcess``: `processor` that uses `tanh` to process noisy data.
- ``ProcessInf``: `processor` that handles infinity values; they will be replaced by the mean of the column.
- ``Fillna``: `processor` that handles N/A values, filling them with 0 or another given number.
- ``MinMaxNorm``: `processor` that applies min-max normalization.
- ``ZscoreNorm``: `processor` that applies z-score normalization.
- ``RobustZScoreNorm``: `processor` that applies robust z-score normalization.
- ``CSZScoreNorm``: `processor` that applies cross sectional z-score normalization.
- ``CSRankNorm``: `processor` that applies cross sectional rank normalization.
- ``CSZFillna``: `processor` that fills N/A values in a cross sectional way by the mean of the column.

Users can also create their own `processor` by inheriting the base class of ``Processor``. Please refer to the implementation of all the processors for more information (`Processor Link <https://github.com/microsoft/qlib/blob/main/qlib/data/dataset/processor.py>`_).
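
As an illustration, a hypothetical `processor` that clips feature values into a given range might look like the following sketch (the class name ``ClipProcessor`` and its parameters are made up for this example):

.. code-block:: python

    from qlib.data.dataset.processor import Processor
    from qlib.data.dataset.utils import get_group_columns

    class ClipProcessor(Processor):
        """Clip the values of a column group into [low, high]."""

        def __init__(self, fields_group="feature", low=-5.0, high=5.0):
            self.fields_group = fields_group
            self.low, self.high = low, high

        def __call__(self, df):
            # select the columns of the target group and clip them in place
            cols = get_group_columns(df, self.fields_group)
            df[cols] = df[cols].clip(self.low, self.high)
            return df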

To know more about ``Processor``, please refer to `Processor API <../reference/api.html#module-qlib.data.dataset.processor>`_.

Example
-------

``Data Handler`` can be run with ``qrun`` by modifying the configuration file, and it can also be used as a standalone module.

To know more about how to run ``Data Handler`` with ``qrun``, please refer to `Workflow: Workflow Management <workflow.html>`_.

Qlib provides the implemented data handler `Alpha158`. The following example shows how to run `Alpha158` as a standalone module.

.. note:: Users need to initialize ``Qlib`` with `qlib.init` first, please refer to `initialization <../start/initialization.html>`_.

.. code-block:: Python

    import qlib
    from qlib.contrib.data.handler import Alpha158

    data_handler_config = {
        "start_time": "2008-01-01",
        "end_time": "2020-08-01",
        "fit_start_time": "2008-01-01",
        "fit_end_time": "2014-12-31",
        "instruments": "csi300",
    }

    if __name__ == "__main__":
        qlib.init()
        h = Alpha158(**data_handler_config)

        # get all the columns of the data
        print(h.get_cols())

        # fetch all the labels
        print(h.fetch(col_set="label"))

        # fetch all the features
        print(h.fetch(col_set="feature"))


.. note:: In ``Alpha158``, ``Qlib`` uses the label `Ref($close, -2)/Ref($close, -1) - 1`, which means the price change from day T+1 to day T+2, rather than `Ref($close, -1)/$close - 1`. The reason is that, for China stocks, after obtaining the close price of day T, the stock can be bought on day T+1 and sold on day T+2.

API
---

To know more about ``Data Handler``, please refer to `Data Handler API <../reference/api.html#module-qlib.data.dataset.handler>`_.


Dataset
=======

The ``Dataset`` module in ``Qlib`` aims to prepare data for model training and inference.

The motivation of this module is to maximize the flexibility of different models to handle data in ways that suit them. This module gives each model the flexibility to process its data in a unique way. For instance, models such as ``GBDT`` may work well on data that contains `nan` or `None` values, while neural networks such as ``MLP`` will break down on such data.

If a user's model needs to process its data in a special way, the user can implement their own ``Dataset`` class; if the model's data processing is not special, ``DatasetH`` can be used directly.

The ``DatasetH`` class is a `dataset` equipped with a `Data Handler`. Here is the most important interface of the class:

.. autoclass:: qlib.data.dataset.__init__.DatasetH
    :members:
    :noindex:
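
For instance, a ``DatasetH`` built on the ``Alpha158`` handler from the previous section could be used as in the sketch below (the segment dates are illustrative):

.. code-block:: python

    from qlib.data.dataset import DatasetH

    # ``h`` is an initialized data handler, e.g. the Alpha158 instance above
    dataset = DatasetH(
        handler=h,
        segments={
            "train": ("2008-01-01", "2014-12-31"),
            "test": ("2017-01-01", "2020-08-01"),
        },
    )
    train_df = dataset.prepare("train", col_set=["feature", "label"])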

API
---

To know more about ``Dataset``, please refer to `Dataset API <../reference/api.html#dataset>`_.


Cache
=====

``Cache`` is an optional module that helps accelerate data provision by saving frequently-used data as cache files. ``Qlib`` provides a `Memcache` class to cache the most-frequently-used data in memory, an inheritable `ExpressionCache` class, and an inheritable `DatasetCache` class.

Global Memory Cache
-------------------

`Memcache` is a global memory cache mechanism that is composed of three `MemCacheUnit` instances to cache **Calendar**, **Instruments**, and **Features**. The `MemCache` is defined globally in `cache.py` as `H`. Users can use `H['c']`, `H['i']`, and `H['f']` to get/set the `memcache`.
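
A minimal sketch of getting/setting entries (the key and value are illustrative):

.. code-block:: python

    from qlib.data.cache import H

    H["f"]["my_key"] = 42     # set an entry in the feature cache
    value = H["f"]["my_key"]  # read it back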

.. autoclass:: qlib.data.cache.MemCacheUnit
    :members:
    :noindex:

.. autoclass:: qlib.data.cache.MemCache
    :members:
    :noindex:


ExpressionCache
---------------

`ExpressionCache` is a cache mechanism that saves expressions such as **Mean($close, 5)**. Users can inherit this base class to define their own cache mechanism that saves expressions according to the following steps.

- Override the `self._uri` method to define how the cache file path is generated.
- Override the `self._expression` method to define what data will be cached and how to cache it.

The following shows the details about the interfaces:

.. autoclass:: qlib.data.cache.ExpressionCache
    :members:
    :noindex:

``Qlib`` currently provides the implemented disk cache `DiskExpressionCache`, which inherits from `ExpressionCache`. The expression data will be stored on disk.

DatasetCache
------------

`DatasetCache` is a cache mechanism that saves datasets. A certain dataset is identified by a stock pool configuration (or a series of instruments, though not recommended), a list of expressions or static feature fields, the start time and end time of the collected features, and the frequency. Users can inherit this base class to define their own cache mechanism that saves datasets according to the following steps.

- Override the `self._uri` method to define how the cache file path is generated.
- Override the `self._dataset` method to define what data will be cached and how to cache it.

The following shows the details about the interfaces:

.. autoclass:: qlib.data.cache.DatasetCache
    :members:
    :noindex:

``Qlib`` currently provides the implemented disk cache `DiskDatasetCache`, which inherits from `DatasetCache`. The dataset data will be stored on disk.



Data and Cache File Structure
=============================

We've specially designed a file structure to manage data and cache, please refer to the `File storage design section in Qlib paper <https://arxiv.org/abs/2009.11189>`_ for detailed information. The file structure of data and cache is listed as follows.

.. code-block::

    - data/
        [raw data] updated by data providers
        - calendars/
            - day.txt
        - instruments/
            - all.txt
            - csi500.txt
            - ...
        - features/
            - sh600000/
                - open.day.bin
                - close.day.bin
                - ...
            - ...
        [cached data] updated when raw data is updated
        - calculated features/
            - sh600000/
                - [hash(instrument, field_expression, freq)]
                    - all-time expression-cache data file
                    - .meta : an assorted meta file recording the instrument name, field name, freq, and visit times
            - ...
        - cache/
            - [hash(stockpool_config, field_expression_list, freq)]
                - all-time Dataset-cache data file
                - .meta : an assorted meta file recording the stockpool config, field names and visit times
                - .index : an assorted index file recording the line index of all calendars
            - ...

```

## /docs/component/highfreq.rst

```rst path="/docs/component/highfreq.rst" 
.. _highfreq:

========================================================================
Design of Nested Decision Execution Framework for High-Frequency Trading
========================================================================
.. currentmodule:: qlib

Introduction
============

Daily trading (e.g. portfolio management) and intraday trading (e.g. orders execution) are two hot topics in Quant investment and are usually studied separately.

To get the joint trading performance of daily and intraday trading, they must interact with each other and be backtested jointly.
In order to support joint backtesting of strategies at multiple levels, a corresponding framework is required. None of the publicly available high-frequency trading frameworks considers multi-level joint trading, which makes the aforementioned backtesting inaccurate.

Besides backtesting, strategies at different levels are not optimized independently and can affect each other.
For example, the best portfolio management strategy may change with the performance of order execution (e.g. a portfolio with higher turnover may become a better choice when we improve the order execution strategies).
To achieve overall good performance, it is necessary to consider the interaction of strategies at different levels.

Therefore, building a new framework for trading on multiple levels becomes necessary to solve the various problems mentioned above, for which we designed a nested decision execution framework that considers the interaction of strategies.

.. image:: ../_static/img/framework.svg

The design of the framework is shown in the yellow part in the middle of the figure above. Each level consists of a ``Trading Agent`` and an ``Execution Env``. The ``Trading Agent`` has its own data processing module (``Information Extractor``), forecasting module (``Forecast Model``) and decision generator (``Decision Generator``). The trading algorithm generates decisions with the ``Decision Generator`` based on the forecast signals output by the ``Forecast Model``, and the decisions are passed to the ``Execution Env``, which returns the execution results.

The frequency of the trading algorithm, decision content and execution environment can be customized by users (e.g. intraday trading, daily-frequency trading, weekly-frequency trading), and the execution environment can be nested with finer-grained trading algorithm and execution environment inside (i.e. sub-workflow in the figure, e.g. daily-frequency orders can be turned into finer-grained decisions by splitting orders within the day). The flexibility of the nested decision execution framework makes it easy for users to explore the effects of combining different levels of trading strategies and break down the optimization barriers between different levels of the trading algorithm.

The optimization for the nested decision execution framework can be implemented with the support of `QlibRL <./rl/overall.html>`_. To know more about how to use the QlibRL, go to API Reference: `RL API <../reference/api.html#rl>`_. 

Example
=======

An example of a nested decision execution framework for high-frequency can be found `here <https://github.com/microsoft/qlib/blob/main/examples/nested_decision_execution/workflow.py>`_.


Besides the above example, here are some other related works about high-frequency trading in Qlib.

- `Prediction with high-frequency data <https://github.com/microsoft/qlib/tree/main/examples/highfreq#benchmarks-performance-predicting-the-price-trend-in-high-frequency-data>`_
- `Examples <https://github.com/microsoft/qlib/blob/main/examples/orderbook_data/>`_ to extract features from high-frequency data without fixed frequency.
- `A paper <https://github.com/microsoft/qlib/tree/high-freq-execution#high-frequency-execution>`_ for high-frequency trading.

```

## /docs/component/meta.rst

```rst path="/docs/component/meta.rst" 
.. _meta:

======================================================
Meta Controller: Meta-Task & Meta-Dataset & Meta-Model
======================================================
.. currentmodule:: qlib


Introduction
============
``Meta Controller`` provides guidance to ``Forecast Model``: it aims to learn regular patterns among a series of forecasting tasks and use the learned patterns to guide forthcoming forecasting tasks. Users can implement their own meta-model instances based on the ``Meta Controller`` module.

Meta Task
=========

A `Meta Task` instance is the basic element in the meta-learning framework. It saves the data that can be used for the `Meta Model`. Multiple `Meta Task` instances may share the same `Data Handler`, controlled by `Meta Dataset`. Users should use `prepare_task_data()` to obtain the data that can be directly fed into the `Meta Model`.

.. autoclass:: qlib.model.meta.task.MetaTask
    :members:

Meta Dataset
============

`Meta Dataset` controls the meta-information generating process. It is responsible for providing data for training the `Meta Model`. Users should use `prepare_tasks` to retrieve a list of `Meta Task` instances.

.. autoclass:: qlib.model.meta.dataset.MetaTaskDataset
    :members:

Meta Model
==========

General Meta Model
------------------
A `Meta Model` instance is the part that controls the workflow. The usage of the `Meta Model` includes:

1. Users train their `Meta Model` with the `fit` function.
2. The `Meta Model` instance guides the workflow by giving useful information via the `inference` function.

.. autoclass:: qlib.model.meta.model.MetaModel
    :members:

Meta Task Model
---------------
This type of meta-model may interact with task definitions directly; the `Meta Task Model` is the class for such meta-models to inherit from. They guide the base tasks by modifying the base task definitions. The function `prepare_tasks` can be used to obtain the modified base task definitions.

.. autoclass:: qlib.model.meta.model.MetaTaskModel
    :members:

Meta Guide Model
----------------
This type of meta-model participates in the training process of the base forecasting model. The meta-model may guide the base forecasting models during their training to improve their performances.

.. autoclass:: qlib.model.meta.model.MetaGuideModel
    :members:


Example
=======
``Qlib`` provides an implementation of ``Meta Model`` module, ``DDG-DA``,
which adapts to the market dynamics.

``DDG-DA`` includes four steps:

1. Calculate meta-information and encapsulate it into ``Meta Task`` instances. All the meta-tasks form a ``Meta Dataset`` instance.
2. Train ``DDG-DA`` based on the training data of the meta-dataset.
3. Do the inference of the ``DDG-DA`` to get guide information.
4. Apply guide information to the forecasting models to improve their performances.

The `above example <https://github.com/microsoft/qlib/tree/main/examples/benchmarks_dynamic/DDG-DA>`_ can be found in ``examples/benchmarks_dynamic/DDG-DA/workflow.py``.

```

## /docs/component/model.rst

```rst path="/docs/component/model.rst" 
.. _model:

===========================================
Forecast Model: Model Training & Prediction
===========================================

Introduction
============

``Forecast Model`` is designed to make the `prediction score` about stocks. Users can use the ``Forecast Model`` in an automatic workflow by ``qrun``, please refer to `Workflow: Workflow Management <workflow.html>`_.

Because the components in ``Qlib`` are designed in a loosely-coupled way, ``Forecast Model`` can also be used as an independent module.

Base Class & Interface
======================

``Qlib`` provides a base class `qlib.model.base.Model <../reference/api.html#module-qlib.model.base>`_ from which all models should inherit.

The base class provides the following interfaces:

.. autoclass:: qlib.model.base.Model
    :members:
    :noindex:

``Qlib`` also provides a base class `qlib.model.base.ModelFT <../reference/api.html#qlib.model.base.ModelFT>`_, which includes the method for finetuning the model.

For other interfaces such as `finetune`, please refer to `Model API <../reference/api.html#module-qlib.model.base>`_.

Example
=======

``Qlib``'s `Model Zoo` includes models such as ``LightGBM``, ``MLP``, ``LSTM``, etc. These models are treated as baselines for ``Forecast Model``. The following steps show how to run ``LightGBM`` as an independent module.

- Initialize ``Qlib`` with `qlib.init` first, please refer to `Initialization <../start/initialization.html>`_.
- Run the following code to get the `prediction score` `pred_score`
    .. code-block:: Python

        from qlib.contrib.model.gbdt import LGBModel
        from qlib.contrib.data.handler import Alpha158
        from qlib.utils import init_instance_by_config, flatten_dict
        from qlib.workflow import R
        from qlib.workflow.record_temp import SignalRecord, PortAnaRecord

        market = "csi300"
        benchmark = "SH000300"

        data_handler_config = {
            "start_time": "2008-01-01",
            "end_time": "2020-08-01",
            "fit_start_time": "2008-01-01",
            "fit_end_time": "2014-12-31",
            "instruments": market,
        }

        task = {
            "model": {
                "class": "LGBModel",
                "module_path": "qlib.contrib.model.gbdt",
                "kwargs": {
                    "loss": "mse",
                    "colsample_bytree": 0.8879,
                    "learning_rate": 0.0421,
                    "subsample": 0.8789,
                    "lambda_l1": 205.6999,
                    "lambda_l2": 580.9768,
                    "max_depth": 8,
                    "num_leaves": 210,
                    "num_threads": 20,
                },
            },
            "dataset": {
                "class": "DatasetH",
                "module_path": "qlib.data.dataset",
                "kwargs": {
                    "handler": {
                        "class": "Alpha158",
                        "module_path": "qlib.contrib.data.handler",
                        "kwargs": data_handler_config,
                    },
                    "segments": {
                        "train": ("2008-01-01", "2014-12-31"),
                        "valid": ("2015-01-01", "2016-12-31"),
                        "test": ("2017-01-01", "2020-08-01"),
                    },
                },
            },
        }

        # model initialization
        model = init_instance_by_config(task["model"])
        dataset = init_instance_by_config(task["dataset"])

        # start exp
        with R.start(experiment_name="workflow"):
            # train
            R.log_params(**flatten_dict(task))
            model.fit(dataset)

            # prediction
            recorder = R.get_recorder()
            sr = SignalRecord(model, dataset, recorder)
            sr.generate()

    .. note::

        `Alpha158` is the data handler provided by ``Qlib``, please refer to `Data Handler <data.html#data-handler>`_.
        `SignalRecord` is the `Record Template` in ``Qlib``, please refer to `Workflow <recorder.html#record-template>`_.

Also, the above example has been given in ``examples/train_backtest_analyze.ipynb``.
Technically, the meaning of the model prediction depends on the label setting designed by the user.
By default, the score is the forecasting model's rating of the instruments: the higher the score, the more profitable the instrument is expected to be.


Custom Model
============

Qlib supports custom models. If users are interested in customizing their own models and integrating the models into ``Qlib``, please refer to `Custom Model Integration <../start/integration.html>`_.
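
A minimal sketch of the required interface is shown below (``NaiveMeanModel`` is a made-up example; real integration details are covered in the linked guide):

.. code-block:: python

    from qlib.model.base import Model

    class NaiveMeanModel(Model):
        """Hypothetical model: scores each instrument by the mean of its features."""

        def fit(self, dataset):
            pass  # nothing to learn for this naive baseline

        def predict(self, dataset, segment="test"):
            df = dataset.prepare(segment, col_set="feature")
            return df.mean(axis=1)  # one score per (datetime, instrument)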


API
===
Please refer to `Model API <../reference/api.html#module-qlib.model.base>`_.

```

## /docs/component/online.rst

```rst path="/docs/component/online.rst" 
.. _online_serving:

==============
Online Serving
==============
.. currentmodule:: qlib


Introduction
============

.. image:: ../_static/img/online_serving.png
    :align: center


In addition to backtesting, one way to test whether a model is effective is to make predictions in real market conditions, or even to do real trading based on those predictions.
``Online Serving`` is a set of modules to serve models online using the latest data,
including `Online Manager <#Online Manager>`_, `Online Strategy <#Online Strategy>`_, `Online Tool <#Online Tool>`_ and `Updater <#Updater>`_.

`Here <https://github.com/microsoft/qlib/tree/main/examples/online_srv>`_ are several examples for reference, which demonstrate different features of ``Online Serving``.
If you have many models or `tasks` that need to be managed, please consider `Task Management <../advanced/task_management.html>`_.
The `examples <https://github.com/microsoft/qlib/tree/main/examples/online_srv>`_ are based on some components in `Task Management <../advanced/task_management.html>`_ such as ``TrainerRM`` or ``Collector``.

**NOTE**: Users should keep their data source updated to support online serving. For example, Qlib provides `a batch of scripts <https://github.com/microsoft/qlib/blob/main/scripts/data_collector/yahoo/README.md#automatic-update-of-daily-frequency-datafrom-yahoo-finance>`_ to help users update Yahoo daily data.

Currently known limitations:

- Daily updating of predictions for the next trading day is supported, but generating orders for the next trading day is not, due to the `limitations of public data <https://github.com/microsoft/qlib/issues/215#issuecomment-766293563>`_.


Online Manager
==============

.. automodule:: qlib.workflow.online.manager
    :members:
    :noindex:

Online Strategy
===============

.. automodule:: qlib.workflow.online.strategy
    :members:
    :noindex:

Online Tool
===========

.. automodule:: qlib.workflow.online.utils
    :members:
    :noindex:

Updater
=======

.. automodule:: qlib.workflow.online.update
    :members:
    :noindex:

```

## /docs/component/recorder.rst

```rst path="/docs/component/recorder.rst" 
.. _recorder:

====================================
Qlib Recorder: Experiment Management
====================================
.. currentmodule:: qlib

Introduction
============
``Qlib`` contains an experiment management system named ``QlibRecorder``, which is designed to help users handle experiments and analyse results in an efficient way.

There are three components of the system:

- `ExperimentManager`
    a class that manages experiments.

- `Experiment`
    a class of experiment, and each instance of it is responsible for a single experiment.

- `Recorder`
    a class of recorder, and each instance of it is responsible for a single run.

Here is a general view of the structure of the system:

.. code-block::

    ExperimentManager
        - Experiment 1
            - Recorder 1
            - Recorder 2
            - ...
        - Experiment 2
            - Recorder 1
            - Recorder 2
            - ...
        - ...

This experiment management system defines a set of interfaces and provides a concrete implementation ``MLflowExpManager``, which is based on the machine learning platform ``MLflow`` (`link <https://mlflow.org/>`_).

If users set the implementation of ``ExpManager`` to be ``MLflowExpManager``, they can use the command `mlflow ui` to visualize and check the experiment results. For more information, please refer to the related documents `here <https://www.mlflow.org/docs/latest/cli.html#mlflow-ui>`_.

Qlib Recorder
=============
``QlibRecorder`` provides a high-level API for users to use the experiment management system. The interfaces are wrapped in the variable ``R`` in ``Qlib``, and users can directly use ``R`` to interact with the system. The following command shows how to import ``R`` in Python:

.. code-block:: Python

        from qlib.workflow import R

``QlibRecorder`` includes several common APIs for managing `experiments` and `recorders` within a workflow. For more available APIs, please refer to the following sections about `Experiment Manager`, `Experiment` and `Recorder`.
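
A minimal sketch of a typical interaction with ``R`` (the experiment name, parameters and metric values are illustrative):

.. code-block:: python

    from qlib.workflow import R

    with R.start(experiment_name="my_experiment"):
        R.log_params(learning_rate=0.01)
        # ... train a model and compute evaluation results here ...
        R.log_metrics(ic=0.05)
        recorder = R.get_recorder()  # handle to the current run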

Here are the available interfaces of ``QlibRecorder``:

.. autoclass:: qlib.workflow.__init__.QlibRecorder
    :members:

Experiment Manager
==================

The ``ExpManager`` module in ``Qlib`` is responsible for managing different experiments. Most of the APIs of ``ExpManager`` are similar to those of ``QlibRecorder``, and the most important API is the ``get_exp`` method. Users can refer to the documents above for detailed information about how to use it.

.. autoclass:: qlib.workflow.expm.ExpManager
    :members: get_exp, list_experiments
    :noindex:

For other interfaces such as `create_exp`, `delete_exp`, please refer to `Experiment Manager API <../reference/api.html#experiment-manager>`_.

Experiment
==========

The ``Experiment`` class is solely responsible for a single experiment, and it handles any operations that are related to an experiment. Basic methods such as `start` and `end` (for starting and ending an experiment) are included. Besides, methods related to `recorders` are also available, such as `get_recorder` and `list_recorders`.

.. autoclass:: qlib.workflow.exp.Experiment
    :members: get_recorder, list_recorders
    :noindex:

For other interfaces such as `search_records`, `delete_recorder`, please refer to `Experiment API <../reference/api.html#experiment>`_.

``Qlib`` also provides a default ``Experiment``, which will be created and used under certain situations when users use the APIs such as `log_metrics` or `get_exp`. If the default ``Experiment`` is used, there will be related logged information when running ``Qlib``. Users are able to change the name of the default ``Experiment`` in the config file of ``Qlib`` or during ``Qlib``'s `initialization <../start/initialization.html#parameters>`_, which is set to be '`Experiment`'.

Recorder
========

The ``Recorder`` class is responsible for a single recorder. It handles detailed operations of a single run, such as ``log_metrics`` and ``log_params``. It is designed to help users easily track the results and artifacts generated during a run.

Here are some important APIs that are not included in the ``QlibRecorder``:

.. autoclass:: qlib.workflow.recorder.Recorder
    :members: list_artifacts, list_metrics, list_params, list_tags
    :noindex:

For other interfaces such as `save_objects`, `load_object`, please refer to `Recorder API <../reference/api.html#recorder>`_.

Record Template
===============

The ``RecordTemp`` class enables generating experiment results, such as IC and backtest results, in a certain format. We have provided three different `Record Template` classes:

- ``SignalRecord``: This class generates the `prediction` results of the model.
- ``SigAnaRecord``: This class generates the `IC`, `ICIR`, `Rank IC` and `Rank ICIR` of the model.

Here is a simple example of what is done in ``SigAnaRecord``, which users can refer to if they want to calculate IC, Rank IC, and Long-Short Return with their own predictions and labels.

.. code-block:: Python

    from qlib.contrib.eva.alpha import calc_ic, calc_long_short_return

    # ``pred`` and ``label`` are assumed to be user-prepared DataFrames of prediction
    # scores and ground-truth labels sharing a (datetime, instrument) index
    ic, ric = calc_ic(pred.iloc[:, 0], label.iloc[:, 0])
    long_short_r, long_avg_r = calc_long_short_return(pred.iloc[:, 0], label.iloc[:, 0])

- ``PortAnaRecord``: This class generates the results of `backtest`. The detailed information about `backtest` as well as the available `strategy`, users can refer to `Strategy <../component/strategy.html>`_ and `Backtest <../component/backtest.html>`_.

Here is a simple example of what is done in ``PortAnaRecord``, which users can refer to if they want to run a backtest based on their own predictions and labels.

.. code-block:: Python

    import pandas as pd

    from qlib.contrib.strategy.strategy import TopkDropoutStrategy
    from qlib.contrib.evaluate import (
        backtest as normal_backtest,
        risk_analysis,
    )

    # ``pred_score`` is assumed to be a user-prepared DataFrame of prediction
    # scores; ``BENCHMARK`` is the benchmark instrument (e.g. "SH000300")

    # backtest
    STRATEGY_CONFIG = {
        "topk": 50,
        "n_drop": 5,
    }
    BACKTEST_CONFIG = {
        "limit_threshold": 0.095,
        "account": 100000000,
        "benchmark": BENCHMARK,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 5,
    }

    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
    report_normal, positions_normal = normal_backtest(pred_score, strategy=strategy, **BACKTEST_CONFIG)

    # analysis
    analysis = dict()
    analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
    analysis["excess_return_with_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"] - report_normal["cost"])
    analysis_df = pd.concat(analysis)  # type: pd.DataFrame
    print(analysis_df)

For more information about the APIs, please refer to `Record Template API <../reference/api.html#module-qlib.workflow.record_temp>`_.



Known Limitations
=================
- Python objects are saved with pickle, which may result in issues when the environment where objects are dumped differs from the one where they are loaded.

```

## /docs/component/report.rst

```rst path="/docs/component/report.rst" 
.. _report:

=======================================
Analysis: Evaluation & Results Analysis
=======================================

Introduction
============

``Analysis`` is designed to show the graphical reports of ``Intraday Trading`` , which helps users to evaluate and analyse investment portfolios visually. The following are some graphics to view:

- analysis_position
    - report_graph
    - score_ic_graph
    - cumulative_return_graph
    - risk_analysis_graph
    - rank_label_graph

- analysis_model
    - model_performance_graph


All of the accumulated profit metrics (e.g. return, max drawdown) in Qlib are calculated by summation.
This avoids the metrics or the plots being skewed exponentially over time.

Graphical Reports
=================

Users can run the following code to get all supported reports.

.. code-block:: python

    >> import qlib.contrib.report as qcr
    >> print(qcr.GRAPH_NAME_LIST)
    ['analysis_position.report_graph', 'analysis_position.score_ic_graph', 'analysis_position.cumulative_return_graph', 'analysis_position.risk_analysis_graph', 'analysis_position.rank_label_graph', 'analysis_model.model_performance_graph']

.. note::

    For more details, please refer to the function documentation, e.g. via ``help(qcr.analysis_position.report_graph)``.
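
As an illustration, a typical flow is to load a backtest report produced by ``PortAnaRecord`` from a recorder and feed it to one of these functions. A sketch, assuming a finished experiment named ``workflow`` whose artifacts follow the naming used in the Qlib examples:

.. code-block:: python

    import qlib.contrib.report as qcr
    from qlib.workflow import R

    # assumes qlib.init() has been called with the same provider_uri as the experiment
    recorder = R.get_recorder(experiment_name="workflow")
    report_df = recorder.load_object("portfolio_analysis/report_normal_1day.pkl")
    qcr.analysis_position.report_graph(report_df)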



Usage & Example
===============

Usage of `analysis_position.report`
-----------------------------------

API
~~~

.. automodule:: qlib.contrib.report.analysis_position.report
    :members:
    :noindex:

Graphical Result
~~~~~~~~~~~~~~~~

.. note::

    - Axis X: Trading day
    - Axis Y:
        - `cum bench`
            Cumulative returns series of benchmark
        - `cum return wo cost`
            Cumulative returns series of portfolio without cost
        - `cum return w cost`
            Cumulative returns series of portfolio with cost
        - `return wo mdd`
            Maximum drawdown series of cumulative return without cost
        - `return w cost mdd`:
            Maximum drawdown series of cumulative return with cost
        - `cum ex return wo cost`
            The `CAR` (cumulative abnormal return) series of the portfolio compared to the benchmark without cost.
        - `cum ex return w cost`
            The `CAR` (cumulative abnormal return) series of the portfolio compared to the benchmark with cost.
        - `turnover`
            Turnover rate series
        - `cum ex return wo cost mdd`
            Drawdown series of `CAR` (cumulative abnormal return) without cost
        - `cum ex return w cost mdd`
            Drawdown series of `CAR` (cumulative abnormal return) with cost
    - The shaded part above: Maximum drawdown corresponding to `cum return wo cost`
    - The shaded part below: Maximum drawdown corresponding to `cum ex return wo cost`

.. image:: ../_static/img/analysis/report.png


Usage of `analysis_position.score_ic`
-------------------------------------

API
~~~

.. automodule:: qlib.contrib.report.analysis_position.score_ic
    :members:
    :noindex:


Graphical Result
~~~~~~~~~~~~~~~~

.. note::

    - Axis X: Trading day
    - Axis Y:
        - `ic`
            The `Pearson correlation coefficient` series between `label` and `prediction score`.
            In the above example, the `label` is formulated as `Ref($close, -2)/Ref($close, -1)-1`. Please refer to `Data Feature <data.html#feature>`_ for more details.

        - `rank_ic`
            The `Spearman's rank correlation coefficient` series between `label` and `prediction score`.

.. image:: ../_static/img/analysis/score_ic.png


.. Usage of `analysis_position.cumulative_return`
.. ----------------------------------------------
..
.. API
.. ~~~~~~~~~~~~~~~~
..
.. .. automodule:: qlib.contrib.report.analysis_position.cumulative_return
..     :members:
..
.. Graphical Result
.. ~~~~~~~~~~~~~~~~~
..
.. .. note::
..
..     - Axis X: Trading day
..     - Axis Y:
..         - Above axis Y: `(((Ref($close, -1)/$close - 1) * weight).sum() / weight.sum()).cumsum()`
..         - Below axis Y: Daily weight sum
..     - In the **sell** graph, `y < 0` stands for profit; in other cases, `y > 0` stands for profit.
..     - In the **buy_minus_sell** graph, the **y** value of the **weight** graph at the bottom is `buy_weight + sell_weight`.
..     - In each graph, the **red line** in the histogram on the right represents the average.
..
.. .. image:: ../_static/img/analysis/cumulative_return_buy.png
..
.. .. image:: ../_static/img/analysis/cumulative_return_sell.png
..
.. .. image:: ../_static/img/analysis/cumulative_return_buy_minus_sell.png
..
.. .. image:: ../_static/img/analysis/cumulative_return_hold.png


Usage of `analysis_position.risk_analysis`
------------------------------------------

API
~~~

.. automodule:: qlib.contrib.report.analysis_position.risk_analysis
    :members:
    :noindex:


Graphical Result
~~~~~~~~~~~~~~~~

.. note::

    - general graphics
        - `std`
            - `excess_return_without_cost`
                The `Standard Deviation` of `CAR` (cumulative abnormal return) without cost.
            - `excess_return_with_cost`
                The `Standard Deviation` of `CAR` (cumulative abnormal return) with cost.
        - `annualized_return`
            - `excess_return_without_cost`
                The `Annualized Rate` of `CAR` (cumulative abnormal return) without cost.
            - `excess_return_with_cost`
                The `Annualized Rate` of `CAR` (cumulative abnormal return) with cost.
        -  `information_ratio`
            - `excess_return_without_cost`
                The `Information Ratio` without cost.
            - `excess_return_with_cost`
                The `Information Ratio` with cost.

            To know more about `Information Ratio`, please refer to `Information Ratio – IR <https://www.investopedia.com/terms/i/informationratio.asp>`_.
        -  `max_drawdown`
            - `excess_return_without_cost`
                The `Maximum Drawdown` of `CAR` (cumulative abnormal return) without cost.
            - `excess_return_with_cost`
                The `Maximum Drawdown` of `CAR` (cumulative abnormal return) with cost.


.. image:: ../_static/img/analysis/risk_analysis_bar.png
    :align: center

.. note::

    - annualized_return/max_drawdown/information_ratio/std graphics
        - Axis X: Trading days grouped by month
        - Axis Y:
            - annualized_return graphics
                - `excess_return_without_cost_annualized_return`
                    The `Annualized Rate` series of monthly `CAR` (cumulative abnormal return) without cost.
                - `excess_return_with_cost_annualized_return`
                    The `Annualized Rate` series of monthly `CAR` (cumulative abnormal return) with cost.
            - max_drawdown graphics
                - `excess_return_without_cost_max_drawdown`
                    The `Maximum Drawdown` series of monthly `CAR` (cumulative abnormal return) without cost.
                - `excess_return_with_cost_max_drawdown`
                    The `Maximum Drawdown` series of monthly `CAR` (cumulative abnormal return) with cost.
            - information_ratio graphics
                - `excess_return_without_cost_information_ratio`
                    The `Information Ratio` series of monthly `CAR` (cumulative abnormal return) without cost.
                - `excess_return_with_cost_information_ratio`
                    The `Information Ratio` series of monthly `CAR` (cumulative abnormal return) with cost.
            - std graphics
                - `excess_return_without_cost_max_drawdown`
                    The `Standard Deviation` series of monthly `CAR` (cumulative abnormal return) without cost.
                - `excess_return_with_cost_max_drawdown`
                    The `Standard Deviation` series of monthly `CAR` (cumulative abnormal return) with cost.


.. image:: ../_static/img/analysis/risk_analysis_annualized_return.png
    :align: center

.. image:: ../_static/img/analysis/risk_analysis_max_drawdown.png
    :align: center

.. image:: ../_static/img/analysis/risk_analysis_information_ratio.png
    :align: center

.. image:: ../_static/img/analysis/risk_analysis_std.png
    :align: center

..
.. Usage of `analysis_position.rank_label`
.. ---------------------------------------
..
.. API
.. ~~~
..
.. .. automodule:: qlib.contrib.report.analysis_position.rank_label
..     :members:
..
..
.. Graphical Result
.. ~~~~~~~~~~~~~~~~
..
.. .. note::
..
..     - hold/sell/buy graphics:
..         - Axis X: Trading day
..         - Axis Y:
..             Average `ranking ratio`of `label` for stocks that is held/sold/bought on the trading day.
..
..             In the above example, the `label` is formulated as `Ref($close, -1)/$close - 1`. The `ranking ratio` can be formulated as follows.
..             .. math::
..
..                 ranking\ ratio = \frac{Ascending\ Ranking\ of\ label}{Number\ of\ Stocks\ in\ the\ Portfolio}
..
.. .. image:: ../_static/img/analysis/rank_label_hold.png
..     :align: center
..
.. .. image:: ../_static/img/analysis/rank_label_buy.png
..     :align: center
..
.. .. image:: ../_static/img/analysis/rank_label_sell.png
..     :align: center
..
..

Usage of `analysis_model.analysis_model_performance`
----------------------------------------------------

API
~~~

.. automodule:: qlib.contrib.report.analysis_model.analysis_model_performance
    :members:
    :noindex:


Graphical Results
~~~~~~~~~~~~~~~~~

.. note::

    - cumulative return graphics
        - `Group1`:
            The `Cumulative Return` series of stocks group with (`ranking ratio` of label <= 20%)
        - `Group2`:
            The `Cumulative Return` series of stocks group with (20% < `ranking ratio` of label <= 40%)
        - `Group3`:
            The `Cumulative Return` series of stocks group with (40% < `ranking ratio` of label <= 60%)
        - `Group4`:
            The `Cumulative Return` series of stocks group with (60% < `ranking ratio` of label <= 80%)
        - `Group5`:
            The `Cumulative Return` series of stocks group with (80% < `ranking ratio` of label)
        - `long-short`:
            The Difference series between `Cumulative Return` of `Group1` and of `Group5`
        - `long-average`
            The Difference series between `Cumulative Return` of `Group1` and average `Cumulative Return` for all stocks.

        The `ranking ratio` can be formulated as follows.
            .. math::

                ranking\ ratio = \frac{Ascending\ Ranking\ of\ label}{Number\ of\ Stocks\ in\ the\ Portfolio}

.. image:: ../_static/img/analysis/analysis_model_cumulative_return.png
    :align: center

.. note::
    - long-short/long-average
        The distribution of long-short/long-average returns on each trading day


.. image:: ../_static/img/analysis/analysis_model_long_short.png
    :align: center

.. TODO: ask xiao yang for detail

.. note::
    - Information Coefficient
        - The `Pearson correlation coefficient` series between the `labels` and the `prediction scores` of stocks in the portfolio.
        - These graphics can be used to evaluate the `prediction scores`.
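
A minimal sketch of computing such a daily IC series with pandas (the index layout and column names below are hypothetical, chosen to mirror the <datetime, instrument> convention used elsewhere in these docs):

.. code-block:: python

    import numpy as np
    import pandas as pd

    # Hypothetical <datetime, instrument>-indexed frame of scores and labels.
    idx = pd.MultiIndex.from_product(
        [pd.date_range("2020-01-01", periods=3), [f"S{i}" for i in range(50)]],
        names=["datetime", "instrument"],
    )
    rng = np.random.default_rng(0)
    df = pd.DataFrame(
        {"score": rng.normal(size=150), "label": rng.normal(size=150)}, index=idx
    )

    # Daily IC: Pearson correlation between scores and labels within each day.
    daily_ic = df.groupby(level="datetime").apply(
        lambda x: x["score"].corr(x["label"])
    )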

.. image:: ../_static/img/analysis/analysis_model_IC.png
    :align: center

.. note::
    - Monthly IC
        Monthly average of the `Information Coefficient`

.. image:: ../_static/img/analysis/analysis_model_monthly_IC.png
    :align: center

.. note::
    - IC
        The distribution of the `Information Coefficient` on each trading day.
    - IC Normal Dist. Q-Q
        The `Quantile-Quantile Plot` compares the distribution of the daily `Information Coefficient` against a normal distribution.

.. image:: ../_static/img/analysis/analysis_model_NDQ.png
    :align: center

.. note::
    - Auto Correlation
        - The `Pearson correlation coefficient` series between the latest `prediction scores` and the `prediction scores` from `lag` days ago for stocks in the portfolio, computed on each trading day.
        - These graphics can be used to estimate the turnover rate.


.. image:: ../_static/img/analysis/analysis_model_auto_correlation.png
    :align: center

```

## /docs/component/rl/framework.rst

```rst path="/docs/component/rl/framework.rst" 
The Framework of QlibRL
=======================

QlibRL contains a full set of components that cover the entire lifecycle of an RL pipeline, including building the simulator of the market, shaping states & actions, training policies (strategies), and backtesting strategies in the simulated environment.

QlibRL is basically implemented with the support of Tianshou and Gym frameworks. The high-level structure of QlibRL is demonstrated below:

.. image:: ../../_static/img/QlibRL_framework.png
   :width: 600
   :align: center

Here, we briefly introduce each component in the figure.

EnvWrapper
------------
EnvWrapper is the complete encapsulation of the simulated environment. It receives actions from the outside (a policy/strategy/agent), simulates the changes in the market, and then returns rewards and updated states, thus forming an interaction loop.

In QlibRL, EnvWrapper is a subclass of gym.Env, so it implements all necessary interfaces of gym.Env. Any classes or pipelines that accept gym.Env should also accept EnvWrapper. Developers do not need to implement their own EnvWrapper to build their own environment. Instead, they only need to implement 4 components of the EnvWrapper:

- `Simulator`
    The simulator is the core component responsible for the environment simulation. Developers could implement all the logic that is directly related to the environment simulation in the Simulator in any way they like. In QlibRL, there are already two implementations of Simulator for single-asset trading: 1) ``SingleAssetOrderExecution``, which is built on Qlib's backtest toolkits and hence considers a lot of practical trading details but is slow; 2) ``SimpleSingleAssetOrderExecution``, which is built on a simplified trading simulator that ignores many details (e.g. trading limitations, rounding) but is quite fast.
- `State interpreter` 
    The state interpreter is responsible for "interpreting" states from the original format (the format provided by the simulator) into a format that the policy can understand. For example, transforming unstructured raw features into numerical tensors.
- `Action interpreter` 
    The action interpreter is similar to the state interpreter. But instead of states, it interprets actions generated by the policy, from the format provided by the policy to the format that is acceptable to the simulator.
- `Reward function` 
    The reward function returns a numerical reward to the policy after each time the policy takes an action. 

EnvWrapper will organically organize these components. Such decomposition allows for better flexibility in development. For example, if the developers want to train multiple types of policies in the same environment, they only need to design one simulator and design different state interpreters/action interpreters/reward functions for different types of policies.

QlibRL has well-defined base classes for all these 4 components. All the developers need to do is define their own components by inheriting the base classes and then implementing all interfaces required by the base classes. The API for the above base components can be found `here <../../reference/api.html#module-qlib.rl>`__.
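
As a hedged sketch of what such a customization might look like (the imports follow ``qlib.rl``'s public modules, while the concrete state fields, observation layout, and reward shaping are hypothetical illustrations rather than shipped implementations):

.. code-block:: python

    import numpy as np
    from gym import spaces

    from qlib.rl.interpreter import StateInterpreter
    from qlib.rl.reward import Reward


    class MyStateInterpreter(StateInterpreter):
        """Turn the simulator's raw state into a fixed-size observation vector."""

        @property
        def observation_space(self):
            return spaces.Box(-np.inf, np.inf, shape=(2,), dtype=np.float32)

        def interpret(self, simulator_state):
            # Hypothetical: assume the simulator state exposes two scalars.
            return np.array(
                [simulator_state.position, simulator_state.time_left],
                dtype=np.float32,
            )


    class MyReward(Reward):
        """Penalize leftover inventory at each step (hypothetical shaping)."""

        def reward(self, simulator_state):
            return -abs(simulator_state.position)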

Policy
------------
QlibRL directly uses Tianshou's policy. Developers could use policies provided by Tianshou off the shelf, or implement their own policies by inheriting Tianshou's policies.

Training Vessel & Trainer
-------------------------
As their names suggest, training vessels and trainers are helper classes used in training. A training vessel is a "ship" that carries a simulator/interpreters/reward function/policy, and it controls the algorithm-related parts of training. Correspondingly, the trainer is responsible for controlling the runtime parts of training.

As you may have noticed, a training vessel itself holds all the required components to build an EnvWrapper rather than holding an instance of EnvWrapper directly. This allows the training vessel to create duplicates of EnvWrapper dynamically when necessary (for example, under parallel training).

With a training vessel, the trainer could finally launch the training pipeline by simple, Scikit-learn-like interfaces (i.e., ``trainer.fit()``).
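
For orientation, here is a heavily hedged sketch of that wiring (the class names follow ``qlib.rl.trainer``, but the exact constructor keywords below are assumptions; consult the API reference linked below for the authoritative signatures):

.. code-block:: python

    # Assumed wiring, not a verbatim recipe: a vessel bundles the components,
    # and the trainer runs the loop with a scikit-learn-like `fit`.
    from qlib.rl.trainer import Trainer, TrainingVessel

    vessel = TrainingVessel(
        simulator_fn=my_simulator_fn,          # callable building a Simulator
        state_interpreter=my_state_interpreter,
        action_interpreter=my_action_interpreter,
        reward=my_reward,
        policy=my_policy,
        train_initial_states=train_orders,     # iterable of initial states
    )
    trainer = Trainer(max_iters=10)
    trainer.fit(vessel)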

The API for Trainer and TrainingVessel can be found `here <../../reference/api.html#module-qlib.rl.trainer>`__.

The RL module is designed in a loosely-coupled way. Currently, the RL examples are integrated with concrete business logic,
but the core part of RL is much simpler than what you see.
To demonstrate this simple core, `a dedicated notebook <https://github.com/microsoft/qlib/tree/main/examples/rl/simple_example.ipynb>`__ for RL without business logic has been created.

```

## /docs/component/rl/guidance.rst

```rst path="/docs/component/rl/guidance.rst" 

========
Guidance
========
.. currentmodule:: qlib

QlibRL can help users quickly get started and conveniently implement quantitative strategies based on reinforcement learning (RL) algorithms. For different user groups, we recommend the following guidance for using QlibRL.

Beginners to Reinforcement Learning Algorithms
==============================================
Whether you are a quantitative researcher who wants to understand what RL can do in trading or a learner who wants to get started with RL algorithms in trading scenarios, if you have limited knowledge of RL and want to be shielded from the various detailed settings so you can get started quickly, we recommend the following sequence for learning QlibRL:
 - Learn the fundamentals of RL in `part1 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#reinforcement-learning>`_.
 - Understand the trading scenarios where RL methods can be applied in `part2 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#potential-application-scenarios-in-quantitative-trading>`_.
 - Run the examples in `part3 <https://qlib.readthedocs.io/en/latest/component/rl/quickstart.html>`_ to solve trading problems using RL.
 - If you want to further explore QlibRL and make some customizations, you need to first understand the framework of QlibRL in `part4 <https://qlib.readthedocs.io/en/latest/component/rl/framework.html>`_ and rewrite specific components according to your needs.

Reinforcement Learning Algorithm Researcher
==============================================
If you are already familiar with existing RL algorithms and dedicated to researching RL algorithms but lack domain knowledge in the financial field, and you want to validate the effectiveness of your algorithms in financial trading scenarios, we recommend the following steps to get started with QlibRL:
 - Understand the trading scenarios where RL methods can be applied in `part2 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#potential-application-scenarios-in-quantitative-trading>`_.
 - Choose an RL application scenario (currently, QlibRL has implemented two scenario examples: order execution and algorithmic trading). Run the example in `part3 <https://qlib.readthedocs.io/en/latest/component/rl/quickstart.html>`_ to get it working.
 - Modify the `policy <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/policy.py>`_ part to incorporate your own RL algorithm.

Quantitative Researcher
=======================
If you have a certain level of financial domain knowledge and coding skills, and you want to explore the application of RL algorithms in the investment field, we recommend the following steps to explore QlibRL:
 - Learn the fundamentals of RL in `part1 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#reinforcement-learning>`_.
 - Understand the trading scenarios where RL methods can be applied in `part2 <https://qlib.readthedocs.io/en/latest/component/rl/overall.html#potential-application-scenarios-in-quantitative-trading>`_.
 - Run the examples in `part3 <https://qlib.readthedocs.io/en/latest/component/rl/quickstart.html>`_ to solve trading problems using RL.
 - Understand the framework of QlibRL in `part4 <https://qlib.readthedocs.io/en/latest/component/rl/framework.html>`_.
 - Choose a suitable RL algorithm based on the characteristics of the problem you want to solve (currently, QlibRL supports the PPO and DQN algorithms based on Tianshou).
 - Design the MDP (Markov Decision Process) process based on market trading rules and the problem you want to solve. Refer to the example in order execution and make corresponding modifications to the following modules: `State <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/state.py#L70>`_, `Metrics <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/state.py#L18>`_, `ActionInterpreter <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/interpreter.py#L199>`_, `StateInterpreter <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/interpreter.py#L68>`_, `Reward <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/reward.py>`_, `Observation <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/interpreter.py#L44>`_, `Simulator <https://github.com/microsoft/qlib/blob/main/qlib/rl/order_execution/simulator_simple.py>`_.
```

## /docs/component/rl/overall.rst

```rst path="/docs/component/rl/overall.rst" 
=====================================================
Reinforcement Learning in Quantitative Trading
=====================================================

Reinforcement Learning
======================
Different from supervised learning tasks such as classification and regression, another important paradigm in machine learning is Reinforcement Learning (RL),
which attempts to optimize an accumulative numerical reward signal by directly interacting with the environment under a few assumptions such as the Markov Decision Process (MDP).

As demonstrated in the following figure, an RL system consists of four elements: 1) the agent, 2) the environment the agent interacts with, 3) the policy the agent follows to take actions in the environment, and 4) the reward signal the environment sends back to the agent.
In general, the agent can perceive and interpret its environment, take actions, and learn through rewards, seeking the maximum long-term overall reward to achieve an optimal solution.

.. image:: ../../_static/img/RL_framework.png
   :width: 300
   :align: center 

RL attempts to learn to produce actions by trial and error.
By sampling actions and then observing which ones lead to the desired outcome, a policy is obtained that generates optimal actions.
In contrast to supervised learning, RL learns not from a label but from a time-delayed label called a reward.
This scalar value lets us know whether the current outcome is good or bad.
In short, the target of RL is to take actions that maximize the reward.

The Qlib Reinforcement Learning toolkit (QlibRL) is an RL platform for quantitative investment, which provides support to implement the RL algorithms in Qlib.


Potential Application Scenarios in Quantitative Trading
=======================================================
RL methods have demonstrated remarkable achievements in various applications, including game playing, resource allocation, recommendation systems, marketing, and advertising.
In the context of investment, which involves continuous decision-making, let's consider the example of the stock market. Investors strive to optimize their investment returns by effectively managing their positions and stock holdings through various buying and selling behaviors.
Furthermore, investors carefully evaluate market conditions and stock-specific information before making each buying or selling decision. From an investor's perspective, this process can be viewed as a continuous decision-making process driven by interactions with the market. RL algorithms offer a promising approach to tackle such challenges.
Here are several scenarios where RL holds potential for application in quantitative investment.

Order Execution
---------------
The order execution task is to execute orders efficiently while considering multiple factors, including optimal prices, minimizing trading costs, reducing market impact, maximizing order fill rates, and achieving execution within a specified time frame. RL can be applied to such tasks by incorporating these objectives into the reward function and action selection process. Specifically, the RL agent interacts with the market environment, observes the state from market information, and decides the next execution step. The RL algorithm learns an optimal execution strategy through trial and error, aiming to maximize the expected cumulative reward, which incorporates the desired objectives.

 - General Setting
    - Environment: The environment represents the financial market where order execution takes place. It encompasses variables such as the order book dynamics, liquidity, price movements, and market conditions.

    - State: The state refers to the information available to the RL agent at a given time step. It typically includes features such as the current order book state (bid-ask spread, order depth), historical price data, historical trading volume, market volatility, and any other relevant information that can aid in decision-making.

    - Action: The action is the decision made by the RL agent based on the observed state. In order execution, actions can include selecting the order size, price, and timing of execution.

    - Reward: The reward is a scalar signal that indicates the performance of the RL agent's action in the environment. The reward function is designed to encourage actions that lead to efficient and cost-effective order execution. It typically considers multiple objectives, such as maximizing price advantages, minimizing trading costs (including transaction fees and slippage), reducing market impact (the effect of the order on the market price), and maximizing order fill rates.

 - Scenarios
    - Single-asset order execution: Single-asset order execution focuses on the task of executing a single order for a specific asset, such as a stock or a cryptocurrency. The primary objective is to execute the order efficiently while considering factors such as maximizing price advantages, minimizing trading costs, reducing market impact, and achieving a high fill rate. The RL agent interacts with the market environment and makes decisions on the order size, price, and timing of execution for that particular asset. The goal is to learn an optimal execution strategy for the single asset, maximizing the expected cumulative reward while considering the specific dynamics and characteristics of that asset.

    - Multi-asset order execution: Multi-asset order execution expands the order execution task to involve multiple assets or securities. It typically involves executing a portfolio of orders across different assets simultaneously or sequentially. Unlike single-asset order execution, the focus is not only on the execution of individual orders but also on managing the interactions and dependencies between different assets within the portfolio. The RL agent needs to make decisions on the order sizes, prices, and timings for each asset in the portfolio, considering their interdependencies, cash constraints, market conditions, and transaction costs. The goal is to learn an optimal execution strategy that balances the execution efficiency for each asset while considering the overall performance and objectives of the portfolio as a whole.
   
The choice of settings and RL algorithm depends on the specific requirements of the task, available data, and desired performance objectives. 

Portfolio Construction
----------------------
Portfolio construction is a process of selecting and allocating assets in an investment portfolio. RL provides a framework to optimize portfolio management decisions by learning from interactions with the market environment and maximizing long-term returns while considering risk management.
 - General Setting
    - State: The state represents the current information about the market and the portfolio. It typically includes historical prices and volumes, technical indicators, and other relevant data.

    - Action: The action corresponds to the decision of allocating capital to different assets in the portfolio. It determines the weights or proportions of investments in each asset.

    - Reward: The reward is a metric that evaluates the performance of the portfolio. It can be defined in various ways, such as total return, risk-adjusted return, or other objectives like maximizing Sharpe ratio or minimizing drawdown.

 - Scenarios
    - Stock market: RL can be used to construct portfolios of stocks, where the agent learns to allocate capital among different stocks.

    - Cryptocurrency market: RL can be applied to construct portfolios of cryptocurrencies, where the agent learns to make allocation decisions.

    - Foreign exchange (Forex) market: RL can be used to construct portfolios of currency pairs, where the agent learns to allocate capital across different currencies based on exchange rate data, economic indicators, and other factors.

Similarly, the choice of basic setting and algorithm depends on the specific requirements of the problem and the characteristics of the market.
```

## /docs/component/rl/quickstart.rst

```rst path="/docs/component/rl/quickstart.rst" 

Quick Start
============
.. currentmodule:: qlib

QlibRL provides an example implementation of a single-asset order execution task. The following is an example of the config file used for training with QlibRL.

.. code-block:: yaml

    simulator:
        # Each step covers 30 minutes
        time_per_step: 30
        # Upper bound of volume. Either null or a float between 0 and 1; a float means the upper bound is that fraction of the market volume
        vol_limit: null
    env:
        # Concurrent environment workers.
        concurrency: 1
        # dummy or subproc or shmem. Corresponding to `parallelism in tianshou <https://tianshou.readthedocs.io/en/master/api/tianshou.env.html#vectorenv>`_.
        parallel_mode: dummy
    action_interpreter:
        class: CategoricalActionInterpreter
        kwargs:
            # Candidate actions. Either a list of length L: [a_1, a_2, ..., a_L], or an integer n, in which case a list of length n+1 is auto-generated, i.e., [0, 1/n, 2/n, ..., n/n].
            values: 14
            # Total number of steps (an upper-bound estimation)
            max_step: 8
        module_path: qlib.rl.order_execution.interpreter
    state_interpreter:
        class: FullHistoryStateInterpreter
        kwargs:
            # Number of dimensions in data.
            data_dim: 6
            # Equal to the total number of records. For example, in SAOE per minute, data_ticks is the length of the day in minutes.
            data_ticks: 240
            # The total number of steps (an upper-bound estimation). For example, 240min / 30min-per-step = 8 steps.
            max_step: 8
            # Provider of the processed data.
            processed_data_provider:
                class: PickleProcessedDataProvider
                module_path: qlib.rl.data.pickle_styled
                kwargs:
                    data_dir: ./data/pickle_dataframe/feature
        module_path: qlib.rl.order_execution.interpreter
    reward:
        class: PAPenaltyReward
        kwargs:
            # The penalty for a large volume in a short time.
            penalty: 100.0
        module_path: qlib.rl.order_execution.reward
    data:
        source:
            order_dir: ./data/training_order_split
            data_dir: ./data/pickle_dataframe/backtest
            # number of time indexes
            total_time: 240
            # start time index
            default_start_time: 0
            # end time index
            default_end_time: 240
            proc_data_dim: 6
        num_workers: 0
        queue_size: 20
    network:
        class: Recurrent
        module_path: qlib.rl.order_execution.network
    policy:
        class: PPO
        kwargs:
            lr: 0.0001
        module_path: qlib.rl.order_execution.policy
    runtime:
        seed: 42
        use_cuda: false
    trainer:
        max_epoch: 2
        # Number of episodes collected in each training iteration
        repeat_per_collect: 5
        earlystop_patience: 2
        # Episodes per collect at training.
        episode_per_collect: 20
        batch_size: 16
        # Perform validation every n iterations
        val_every_n_epoch: 1
        checkpoint_path: ./checkpoints
        checkpoint_every_n_iters: 1


And the config file for backtesting:

.. code-block:: yaml

    order_file: ./data/backtest_orders.csv
    start_time: "9:45"
    end_time: "14:44"
    qlib:
        provider_uri_1min: ./data/bin
        feature_root_dir: ./data/pickle
        # feature generated by today's information
        feature_columns_today: [
            "$open", "$high", "$low", "$close", "$vwap", "$volume",
        ]
        # feature generated by yesterday's information
        feature_columns_yesterday: [
            "$open_v1", "$high_v1", "$low_v1", "$close_v1", "$vwap_v1", "$volume_v1",
        ]
    exchange:
        # the expression for buying and selling stock limitation
        limit_threshold: ['$close == 0', '$close == 0']
        # deal price for buying and selling
        deal_price: ["If($close == 0, $vwap, $close)", "If($close == 0, $vwap, $close)"]
    volume_threshold:
        # volume limits applied to both buying and selling; "cum" means that this is a cumulative value over time
        all: ["cum", "0.2 * DayCumsum($volume, '9:45', '14:44')"]
        # the volume limits of buying
        buy: ["current", "$close"]
        # the volume limits of selling, "current" means that this is a real-time value and will not accumulate over time
        sell: ["current", "$close"]
    strategies:
        30min:
            class: TWAPStrategy
            module_path: qlib.contrib.strategy.rule_strategy
            kwargs: {}
        1day:
            class: SAOEIntStrategy
            module_path: qlib.rl.order_execution.strategy
            kwargs:
                state_interpreter:
                    class: FullHistoryStateInterpreter
                    module_path: qlib.rl.order_execution.interpreter
                    kwargs:
                        max_step: 8
                        data_ticks: 240
                        data_dim: 6
                        processed_data_provider:
                            class: PickleProcessedDataProvider
                            module_path: qlib.rl.data.pickle_styled
                            kwargs:
                                data_dir: ./data/pickle_dataframe/feature
                action_interpreter:
                    class: CategoricalActionInterpreter
                    module_path: qlib.rl.order_execution.interpreter
                    kwargs:
                        values: 14
                        max_step: 8
                network:
                    class: Recurrent
                    module_path: qlib.rl.order_execution.network
                    kwargs: {}
                policy:
                    class: PPO
                    module_path: qlib.rl.order_execution.policy
                    kwargs:
                        lr: 1.0e-4
                        # Local path to the latest model. The model is generated during training, so please run training first if you want to run the backtest with a trained policy. You could also remove this parameter to run the backtest with a randomly initialized policy.
                        weight_file: ./checkpoints/latest.pth
    # Concurrent environment workers.
    concurrency: 5

With the above config files, you can start training the agent by the following command:

.. code-block:: console

    $ python -m qlib.rl.contrib.train_onpolicy --config_path train_config.yml

After the training, you can backtest with the following command:

.. code-block:: console

    $ python -m qlib.rl.contrib.backtest --config_path backtest_config.yml

In this example, :class:`~qlib.rl.order_execution.simulator_qlib.SingleAssetOrderExecution` and :class:`~qlib.rl.order_execution.simulator_simple.SingleAssetOrderExecutionSimple` serve as examples of simulators, :class:`qlib.rl.order_execution.interpreter.FullHistoryStateInterpreter` and :class:`qlib.rl.order_execution.interpreter.CategoricalActionInterpreter` as examples of interpreters, :class:`qlib.rl.order_execution.policy.PPO` as an example of a policy, and :class:`qlib.rl.order_execution.reward.PAPenaltyReward` as an example of a reward.
For the single asset order execution task, if developers have already defined their simulator/interpreters/reward function/policy, they could launch the training and backtest pipeline by simply modifying the corresponding settings in the config files.
The details about the example can be found `here <https://github.com/microsoft/qlib/blob/main/examples/rl/README.md>`_. 

In the future, we will provide more examples for different scenarios such as RL-based portfolio construction.

```

## /docs/component/rl/toctree.rst

```rst path="/docs/component/rl/toctree.rst" 
.. _rl:

========================================================================
Reinforcement Learning in Quantitative Trading
========================================================================

.. toctree::
    Guidance <guidance>
    Overall <overall>
    Quick Start <quickstart>
    Framework <framework>

```

## /docs/component/strategy.rst

```rst path="/docs/component/strategy.rst" 
.. _strategy:

========================================
Portfolio Strategy: Portfolio Management
========================================
.. currentmodule:: qlib

Introduction
============

``Portfolio Strategy`` is designed to adopt different portfolio strategies, which means that users can adopt different algorithms to generate investment portfolios based on the prediction scores of the ``Forecast Model``. Users can use ``Portfolio Strategy`` in an automatic workflow via the ``Workflow`` module; please refer to `Workflow: Workflow Management <workflow.html>`_.

Because the components in ``Qlib`` are designed in a loosely-coupled way, ``Portfolio Strategy`` can also be used as an independent module.

``Qlib`` provides several implemented portfolio strategies. ``Qlib`` also supports custom strategies; users can customize strategies according to their own requirements.

After users specify the models (forecasting signals) and strategies, running a backtest helps users check the performance of a custom model (forecasting signals)/strategy.

Base Class & Interface
======================

BaseStrategy
------------

Qlib provides a base class ``qlib.strategy.base.BaseStrategy``. All strategy classes need to inherit the base class and implement its interface.

- `generate_trade_decision`
    generate_trade_decision is a key interface that generates trade decisions in each trading bar.
    The frequency at which this method is called depends on the executor frequency ("time_per_step"="day" by default), but the trading frequency can be decided by the users' implementation.
    For example, if the user wants to trade weekly while the `time_per_step` is "day" in the executor, the user can return a non-empty TradeDecision weekly (and otherwise return an empty one, like `this <https://github.com/microsoft/qlib/blob/main/qlib/contrib/strategy/signal_strategy.py#L132>`_ ).

Users can inherit `BaseStrategy` to customize their strategy class.

WeightStrategyBase
------------------

Qlib also provides a class ``qlib.contrib.strategy.WeightStrategyBase`` that is a subclass of `BaseStrategy`.

`WeightStrategyBase` only focuses on the target positions, and automatically generates an order list based on positions. It provides the `generate_target_weight_position` interface.

- `generate_target_weight_position`
    - Generate the target position from the current position and the trading date. Cash is not considered in
      the output weight distribution.
    - Return the target position.

    .. note::
        Here the `target position` means the target percentage of total assets.

`WeightStrategyBase` implements the interface `generate_order_list`, whose process is as follows.

- Call `generate_target_weight_position` method to generate the target position.
- Generate the target amount of stocks from the target position.
- Generate the order list from the target amount.

Users can inherit `WeightStrategyBase` and implement the interface `generate_target_weight_position` to customize their strategy class, which only focuses on the target positions.
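
As a hedged sketch (the method name and its role follow the documented interface above, but the exact signature and the equal-weight logic are illustrative assumptions, not Qlib's shipped code):

.. code-block:: python

    from qlib.contrib.strategy import WeightStrategyBase


    class EqualWeightTopN(WeightStrategyBase):
        """Hold the N best-scored stocks at equal weight (illustrative)."""

        N = 10

        def generate_target_weight_position(self, score, current, trade_start_time, trade_end_time):
            # `score` is assumed to be a pandas Series indexed by instrument.
            top = score.nlargest(self.N).index
            # Target percentage of total assets for each chosen instrument.
            return {inst: 1.0 / self.N for inst in top}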

Implemented Strategy
====================

Qlib provides an implemented strategy class named `TopkDropoutStrategy`.

TopkDropoutStrategy
-------------------
`TopkDropoutStrategy` is a subclass of `BaseStrategy` and implements the interface `generate_order_list`, whose process is as follows.

- Adopt the ``Topk-Drop`` algorithm to calculate the target amount of each stock

    .. note::
        There are two parameters for the ``Topk-Drop`` algorithm:

        - `Topk`: The number of stocks held
        - `Drop`: The number of stocks sold on each trading day

        In general, the number of stocks currently held is `Topk`, with the exception of being zero at the beginning period of trading.
        For each trading day, let :math:`d` be the number of instruments currently held whose rank is greater than :math:`K` when the instruments are ranked by prediction score from high to low.
        Then the :math:`d` currently held stocks with the worst `prediction score` will be sold, and the same number of unheld stocks with the best `prediction score` will be bought.

        In general, :math:`d` = `Drop`, especially when the pool of candidate instruments is large, :math:`K` is large, and `Drop` is small.

        In most cases, the ``TopkDrop`` algorithm sells and buys `Drop` stocks every trading day, which yields a turnover rate of :math:`2 \times Drop / K`. A standalone sketch of the selection step is shown after this list.

        The following images illustrate a typical scenario.

        .. image:: ../_static/img/topk_drop.png
            :alt: Topk-Drop



- Generate the order list from the target amount.
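
As a hedged illustration of the ``Topk-Drop`` selection step described above (a standalone sketch following the documented rule, not Qlib's internal implementation; the scores and holdings are hypothetical):

.. code-block:: python

    import pandas as pd


    def topk_drop_once(scores: pd.Series, held: set, topk: int):
        """One trading day of Topk-Drop: return (to_sell, to_buy)."""
        ranked = list(scores.sort_values(ascending=False).index)
        rank = {inst: i for i, inst in enumerate(ranked)}  # 0 = best score
        # d = number of held instruments ranked outside the top K.
        d = sum(1 for inst in held if rank.get(inst, len(ranked)) >= topk)
        # Sell the d held instruments with the worst scores ...
        held_by_rank = sorted(held, key=lambda inst: rank.get(inst, len(ranked)))
        to_sell = held_by_rank[len(held) - d:] if d else []
        # ... and buy the same number of unheld instruments with the best scores.
        to_buy = [inst for inst in ranked if inst not in held][:d]
        return to_sell, to_buy


    scores = pd.Series({"A": 0.9, "B": 0.7, "C": 0.5, "D": 0.3, "E": 0.1})
    print(topk_drop_once(scores, held={"A", "B", "E"}, topk=3))
    # "E" is ranked outside the top 3, so it is sold and "C" is bought.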

EnhancedIndexingStrategy
------------------------
`EnhancedIndexingStrategy` combines the arts of active management and passive management,
with the aim of outperforming a benchmark index (e.g., S&P 500) in terms of portfolio return while controlling
the risk exposure (a.k.a. tracking error).

For more information, please refer to `qlib.contrib.strategy.signal_strategy.EnhancedIndexingStrategy`
and `qlib.contrib.strategy.optimizer.enhanced_indexing.EnhancedIndexingOptimizer`.


Usage & Example
===============

First, users can create a model to get trading signals (held in the variable named ``pred_score`` in the following cases).

Prediction Score
----------------

The `prediction score` is a pandas DataFrame. Its index is <datetime(pd.Timestamp), instrument(str)> and it must
contain a `score` column.

A prediction sample is shown as follows.

.. code-block:: python

      datetime instrument     score
    2019-01-04   SH600000 -0.505488
    2019-01-04   SZ002531 -0.320391
    2019-01-04   SZ000999  0.583808
    2019-01-04   SZ300569  0.819628
    2019-01-04   SZ001696 -0.137140
                 ...            ...
    2019-04-30   SZ000996 -1.027618
    2019-04-30   SH603127  0.225677
    2019-04-30   SH603126  0.462443
    2019-04-30   SH603133 -0.302460
    2019-04-30   SZ300760 -0.126383

``Forecast Model`` module can make predictions, please refer to `Forecast Model: Model Training & Prediction <model.html>`_.

Normally, the prediction score is the output of the models. But some models are learned from a label with a different scale, so the scale of the prediction score may be different from your expectation (e.g. the return of instruments).

Qlib does not add a step to scale the prediction score to a unified scale, for the following reasons:

- Not every trading strategy cares about the scale (e.g. TopkDropoutStrategy only cares about the order), so the strategy is responsible for rescaling the prediction score (e.g. some portfolio-optimization-based strategies may require a meaningful scale).
- The model has the flexibility to define the target, loss, and data processing, so we don't think there is a silver bullet to rescale it back directly based only on the model's outputs. If you want to scale it back to some meaningful values (e.g. stock returns), an intuitive solution is to fit a regression model from the model's recent outputs to your recent target values, as sketched below.
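
A minimal sketch of that calibration idea (the data here is synthetic and the linear form is an assumption; any regression model could play this role):

.. code-block:: python

    import numpy as np
    from sklearn.linear_model import LinearRegression

    rng = np.random.default_rng(0)
    # Hypothetical history: recent raw model scores and realized returns.
    recent_scores = rng.normal(size=500)
    recent_returns = 0.01 * recent_scores + rng.normal(scale=0.02, size=500)

    # Fit scores -> returns, then map new scores onto a return-like scale.
    calib = LinearRegression().fit(recent_scores.reshape(-1, 1), recent_returns)
    new_scores = rng.normal(size=10)
    rescaled = calib.predict(new_scores.reshape(-1, 1))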

Running backtest
----------------

- In most cases, users could backtest their portfolio management strategy with ``backtest_daily``.

    .. code-block:: python

        from pprint import pprint

        import qlib
        import pandas as pd
        from qlib.utils.time import Freq
        from qlib.utils import flatten_dict
        from qlib.contrib.evaluate import backtest_daily
        from qlib.contrib.evaluate import risk_analysis
        from qlib.contrib.strategy import TopkDropoutStrategy

        # init qlib
        qlib.init(provider_uri=<qlib data dir>)

        CSI300_BENCH = "SH000300"
        STRATEGY_CONFIG = {
            "topk": 50,
            "n_drop": 5,
            # pred_score, pd.Series
            "signal": pred_score,
        }


        strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
        report_normal, positions_normal = backtest_daily(
            start_time="2017-01-01", end_time="2020-08-01", strategy=strategy_obj
        )
        analysis = dict()
        # default frequency will be daily (i.e. "day")
        analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
        analysis["excess_return_with_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"] - report_normal["cost"])

        analysis_df = pd.concat(analysis)  # type: pd.DataFrame
        pprint(analysis_df)



- If users would like to control their strategies in a more detailed way (e.g. users have a more advanced version of the executor), they could follow this example.

    .. code-block:: python

        from pprint import pprint

        import qlib
        import pandas as pd
        from qlib.utils.time import Freq
        from qlib.utils import flatten_dict
        from qlib.backtest import backtest, executor
        from qlib.contrib.evaluate import risk_analysis
        from qlib.contrib.strategy import TopkDropoutStrategy

        # init qlib
        qlib.init(provider_uri=<qlib data dir>)

        CSI300_BENCH = "SH000300"
        # Benchmark is for calculating the excess return of your strategy.
        # Its data format will be like **ONE normal instrument**.
        # For example, you can query its data with the code below
        # `D.features(["SH000300"], ["$close"], start_time='2010-01-01', end_time='2017-12-31', freq='day')`
        # It is different from the argument `market`, which indicates a universe of stocks (e.g. **A SET** of stocks like csi300)
        # For example, you can query all data from a stock market with the code below.
        # ` D.features(D.instruments(market='csi300'), ["$close"], start_time='2010-01-01', end_time='2017-12-31', freq='day')`

        FREQ = "day"
        STRATEGY_CONFIG = {
            "topk": 50,
            "n_drop": 5,
            # pred_score, pd.Series
            "signal": pred_score,
        }

        EXECUTOR_CONFIG = {
            "time_per_step": "day",
            "generate_portfolio_metrics": True,
        }

        backtest_config = {
            "start_time": "2017-01-01",
            "end_time": "2020-08-01",
            "account": 100000000,
            "benchmark": CSI300_BENCH,
            "exchange_kwargs": {
                "freq": FREQ,
                "limit_threshold": 0.095,
                "deal_price": "close",
                "open_cost": 0.0005,
                "close_cost": 0.0015,
                "min_cost": 5,
            },
        }

        # strategy object
        strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
        # executor object
        executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
        # backtest
        portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
        analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))
        # backtest info
        report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)

        # analysis
        analysis = dict()
        analysis["excess_return_without_cost"] = risk_analysis(
            report_normal["return"] - report_normal["bench"], freq=analysis_freq
        )
        analysis["excess_return_with_cost"] = risk_analysis(
            report_normal["return"] - report_normal["bench"] - report_normal["cost"], freq=analysis_freq
        )

        analysis_df = pd.concat(analysis)  # type: pd.DataFrame
        # log metrics
        analysis_dict = flatten_dict(analysis_df["risk"].unstack().T.to_dict())
        # print out results
        pprint(f"The following are analysis results of benchmark return({analysis_freq}).")
        pprint(risk_analysis(report_normal["bench"], freq=analysis_freq))
        pprint(f"The following are analysis results of the excess return without cost({analysis_freq}).")
        pprint(analysis["excess_return_without_cost"])
        pprint(f"The following are analysis results of the excess return with cost({analysis_freq}).")
        pprint(analysis["excess_return_with_cost"])


Result
------

The backtest results are in the following form:

.. code-block:: python

                                                      risk
    excess_return_without_cost mean               0.000605
                               std                0.005481
                               annualized_return  0.152373
                               information_ratio  1.751319
                               max_drawdown      -0.059055
    excess_return_with_cost    mean               0.000410
                               std                0.005478
                               annualized_return  0.103265
                               information_ratio  1.187411
                               max_drawdown      -0.075024


- `excess_return_without_cost`
    - `mean`
        Mean value of the `CAR` (cumulative abnormal return) without cost
    - `std`
        The `Standard Deviation` of `CAR` (cumulative abnormal return) without cost.
    - `annualized_return`
        The `Annualized Rate` of `CAR` (cumulative abnormal return) without cost.
    - `information_ratio`
        The `Information Ratio` without cost. Please refer to `Information Ratio – IR <https://www.investopedia.com/terms/i/informationratio.asp>`_.
    - `max_drawdown`
        The `Maximum Drawdown` of `CAR` (cumulative abnormal return) without cost, please refer to `Maximum Drawdown (MDD) <https://www.investopedia.com/terms/m/maximum-drawdown-mdd.asp>`_.

- `excess_return_with_cost`
    - `mean`
        Mean value of the `CAR` (cumulative abnormal return) series with cost
    - `std`
        The `Standard Deviation` of `CAR` (cumulative abnormal return) series with cost.
    - `annualized_return`
        The `Annualized Rate` of `CAR` (cumulative abnormal return) with cost.
    - `information_ratio`
        The `Information Ratio` with cost. Please refer to `Information Ratio – IR <https://www.investopedia.com/terms/i/informationratio.asp>`_.
    - `max_drawdown`
        The `Maximum Drawdown` of `CAR` (cumulative abnormal return) with cost, please refer to `Maximum Drawdown (MDD) <https://www.investopedia.com/terms/m/maximum-drawdown-mdd.asp>`_.


Reference
=========
To know more about the `prediction score` `pred_score` output by ``Forecast Model``, please refer to `Forecast Model: Model Training & Prediction <model.html>`_.

```

## /docs/conf.py

```py path="/docs/conf.py" 
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.


# QLib documentation build configuration file, created by
# sphinx-quickstart on Wed Sep 27 15:16:05 2017.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

import pkg_resources


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.todo",
    "sphinx.ext.mathjax",
    "sphinx.ext.napoleon",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = ".rst"

# The master toctree document.
master_doc = "index"


# General information about the project.
project = "QLib"
copyright = "Microsoft"
author = "Microsoft"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = pkg_resources.get_distribution("pyqlib").version
# The full version, including alpha/beta/rc tags.
release = pkg_resources.get_distribution("pyqlib").version

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "en_US"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "hidden"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"


# If true, '()' will be appended to :func: etc. cross-reference text.
add_function_parentheses = False

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
add_module_names = True

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"

html_logo = "_static/img/logo/1.png"


# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
# html_context = {
#     "display_github": False,
#     "last_updated": True,
#     "commit": True,
#     "github_user": "Microsoft",
#     "github_repo": "QLib",
#     'github_version': 'master',
#     'conf_py_path': '/docs/',

# }
#
html_theme_options = {
    "logo_only": True,
    "collapse_navigation": False,
    "navigation_depth": 4,
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
html_sidebars = {
    "**": [
        "about.html",
        "navigation.html",
        "relations.html",  # needs 'show_related': True theme option to display
        "searchbox.html",
    ]
}


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = "qlibdoc"


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "qlib.tex", "QLib Documentation", "Microsoft", "manual"),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "qlib", "QLib Documentation", [author], 1)]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,
        "QLib",
        "QLib Documentation",
        author,
        "QLib",
        "One line description of project.",
        "Miscellaneous",
    ),
]


# -- Options for Epub output ----------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project
epub_author = author
epub_publisher = author
epub_copyright = copyright

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ["search.html"]


autodoc_member_order = "bysource"
autodoc_default_flags = ["members"]
autodoc_default_options = {
    "members": True,
    "member-order": "bysource",
    "special-members": "__init__",
}

```

## /docs/make.bat

```bat path="/docs/make.bat" 
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd

```

## /docs/requirements.txt

```txt path="/docs/requirements.txt" 
Cython
cmake
numpy
scipy
scikit-learn
pandas
tianshou
sphinx_rtd_theme
```


