InternLM · Harold-lkk · Apr 29, 2026 · Apr 29, 2026 · Apr 30, 2026 · May 1, 2026
diff --git a/examples/v1/config/agent_rl_qwen35_30b_grpo.py b/examples/v1/config/agent_rl_qwen35_30b_grpo.py
@@ -48,7 +48,7 @@
 max_concurrent_groups = 512
 
 max_prompt_length = 4096
-pack_max_length = 68 * 1024
+pack_max_length = 256 * 1024
 max_response_length = 64 * 1024
 
 train_ep_size = 1
@@ -66,7 +66,7 @@
 lr = 1e-6
 hf_interval = 5
 total_epochs = 10
-sp_size = 1
+sp_size = 4
 # evaluation settings
 enable_evaluate = False
 enable_initial_evaluate = False
@@ -218,8 +218,14 @@ def convert_rollout_tractory_to_train(env, group_data_items):
 model_cfg.text_config.z_loss_cfg = None
 model_cfg.text_config.balancing_loss_cfg = None
 model_cfg.text_config.freeze_routers = True
+# model_cfg.text_config.mtp_config = MTPConfig(
+#     num_layers=1,
+#     loss_scaling_factor=1.0,
+#     detach_mtp_lm_head_weight=True,
+#     detach_mtp_inputs=True,
+#     share_weights=False,
+# )
 model_cfg.text_config.vocab_size = 251392
-# model_cfg.text_config.embed_grad_max_token_id = 251173
 
 optim_cfg = AdamWConfig(
     lr=lr,