Train transformer language models with reinforcement learning.
1723 matches across 15 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_skills_cli.py | 26 | def test_cmd_list_without_target(self, capsys): |
| LOW | tests/test_skills_cli.py | 38 | def test_cmd_list_with_target(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 51 | def test_cmd_list_empty_target(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 61 | def test_cmd_install_single_skill(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 73 | def test_cmd_install_all_skills(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 85 | def test_cmd_install_no_skill_or_all(self, capsys): |
| LOW | tests/test_skills_cli.py | 95 | def test_cmd_install_both_skill_and_all(self, capsys): |
| LOW | tests/test_skills_cli.py | 105 | def test_cmd_install_nonexistent_skill(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 116 | def test_cmd_install_already_exists(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 130 | def test_cmd_install_with_force(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 144 | def test_cmd_uninstall_success(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 159 | def test_cmd_uninstall_not_installed(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 170 | def test_cmd_install_creates_target_directory(self, tmp_path, capsys): |
| LOW | tests/test_skills_cli.py | 187 | def test_cmd_uninstall_invalid_target(self, capsys): |
| LOW | tests/test_skills_cli.py | 201 | def test_add_skills_subcommands_creates_parsers(self): |
| LOW | tests/test_skills_cli.py | 222 | def test_list_command_optional_target(self): |
| LOW | tests/test_skills_cli.py | 236 | def test_install_command_requires_target(self): |
| LOW | tests/test_skills_cli.py | 282 | def test_default_scope_is_project(self): |
| LOW | tests/test_rloo_trainer.py | 102 | def test_train_with_num_generations_eval(self): |
| LOW | tests/test_rloo_trainer.py | 126 | def test_train_multiple_iterations(self): |
| LOW | tests/test_rloo_trainer.py | 233 | def test_train_peft_with_gradient_checkpointing(self): |
| LOW | tests/test_rloo_trainer.py | 269 | def test_train_different_reward_model(self): |
| LOW | tests/test_rloo_trainer.py | 308 | def test_train_reward_func_standard(self): |
| LOW | tests/test_rloo_trainer.py | 342 | def test_train_reward_func_conversational(self): |
| LOW | tests/test_rloo_trainer.py | 377 | def test_train_multiple_reward_funcs(self): |
| LOW | tests/test_rloo_trainer.py | 415 | def test_train_sync_and_async_reward_funcs(self): |
| LOW | tests/test_rloo_trainer.py | 464 | def non_applicable_reward_func(completions, **kwargs): |
| LOW | tests/test_rloo_trainer.py | 500 | def test_train_multiple_reward_funcs_with_weights(self): |
| LOW | tests/test_rloo_trainer.py | 544 | def test_reward_metric_reflects_reward_weights(self): |
| LOW | tests/test_rloo_trainer.py | 581 | def test_train_multiple_mixed_reward_funcs(self): |
| LOW | tests/test_rloo_trainer.py | 615 | def test_train_reward_func_additional_column(self): |
| LOW | tests/test_rloo_trainer.py | 655 | def test_train_with_sync_ref_model(self): |
| LOW | tests/test_rloo_trainer.py | 720 | def test_train_with_pad_to_multiple_of(self): |
| LOW | tests/test_rloo_trainer.py | 800 | def test_train_vllm_structured_outputs(self): |
| LOW | tests/test_rloo_trainer.py | 832 | def test_train_with_additional_generation_kwargs(self): |
| LOW | tests/test_rloo_trainer.py | 869 | def test_train_vllm_with_additional_generation_kwargs(self): |
| LOW | tests/test_rloo_trainer.py | 905 | def test_train_with_normalized_advantages(self): |
| LOW | tests/test_rloo_trainer.py | 935 | def test_train_with_clipped_rewards(self): |
| LOW | tests/test_rloo_trainer.py | 966 | def test_train_with_mask_truncated_completions(self, mock_generate): |
| LOW | tests/test_rloo_trainer.py | 1015 | def test_train_with_mask_truncated_completions_all_masked(self): |
| LOW | tests/test_rloo_trainer.py | 1053 | def test_warning_raised_all_rewards_none(self, caplog): |
| LOW | tests/test_rloo_trainer.py | 1082 | def test_train_num_generations_larger_than_batch_size(self): |
| LOW | tests/test_rloo_trainer.py | 1112 | def test_train_multiple_dataloader_workers(self): |
| LOW | tests/test_rloo_trainer.py | 1153 | def test_train_with_generation_kwargs(self): |
| LOW | tests/test_rloo_trainer.py | 1184 | def test_train_with_reward_func_accessing_trainer_state(self): |
| LOW | tests/test_rloo_trainer.py | 1209 | def test_train_reward_func_with_log_extra(self): |
| LOW | tests/test_rloo_trainer.py | 1235 | def test_train_reward_func_with_log_metric(self): |
| LOW | tests/test_rloo_trainer.py | 1262 | def test_prepare_input_called_with_correct_data(self): |
| LOW | tests/test_rloo_trainer.py | 1389 | def test_train_vlm_with_pad_to_multiple_of(self): |
| LOW | tests/test_rloo_trainer.py | 1433 | def test_train_vlm_beta_non_zero(self, model_id): |
| LOW | tests/test_rloo_trainer.py | 1575 | def test_train_vlm_multi_image(self, model_id): |
| LOW | tests/test_rloo_trainer.py | 1608 | def test_train_with_chat_template_kwargs(self): |
| LOW | tests/test_rloo_trainer.py | 1638 | def test_mismatched_reward_processing_classes_length(self): |
| LOW | tests/test_rloo_trainer.py | 1664 | def test_correct_reward_processing_classes_list(self): |
| LOW | tests/test_rloo_trainer.py | 1695 | def test_single_reward_model_with_single_processing_class(self): |
| LOW | tests/test_utils.py | 181 | def test_pad_to_multiple_of_side_left(self): |
| LOW | tests/test_utils.py | 189 | def test_pad_to_multiple_of_no_extra_padding(self): |
| LOW | tests/test_utils.py | 199 | def test_hash_module_deterministic_across_order(self): |
| LOW | tests/test_utils.py | 216 | def test_hash_module_changes_with_value(self): |
| LOW | tests/test_utils.py | 224 | def test_hash_module_includes_dtype(self): |
| 692 more matches not shown… | | | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_skills_cli.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_rloo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_utils.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/conftest.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_callbacks.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_sft_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/testing_constants.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_chat_template_utils.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_grpo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_model_utils.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/testing_utils.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_data_utils.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/__init__.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_vllm_client_server.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_rich_progress_callback.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_activation_offloading.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_rewards.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_cli_utils.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_skills.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_dpo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_cli.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/test_reward_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/_openreward_echo_env.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_utils.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_gkd_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_nash_md_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_minillm_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_tpo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_gspo_token_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_sdft_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/__init__.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_sdpo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_xpo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_gold_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_prm_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_online_dpo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_openreward.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_openreward.py | 201 | # If a task spec ever shipped a `prompt` key, the metadata loop must |
| LOW | tests/experimental/test_async_grpo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_ssd_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_orpo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_kto_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | …s/experimental/test_grpo_with_replay_buffer_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_bco_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_cpo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_distillation_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_modeling_value_head.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_dppo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_ppo_trainer.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/experimental/test_merge_model_callback.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/invariant/__init__.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/invariant/test_invariant.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/distributed/__init__.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | tests/distributed/test_distributed.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | examples/accelerate_configs/alst_ulysses_4gpu.yaml | 1 | # ALST/Ulysses Sequence Parallelism with 2D Parallelism (DP + SP) for 4 GPUs |
| LOW | examples/datasets/tldr.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | examples/datasets/deepmath_103k.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | examples/datasets/ultrafeedback.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | examples/datasets/lm-human-preferences-sentiment.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| LOW | examples/datasets/prm800k.py | 1 | # Copyright 2020-2026 The HuggingFace Team. All rights reserved. |
| 276 more matches not shown… | | | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | AGENTS.md | 0 | brief one-line description of what this does. args: param1 (`str`): description of required param. param2 (`int`, *optio |
| HIGH | CLAUDE.md | 0 | brief one-line description of what this does. args: param1 (`str`): description of required param. param2 (`int`, *optio |
| HIGH | .cursor/BUGBOT.md | 0 | brief one-line description of what this does. args: param1 (`str`): description of required param. param2 (`int`, *optio |
| HIGH | .ai/AGENTS.md | 0 | brief one-line description of what this does. args: param1 (`str`): description of required param. param2 (`int`, *optio |
| HIGH | tests/test_rloo_trainer.py | 0 | reward function that rewards completions with more unique letters. |
| HIGH | tests/test_rloo_trainer.py | 0 | reward function that rewards completions with more unique letters. |
| HIGH | tests/test_grpo_trainer.py | 0 | reward function that rewards completions with more unique letters. |
| HIGH | tests/test_grpo_trainer.py | 0 | reward function that rewards completions with more unique letters. |
| HIGH | tests/test_grpo_trainer.py | 0 | reward function that rewards completions with more unique letters. |
| HIGH | tests/test_rloo_trainer.py | 0 | test that training works with vllm for generation with structured outputs. |
| HIGH | tests/test_grpo_trainer.py | 0 | test that training works with vllm for generation with structured outputs. |
| HIGH | tests/test_grpo_trainer.py | 0 | test that training works with vllm for generation with structured outputs. |
| HIGH | tests/test_callbacks.py | 0 | test that bema callback respects the update frequency. |
| HIGH | tests/test_callbacks.py | 0 | test that bema callback respects the update frequency. |
| HIGH | tests/test_callbacks.py | 0 | test that bema callback respects the update frequency. |
| HIGH | tests/test_grpo_trainer.py | 0 | multiplies two integers. args: a: the first integer. b: the second integer. returns: the product of the two integers. |
| HIGH | tests/test_vllm_client_server.py | 0 | multiplies two integers. args: a: the first integer. b: the second integer. returns: the product of the two integers. |
| HIGH | tests/test_vllm_client_server.py | 0 | multiplies two integers. args: a: the first integer. b: the second integer. returns: the product of the two integers. |
| HIGH | tests/test_vllm_client_server.py | 0 | multiplies two integers. args: a: the first integer. b: the second integer. returns: the product of the two integers. |
| HIGH | tests/test_vllm_client_server.py | 0 | multiplies two integers. args: a: the first integer. b: the second integer. returns: the product of the two integers. |
| HIGH | docs/source/grpo_trainer.md | 0 | multiplies two integers. args: a: the first integer. b: the second integer. returns: the product of the two integers. |
| HIGH | tests/test_data_utils.py | 0 | \ <\|start\|>system<\|message\|>you are hugginggpt. knowledge cutoff: 2024-06 current date: {strftime("%y-%m-%d")} reasoning |
| HIGH | tests/test_data_utils.py | 0 | \ <\|start\|>system<\|message\|>you are hugginggpt. knowledge cutoff: 2024-06 current date: {strftime("%y-%m-%d")} reasoning |
| HIGH | tests/test_data_utils.py | 0 | \ <\|start\|>system<\|message\|>you are hugginggpt. knowledge cutoff: 2024-06 current date: {strftime("%y-%m-%d")} reasoning |
| HIGH | tests/test_data_utils.py | 0 | \ <\|start\|>system<\|message\|>you are hugginggpt. knowledge cutoff: 2024-06 current date: {strftime("%y-%m-%d")} reasoning |
| HIGH | tests/test_cli_utils.py | 0 | test that config defaults are applied to all subparsers. |
| HIGH | tests/test_cli_utils.py | 0 | test that config defaults are applied to all subparsers. |
| HIGH | tests/test_cli_utils.py | 0 | test that config defaults are applied to all subparsers. |
| HIGH | tests/test_cli_utils.py | 0 | test that config defaults are applied to all subparsers. |
| HIGH | tests/experimental/test_ppo_trainer.py | 0 | test if we instantiate a model by adding `summary_drop_prob` to the config it will be added to the v_head |
| HIGH | tests/experimental/test_ppo_trainer.py | 0 | test if we instantiate a model by adding `summary_drop_prob` to the config it will be added to the v_head |
| HIGH | tests/experimental/test_ppo_trainer.py | 0 | test if we instantiate a model by adding `summary_drop_prob` to the config it will be added to the v_head |
| HIGH | tests/experimental/test_ppo_trainer.py | 0 | test if we instantiate a model by adding `summary_drop_prob` to the config it will be added to the v_head |
| HIGH | tests/experimental/test_ppo_trainer.py | 0 | simply creates a peft model and checks that it can be loaded. |
| HIGH | tests/experimental/test_ppo_trainer.py | 0 | simply creates a peft model and checks that it can be loaded. |
| HIGH | tests/experimental/test_ppo_trainer.py | 0 | simply creates a peft model and checks that it can be loaded. |
| HIGH | docs/source/openenv.md | 0 | make a guess in the wordle environment. args: guess: the guessed word, formatted as '[abcde]' returns: the feedback mess |
| HIGH | examples/scripts/openenv/multi_env.py | 0 | make a guess in the wordle environment. args: guess: the guessed word, formatted as '[abcde]' returns: the feedback mess |
| HIGH | examples/scripts/openenv/wordle.py | 0 | make a guess in the wordle environment. args: guess: the guessed word, formatted as '[abcde]' returns: the feedback mess |
| HIGH | trl/experimental/cpo/cpo_trainer.py | 0 | generate samples from the model and reference model for the given batch of inputs. |
| HIGH | trl/experimental/bco/bco_trainer.py | 0 | generate samples from the model and reference model for the given batch of inputs. |
| HIGH | trl/experimental/orpo/orpo_trainer.py | 0 | generate samples from the model and reference model for the given batch of inputs. |
| HIGH | trl/experimental/cpo/cpo_trainer.py | 0 | overriding built-in evaluation loop to store metrics for each batch. prediction/evaluation loop, shared by `trainer.eval |
| HIGH | trl/experimental/bco/bco_trainer.py | 0 | overriding built-in evaluation loop to store metrics for each batch. prediction/evaluation loop, shared by `trainer.eval |
| HIGH | trl/experimental/orpo/orpo_trainer.py | 0 | overriding built-in evaluation loop to store metrics for each batch. prediction/evaluation loop, shared by `trainer.eval |
| HIGH | trl/experimental/cpo/cpo_trainer.py | 0 | log `logs` on the various objects watching training, including stored metrics. args: logs (`dict[str, float]`): the valu |
| HIGH | trl/experimental/bco/bco_trainer.py | 0 | log `logs` on the various objects watching training, including stored metrics. args: logs (`dict[str, float]`): the valu |
| HIGH | trl/experimental/orpo/orpo_trainer.py | 0 | log `logs` on the various objects watching training, including stored metrics. args: logs (`dict[str, float]`): the valu |
| HIGH | trl/experimental/dppo/dppo_trainer.py | 0 | tokenize prompts and extract images/multimodal fields for generation. |
| HIGH | trl/trainer/rloo_trainer.py | 0 | tokenize prompts and extract images/multimodal fields for generation. |
| HIGH | trl/trainer/grpo_trainer.py | 0 | tokenize prompts and extract images/multimodal fields for generation. |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/conftest.py | 54 | # ============================================================================ |
| MEDIUM | tests/conftest.py | 56 | # ============================================================================ |
| MEDIUM | tests/conftest.py | 68 | # ============================================================================ |
| MEDIUM | examples/scripts/grpo_agent.py | 184 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 186 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 231 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 233 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 260 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 262 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 267 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 269 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 284 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 286 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 298 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 300 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 310 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 312 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 315 | # ------------------------ |
| MEDIUM | examples/scripts/grpo_agent.py | 317 | # ------------------------ |
| MEDIUM | examples/scripts/openenv/browsergym_llm.py | 234 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/browsergym_llm.py | 236 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/browsergym_llm.py | 253 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/browsergym_llm.py | 255 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/browsergym_llm.py | 263 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/browsergym_llm.py | 265 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/sudoku.py | 105 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/sudoku.py | 107 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/sudoku.py | 187 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/sudoku.py | 189 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/sudoku.py | 339 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/sudoku.py | 341 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/sudoku.py | 369 | # --------------------------------------------------------------------------- |
| MEDIUM | examples/scripts/openenv/sudoku.py | 371 | # --------------------------------------------------------------------------- |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 269 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 271 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 319 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 321 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 338 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 340 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 421 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 423 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 472 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/ssd/ssd_trainer.py | 474 | # ------------------------------------------------------------------ |
| MEDIUM | trl/experimental/distillation/distillation_trainer.py | 702 | # ────────────────────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/distillation/distillation_trainer.py | 704 | # ────────────────────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/distillation/distillation_trainer.py | 764 | # ────────────────────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/distillation/distillation_trainer.py | 766 | # ────────────────────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/distillation/distillation_trainer.py | 1022 | # ────────────────────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/distillation/distillation_trainer.py | 1024 | # ────────────────────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/distillation/distillation_trainer.py | 1576 | # ────────────────────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/distillation/distillation_trainer.py | 1578 | # ────────────────────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/openreward/_spec.py | 144 | # ── public surface ────────────────────────────────────────────── |
| MEDIUM | trl/experimental/openreward/_spec.py | 251 | # ── internals ─────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/openreward/environment.py | 101 | # ──────────────────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/openreward/environment.py | 204 | # ── TRL contract ───────────────────────────────────────────────── |
| MEDIUM | trl/experimental/openreward/environment.py | 252 | # ── helpers ────────────────────────────────────────────────────── |
| MEDIUM | trl/experimental/openreward/environment.py | 299 | # ── dynamic tool binding ───────────────────────────────────────────── |
| MEDIUM | trl/experimental/openreward/environment.py | 366 | # ── small utilities ────────────────────────────────────────────────── |
| MEDIUM | trl/trainer/rloo_trainer.py | 645 | # <-───────> per_device_train_batch_size=3 |
| MEDIUM | trl/trainer/grpo_trainer.py | 930 | # <-───────> per_device_train_batch_size=3 |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/test_rloo_trainer.py | 1648 | # Create a single processing class (tokenizer) |
| MEDIUM | tests/test_rloo_trainer.py | 1702 | # Create a single processing class (tokenizer) |
| MEDIUM | tests/test_grpo_trainer.py | 93 | # Create a mock trainer with minimal setup |
| MEDIUM | tests/test_grpo_trainer.py | 96 | # Create a mock accelerator |
| MEDIUM | tests/test_grpo_trainer.py | 100 | # Create a minimal trainer instance just to access the method |
| MEDIUM | tests/test_grpo_trainer.py | 2704 | # Create a single processing class (tokenizer) |
| MEDIUM | tests/test_grpo_trainer.py | 2758 | # Create a single processing class (tokenizer) |
| MEDIUM | tests/test_grpo_trainer.py | 3146 | # Create a VLM processor |
| MEDIUM | tests/test_cli_utils.py | 176 | # Create the main parser |
| MEDIUM | tests/test_cli_utils.py | 182 | # Create a subparser for a specific command |
| MEDIUM | tests/test_cli_utils.py | 202 | # Create the main parser |
| MEDIUM | tests/test_cli_utils.py | 208 | # Create a subparser for a specific command |
| MEDIUM | tests/test_cli_utils.py | 225 | # Create the main parser |
| MEDIUM | tests/test_cli_utils.py | 231 | # Create a subparser for a specific command |
| MEDIUM | tests/test_cli_utils.py | 247 | # Create the main parser |
| MEDIUM | tests/test_cli_utils.py | 253 | # Create a subparser for a specific command |
| MEDIUM | tests/test_skills.py | 188 | # Create a file instead of directory |
| MEDIUM | tests/test_skills.py | 199 | # Create a skill with subdirectories |
| MEDIUM | tests/test_skills.py | 264 | # Create a skill with multiple files |
| MEDIUM | tests/test_skills.py | 171 | # Create a symlink |
| MEDIUM | tests/test_skills.py | 217 | # Create a skill |
| MEDIUM | tests/test_skills.py | 435 | # Create a real skill |
| MEDIUM | tests/test_cli.py | 101 | # Create a temporary config file |
| MEDIUM | tests/experimental/test_prm_trainer.py | 126 | # Define the input features |
| MEDIUM | tests/experimental/test_prm_trainer.py | 150 | # Define the input features |
| MEDIUM | tests/experimental/test_prm_trainer.py | 173 | # Define the input features |
| MEDIUM | tests/experimental/test_prm_trainer.py | 197 | # Define the input features |
| MEDIUM | tests/experimental/test_prm_trainer.py | 221 | # Define the input features |
| MEDIUM | scripts/generate_zen_multi_image_dataset.py | 76 | # Create the images |
| MEDIUM | scripts/generate_zen_multi_image_dataset.py | 106 | # Create the images |
| MEDIUM | scripts/generate_zen_multi_image_dataset.py | 157 | # Create the images |
| MEDIUM | scripts/generate_zen_multi_image_dataset.py | 229 | # Create the images |
| MEDIUM | trl/data_utils.py | 741 | # Create a new bin |
| MEDIUM | trl/experimental/merge_model_callback.py | 37 | # Create the repository if it doesn't exist |
| MEDIUM | trl/experimental/merge_model_callback.py | 118 | # Create the merge configuration dictionary |
| MEDIUM | trl/experimental/merge_model_callback.py | 128 | # Create the MergeConfiguration from the dictionary |
| MEDIUM | trl/experimental/merge_model_callback.py | 137 | # Create the TIES merge configuration dictionary |
| MEDIUM | trl/experimental/merge_model_callback.py | 172 | # Create the MergeConfiguration from the dictionary |
| MEDIUM | trl/experimental/merge_model_callback.py | 181 | # Create the DARE TIES merge configuration dictionary |
| MEDIUM | trl/experimental/merge_model_callback.py | 216 | # Create the MergeConfiguration from the dictionary |
| MEDIUM | trl/experimental/merge_model_callback.py | 226 | # Create the SLERP merge configuration dictionary |
| MEDIUM | trl/experimental/merge_model_callback.py | 255 | # Create the MergeConfiguration from the dictionary |
| MEDIUM | trl/experimental/utils.py | 15 | # This file contains utility classes and functions that are used across more than one experimental trainer or feature. |
| MEDIUM | trl/experimental/cpo/cpo_trainer.py | 174 | # Initialize this variable to False. This helps tracking the case when `peft_module_casting_to_bf16` |
| MEDIUM | trl/experimental/bco/bco_trainer.py | 473 | # Initialize this variable to False. This helps tracking the case when `peft_module_casting_to_bf16` |
| MEDIUM | trl/experimental/orpo/orpo_trainer.py | 183 | # Initialize this variable to False. This helps tracking the case when `peft_module_casting_to_bf16` |
| MEDIUM | trl/experimental/online_dpo/online_dpo_trainer.py | 382 | # Define the collator if not provided |
| MEDIUM | trl/experimental/online_dpo/online_dpo_trainer.py | 886 | # Create the output dict with required fields |
| MEDIUM | trl/experimental/gold/gold_trainer.py | 1465 | # Create a completion mask |
| MEDIUM | trl/experimental/prm/prm_trainer.py | 337 | # Create the label |
| MEDIUM | trl/experimental/ppo/ppo_trainer.py | 394 | # Define the collator if not provided |
| MEDIUM | trl/scripts/vllm_serve.py | 109 | # Create a stateless process group to manage communication between training processes and vLLM workers. |
| MEDIUM | trl/scripts/vllm_serve.py | 491 | # Define the endpoints for the model server |
| MEDIUM | trl/trainer/utils.py | 160 | # Create an output tensor filled with the padding value |
| MEDIUM | trl/trainer/utils.py | 171 | # Define the slices |
| MEDIUM | trl/trainer/utils.py | 719 | self.generator = torch.Generator() # Create a local random generator |
| MEDIUM | trl/trainer/utils.py | 950 | # This function is intentionally not used internally. It is provided as a utility for users whose datasets contain |
| MEDIUM | trl/trainer/callbacks.py | 218 | # Create a table per group |
| MEDIUM | trl/trainer/callbacks.py | 693 | # Create a new instance and load state_dict |
| MEDIUM | trl/trainer/grpo_trainer.py | 494 | # Create the environments and extract their methods to be used as tools. We create one environment per rollout |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | examples/scripts/openenv/browsergym_llm.py | 64 | |
| LOW | examples/scripts/openenv/sudoku.py | 84 | |
| LOW | examples/scripts/openenv/browsergym.py | 50 | |
| LOW | trl/__init__.py | 19 | |
| LOW | trl/__init__.py | 77 | |
| LOW | trl/__init__.py | 77 | |
| LOW | trl/__init__.py | 77 | |
| LOW | trl/__init__.py | 77 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 83 | |
| LOW | trl/__init__.py | 97 | |
| LOW | trl/__init__.py | 98 | |
| LOW | trl/__init__.py | 98 | |
| LOW | trl/__init__.py | 98 | |
| LOW | trl/__init__.py | 98 | |
| LOW | trl/__init__.py | 98 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/__init__.py | 99 | |
| LOW | trl/experimental/utils.py | 47 | |
| LOW | trl/experimental/ssd/__init__.py | 15 | |
| LOW | trl/experimental/ssd/__init__.py | 16 | |
| LOW | trl/experimental/ssd/ssd_eval.py | 59 | |
| LOW | trl/experimental/ssd/ssd_trainer.py | 23 | |
| LOW | trl/experimental/gfpo/__init__.py | 15 | |
| LOW | trl/experimental/gfpo/__init__.py | 16 | |
| LOW | trl/experimental/sdpo/__init__.py | 15 | |
| LOW | trl/experimental/sdpo/__init__.py | 16 | |
| LOW | trl/experimental/cpo/__init__.py | 15 | |
| LOW | trl/experimental/cpo/__init__.py | 16 | |
| LOW | trl/experimental/papo/__init__.py | 16 | |
| LOW | trl/experimental/papo/__init__.py | 17 | |
| 122 more matches not shown… | |||

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_utils.py | 809 | |
| LOW | tests/test_utils.py | 840 | |
| LOW | tests/test_grpo_trainer.py | 3002 | |
| LOW | tests/test_grpo_trainer.py | 3118 | |
| LOW | tests/test_grpo_trainer.py | 3266 | |
| LOW | examples/datasets/math_shepherd.py | 52 | |
| LOW | examples/scripts/sft_vlm_gemma3.py | 78 | |
| LOW | examples/scripts/sft_vlm_gemma3.py | 97 | |
| LOW | examples/scripts/grpo_agent.py | 50 | |
| LOW | examples/scripts/grpo_agent.py | 145 | |
| LOW | examples/scripts/sft_video_llm.py | 68 | |
| LOW | examples/scripts/openenv/multi_env.py | 127 | |
| LOW | examples/scripts/openenv/catch.py | 205 | |
| LOW | examples/scripts/openenv/sudoku.py | 208 | |
| LOW | examples/scripts/openenv/sudoku.py | 290 | |
| LOW | examples/scripts/openenv/sudoku.py | 374 | |
| LOW | examples/scripts/openenv/sudoku.py | 543 | |
| LOW | scripts/log_reports.py | 34 | |
| LOW | scripts/generate_tiny_models/_common.py | 256 | |
| LOW | trl/chat_template_utils.py | 355 | |
| LOW | trl/data_utils.py | 33 | |
| LOW | trl/data_utils.py | 127 | |
| LOW | trl/data_utils.py | 922 | |
| LOW | trl/_compat.py | 86 | |
| LOW | trl/import_utils.py | 29 | |
| LOW | trl/experimental/merge_model_callback.py | 82 | |
| LOW | trl/experimental/merge_model_callback.py | 260 | |
| LOW | trl/experimental/utils.py | 537 | |
| LOW | trl/experimental/utils.py | 557 | |
| LOW | trl/experimental/utils.py | 65 | |
| LOW | trl/experimental/utils.py | 150 | |
| LOW | trl/experimental/gfpo/gfpo_trainer.py | 69 | |
| LOW | trl/experimental/sdpo/sdpo_trainer.py | 96 | |
| LOW | trl/experimental/sdpo/sdpo.py | 163 | |
| LOW | trl/experimental/cpo/cpo_trainer.py | 130 | |
| LOW | trl/experimental/cpo/cpo_trainer.py | 446 | |
| LOW | trl/experimental/cpo/cpo_trainer.py | 579 | |
| LOW | trl/experimental/cpo/cpo_trainer.py | 643 | |
| LOW | trl/experimental/papo/papo_trainer.py | 154 | |
| LOW | trl/experimental/bco/bco_trainer.py | 407 | |
| LOW | trl/experimental/bco/bco_trainer.py | 1030 | |
| LOW | trl/experimental/bco/bco_trainer.py | 1233 | |
| LOW | trl/experimental/bco/bco_trainer.py | 1362 | |
| LOW | trl/experimental/orpo/orpo_trainer.py | 139 | |
| LOW | trl/experimental/orpo/orpo_trainer.py | 431 | |
| LOW | trl/experimental/orpo/orpo_trainer.py | 574 | |
| LOW | trl/experimental/online_dpo/online_dpo_trainer.py | 169 | |
| LOW | trl/experimental/online_dpo/online_dpo_trainer.py | 764 | |
| LOW | trl/experimental/online_dpo/online_dpo_trainer.py | 840 | |
| LOW | trl/experimental/online_dpo/online_dpo_trainer.py | 902 | |
| LOW | trl/experimental/online_dpo/online_dpo_trainer.py | 1096 | |
| LOW | trl/experimental/bema_for_ref_model/callback.py | 202 | |
| LOW | trl/experimental/sdft/sdft.py | 133 | |
| LOW | trl/experimental/sdft/sdft_trainer.py | 160 | |
| LOW | trl/experimental/distillation/distillation_trainer.py | 233 | |
| LOW | trl/experimental/distillation/distillation_trainer.py | 385 | |
| LOW | trl/experimental/distillation/distillation_trainer.py | 671 | |
| LOW | trl/experimental/distillation/distillation_trainer.py | 823 | |
| LOW | trl/experimental/distillation/distillation_trainer.py | 865 | |
| LOW | trl/experimental/distillation/distillation_trainer.py | 1049 | |
| 85 more matches not shown… | |||

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | CONTRIBUTING.md | 268 | Replicate a string `n` times with a separator. Args: string (`str`): String to replicate. |
| HIGH | AGENTS.md | 71 | Brief one-line description of what this does. Args: param1 (`str`): Description of require |
| HIGH | CLAUDE.md | 71 | Brief one-line description of what this does. Args: param1 (`str`): Description of require |
| HIGH | .cursor/BUGBOT.md | 71 | Brief one-line description of what this does. Args: param1 (`str`): Description of require |
| HIGH | trl/chat_template_utils.py | 356 | Adds the appropriate response schema to the given tokenizer based on its chat template. At the time of initial |
| HIGH | trl/data_utils.py | 35 | Convert messages into a structured multimodal format and inject the provided images into the message contents. |
| HIGH | trl/data_utils.py | 161 | Check if the example is in a conversational format. Args: example (`dict[str, Any]`): A si |
| HIGH | trl/data_utils.py | 341 | If the example is in a conversational format, apply a chat template to it. Args: example (`dict[str, l |
| HIGH | trl/data_utils.py | 505 | Extracts the shared prompt from a preference data example, where the prompt is implicit within both the chosen and |
| HIGH | trl/data_utils.py | 883 | Check if the example is in a conversational format (from/value). Note that this format isn't recommended. Prefer |
| HIGH | trl/experimental/gold/gold_trainer.py | 520 | Merge probabilities based on alignment groups with corrected conditional probability handling. For a g |
| HIGH | trl/skills/skills.py | 73 | Resolve target to a concrete directory path. Converts semantic agent names (e.g., 'claude') with scope to actu |
| HIGH | trl/skills/skills.py | 184 | Install a skill to target directory. Args: skill_name (`str`): Name of skill to install. targe |
| HIGH | trl/skills/skills.py | 251 | Install a skill. Args: skill_name (`str`): Name of skill to install. target (`str \| Path`): Ag |
| HIGH | trl/skills/skills.py | 295 | Uninstall a skill from target directory. Args: skill_name (`str`): Name of skill to uninstall. |
| HIGH | trl/skills/skills.py | 327 | Uninstall a skill. Args: skill_name (`str`): Name of skill to uninstall. target (`str \| Path`) |
| HIGH | trl/trainer/utils.py | 119 | Pads a list of tensors to the same shape along the first dimension. Args: tensors (`list[torch.Tensor] |
| HIGH | .ai/AGENTS.md | 71 | Brief one-line description of what this does. Args: param1 (`str`): Description of require |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_chat_template_utils.py | 45 | # Check if special tokens are correctly set |
| LOW | tests/test_chat_template_utils.py | 112 | # Check if special tokens are correctly set |
| LOW | tests/test_grpo_trainer.py | 3213 | # Check if signature columns were set properly |
| LOW | tests/experimental/test_gkd_trainer.py | 52 | # Set temperature to 0 for deterministic output |
| LOW | tests/experimental/test_gkd_trainer.py | 71 | # Check if the generated texts start with the original prompts |
| LOW | tests/experimental/test_gkd_trainer.py | 84 | # Check if the two generations are identical |
| LOW | tests/experimental/test_online_dpo_trainer.py | 228 | # Check if training loss is available |
| LOW | …s/experimental/test_grpo_with_replay_buffer_trainer.py | 45 | # Check if the buffer contains the correct number of elements |
| LOW | …s/experimental/test_grpo_with_replay_buffer_trainer.py | 48 | # Check if the buffer maintains the min-heap property |
| LOW | …s/experimental/test_grpo_with_replay_buffer_trainer.py | 67 | # Check if the buffer contains the correct number of elements |
| LOW | …s/experimental/test_grpo_with_replay_buffer_trainer.py | 70 | # Check if the buffer maintains the min-heap property |
| LOW | …s/experimental/test_grpo_with_replay_buffer_trainer.py | 90 | # Check if the sampled elements are from the buffer |
| LOW | tests/experimental/test_ppo_trainer.py | 750 | # Check if critic weights have been updated |
| LOW | tests/experimental/test_ppo_trainer.py | 757 | # Check if policy weights have been updated |
| LOW | tests/experimental/test_ppo_trainer.py | 812 | # Check if critic weights have been updated |
| LOW | tests/experimental/test_ppo_trainer.py | 819 | # Check if policy weights have been updated - for PEFT we check the LoRA weights |
| LOW | tests/experimental/test_ppo_trainer.py | 191 | # Check if the weights are the same |
| LOW | tests/experimental/test_ppo_trainer.py | 206 | # Check if the weights are the same |
| LOW | tests/experimental/test_ppo_trainer.py | 224 | # Check if the weights are the same |
| LOW | tests/experimental/test_ppo_trainer.py | 245 | # Check if the weights are the same |
| LOW | tests/experimental/test_ppo_trainer.py | 251 | # Check if the trl model has the same keys as the transformers model |
| LOW | tests/experimental/test_ppo_trainer.py | 290 | # Check if the outputs are of the right size - here |
| LOW | tests/experimental/test_ppo_trainer.py | 303 | # Check if v head of the model has the same dropout as the config |
| LOW | tests/experimental/test_ppo_trainer.py | 315 | # Check if v head of the model has the same dropout as the config |
| LOW | tests/experimental/test_ppo_trainer.py | 320 | # Check if v head of the model has the same dropout as the config |
| LOW | tests/experimental/test_ppo_trainer.py | 405 | # Check if the outputs are of the right size - here |
| LOW | tests/experimental/test_ppo_trainer.py | 418 | # Check if v head of the model has the same dropout as the config |
| LOW | tests/experimental/test_ppo_trainer.py | 430 | # Check if v head of the model has the same dropout as the config |
| LOW | tests/experimental/test_ppo_trainer.py | 435 | # Check if v head of the model has the same dropout as the config |
| LOW | scripts/add_copyrights.py | 61 | # Check if the exact copyright header exists |
| LOW | .github/workflows/tests.yml | 90 | if: github.ref == 'refs/heads/main' && always() # Check if the branch is main |
| LOW | .github/workflows/tests.yml | 146 | if: github.ref == 'refs/heads/main' && always() # Check if the branch is main |
| LOW | .github/workflows/tests.yml | 197 | if: github.ref == 'refs/heads/main' && always() # Check if the branch is main |
| LOW | .github/workflows/tests.yml | 252 | if: github.ref == 'refs/heads/main' && always() # Check if the branch is main |
| LOW | .github/workflows/tests.yml | 305 | if: github.ref == 'refs/heads/main' && always() # Check if the branch is main |
| LOW | trl/experimental/utils.py | 486 | # Check if model is sharded (FSDP/DS-Zero3) |
| LOW | trl/experimental/gold/gold_trainer.py | 1456 | # Check if the tokenized prompt starts with the tokenized prompt+completion |
| LOW | trl/experimental/tpo/tpo_trainer.py | 569 | # Check if the tokenized prompt starts with the tokenized prompt+completion |
| LOW | trl/models/activation_offloading.py | 221 | # Check if tensor is on CPU (skip offloading) |
| LOW | trl/models/activation_offloading.py | 226 | # Check if tensor is too small |
| LOW | trl/models/activation_offloading.py | 231 | # Check if tensor is a parameter or buffer |
| LOW | trl/models/activation_offloading.py | 238 | # Check if tensor is an FP8 tensor (TorchAO) - skip offloading as they're already compressed |
| LOW | trl/models/activation_offloading.py | 244 | # Check if tensor storage is a model parameter (for FSDP compatibility) |
| LOW | trl/models/activation_offloading.py | 286 | # Check if tensor has broadcast dimensions (stride == 0) |
| LOW | trl/models/activation_offloading.py | 495 | # Check if tensor still exists (might have been cleaned up by a previous node) |
| LOW | trl/models/utils.py | 275 | # Check if the model is already a FSDP model due to `Manual Wrapping` and if so, don't wrap it again |
| LOW | trl/scripts/vllm_serve.py | 733 | # Check if adding this item would exceed the token budget |
| LOW | trl/skills/skills.py | 105 | # Check if it's a predefined agent |
| LOW | trl/skills/skills.py | 205 | # Check if source skill exists |
| LOW | trl/skills/skills.py | 218 | # Check if already exists |
| LOW | trl/trainer/sft_trainer.py | 1478 | # Check if the tokenized prompt starts with the tokenized prompt+completion |
| LOW | trl/trainer/dpo_trainer.py | 960 | # Check if the tokenized prompt starts with the tokenized prompt+completion |
| LOW | trl/generation/vllm_client.py | 186 | # Check if the total timeout duration has passed |
| LOW | trl/extras/profiling.py | 199 | # Check if self is a Trainer-like object with required attributes |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_sft_trainer.py | 2186 | except Exception: |
| LOW | tests/test_grpo_trainer.py | 3106 | except Exception as e: |
| LOW | tests/test_grpo_trainer.py | 3233 | except Exception as e: |
| LOW | tests/test_grpo_trainer.py | 3300 | except Exception as e: |
| LOW | tests/test_rewards.py | 143 | except Exception as e: |
| MEDIUM | tests/test_rewards.py | 140 | def target(): |
| MEDIUM | tests/experimental/test_gold_trainer.py | 28 | def openr1_examples(): |
| MEDIUM | tests/experimental/test_gold_trainer.py | 41 | def countdown_examples(): |
| LOW | tests/experimental/test_gold_trainer.py | 35 | except Exception as exc: # pragma: no cover - network/environment dependent |
| LOW | tests/experimental/test_gold_trainer.py | 48 | except Exception as exc: # pragma: no cover - network/environment dependent |
| LOW | tests/experimental/test_online_dpo_trainer.py | 199 | except Exception: |
| LOW | docs/source/openenv.md | 398 | except Exception: |
| LOW | docs/source/openenv.md | 409 | except Exception: |
| LOW | examples/scripts/sft_vlm_gemma3.py | 107 | except Exception as e: |
| MEDIUM | examples/scripts/sft_vlm_gemma3.py | 108 | print(f"Error processing image {img_path}: {e}") |
| LOW | examples/scripts/sft_video_llm.py | 137 | except Exception as e: |
| LOW | examples/scripts/openenv/multi_env.py | 136 | except Exception: |
| LOW | examples/scripts/openenv/multi_env.py | 147 | except Exception: |
| LOW | examples/scripts/openenv/carla_vlm_gemma.py | 165 | except Exception as e: |
| LOW | examples/scripts/openenv/catch.py | 162 | except Exception as e: |
| LOW | scripts/log_reports.py | 59 | except Exception as e: |
| MEDIUM | scripts/add_copyrights.py | 48 | print(f"Error fetching tracked files: {e}") |
| LOW | trl/_lazy_module.py | 72 | except Exception as e: |
| LOW | trl/_compat.py | 45 | except Exception as e: |
| LOW | trl/_compat.py | 69 | except Exception as e: |
| LOW | trl/_compat.py | 126 | except Exception as e: |
| LOW | trl/_compat.py | 155 | except Exception as e: |
| LOW | trl/experimental/sdft/sdft.py | 213 | except Exception: |
| LOW | trl/experimental/openreward/_spec.py | 175 | except Exception: # noqa: BLE001 |
| LOW | trl/experimental/openreward/_spec.py | 282 | except Exception as e: # noqa: BLE001 |
| LOW | trl/experimental/openreward/_spec.py | 312 | except Exception as e: # noqa: BLE001 |
| LOW | trl/experimental/openreward/environment.py | 261 | except Exception as e: # noqa: BLE001 |
| LOW | trl/experimental/openreward/environment.py | 273 | except Exception as e: # noqa: BLE001 |
| LOW | trl/experimental/async_grpo/async_rollout_worker.py | 646 | except Exception as error: |
| LOW | trl/experimental/dppo/dppo_trainer.py | 463 | except Exception as err: |
| LOW | trl/experimental/ppo/modeling_value_head.py | 459 | except Exception: |
| LOW | trl/experimental/ppo/modeling_value_head.py | 469 | except Exception as exc: |
| MEDIUM | trl/cli/commands/skills.py | 37 | print("Error: Unknown skills command") |
| LOW | trl/scripts/vllm_serve.py | 784 | except Exception as e: |
| LOW | trl/scripts/vllm_serve.py | 789 | except Exception as e: |
| LOW | trl/scripts/utils.py | 400 | except Exception as e: |
| MEDIUM | trl/scripts/utils.py | 381 | def get_git_commit_hash(package_name): |
| MEDIUM | trl/skills/cli.py | 94 | print("Error: Either provide a skill name or use --all to install all skills") |
| MEDIUM | trl/skills/cli.py | 100 | print("Error: Cannot specify both a skill name and --all") |
| MEDIUM | trl/skills/cli.py | 158 | print(f"Error: {e}") |
| MEDIUM | trl/skills/cli.py | 193 | print(f"Error: {e}") |
| LOW | trl/trainer/callbacks.py | 541 | except Exception as scorer_e: |
| LOW | trl/trainer/callbacks.py | 547 | except Exception as pred_e: |
| LOW | trl/trainer/callbacks.py | 564 | except Exception as summary_e: |
| LOW | trl/trainer/callbacks.py | 569 | except Exception as finish_e: |
| LOW | trl/trainer/grpo_trainer.py | 1527 | except Exception as e: |

| Severity | File | Line | Snippet |
|---|---|---|---|
| CRITICAL | trl/experimental/ppo/modeling_value_head.py | 700 | self.v_head.summary.weight.data.normal_(mean=0.0, std=initializer_range) |
| CRITICAL | trl/experimental/ppo/modeling_value_head.py | 701 | self.v_head.summary.bias.data.zero_() |
| CRITICAL | trl/experimental/ppo/modeling_value_head.py | 966 | self.v_head.summary.weight.data.normal_(mean=0.0, std=initializer_range) |
| CRITICAL | trl/experimental/ppo/modeling_value_head.py | 967 | self.v_head.summary.bias.data.zero_() |
| CRITICAL | trl/models/activation_offloading.py | 397 | torch.autograd.variable.Variable._execution_engine.queue_callback(wait_and_del_remaining_references) |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_dpo_trainer.py | 415 | # purposes we will just just use the same checkpoint |
| MEDIUM | trl/experimental/cpo/cpo_trainer.py | 464 | # 2. https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257 |
| MEDIUM | trl/experimental/bco/bco_trainer.py | 258 | # 2. https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257 |
| MEDIUM | trl/experimental/orpo/orpo_trainer.py | 449 | # 2. https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257 |
| MEDIUM | trl/generation/vllm_client.py | 138 | # This is not strictly required for correctness, but it helps make training more robust to rare, transient |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | examples/scripts/openenv/wordle.py | 77 | uv venv && source .venv/bin/activate |
| HIGH | trl/data_utils.py | 637 | elif not first_column_offsets.equals(column.offsets): |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | examples/cli_configs/example_config.yaml | 1 | # This is an example configuration file of TRL CLI, you can use it for |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | trl/generation/vllm_client.py | 748 | # Example usage |