Ongoing research training transformer models at scale
6402 matches across 19 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | pretrain_vlm.py | 227 | def train_valid_test_datasets_provider(train_val_test_num_samples): |
| LOW | pretrain_vlm.py | 267 | def _preprocess_data_for_llava(data): |
| LOW | pretrain_vlm.py | 465 | def llava_position_embedding_ranks(pp_ranks): |
| LOW | train_rl.py | 339 | def train_valid_test_datasets_provider(train_val_test_num_samples): |
| LOW | pretrain_gpt.py | 267 | def core_gpt_dataset_config_from_args(args: Any) -> GPTDatasetConfig: |
| LOW | pretrain_gpt.py | 332 | def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None): |
| LOW | model_provider.py | 62 | def count_parameters_in_layer(model, layer_name): |
| LOW | pretrain_hybrid.py | 259 | def core_gpt_dataset_config_from_args(args: Any) -> GPTDatasetConfig: |
| LOW | pretrain_hybrid.py | 301 | def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None): |
| LOW | gpt_builders.py | 103 | def _get_transformer_layer_spec(use_te, config): |
| LOW | tasks/finetune_utils.py | 53 | def _cross_entropy_forward_step(batch, model): |
| LOW | tasks/finetune_utils.py | 95 | def _build_infinite_size_dataloader(dataloader): |
| LOW | tasks/finetune_utils.py | 106 | def _build_train_valid_dataloaders(train_dataset, valid_dataset, |
| LOW | tasks/data_utils.py | 35 | def build_tokens_types_paddings_from_text(text_a, text_b, |
| LOW | tasks/data_utils.py | 49 | def build_tokens_types_paddings_from_ids(text_a_ids, text_b_ids, max_seq_length, |
| LOW | tasks/eval_utils.py | 65 | def calculate_correct_answers(name, model, dataloader, |
| LOW | tasks/eval_utils.py | 111 | def correct_answers_forward_step(batch, model): |
| LOW | tools/run_vlm_text_generation.py | 161 | def generate_and_write_samples(model): |
| LOW | tools/prepare_cache.py | 54 | def _normalize_prepare_cache_args(args: Any) -> None: |
| LOW | tools/prepare_cache.py | 65 | def _validate_prepare_cache_args(args: Any) -> None: |
| LOW | tools/prepare_cache.py | 82 | def _disable_cache_load_only_flags(args: Any) -> Dict[str, bool]: |
| LOW | tools/prepare_cache.py | 102 | def _print_effective_configuration( |
| LOW | tools/prepare_cache.py | 120 | def core_gpt_dataset_config_from_args(args: Any) -> GPTDatasetConfig: |
| LOW | tools/run_inference_performance_test.py | 48 | def add_inference_benchmarking_args(parser): |
| LOW | tools/build_sequences_per_dataset.py | 60 | def build_sequences_per_dataset(args): |
| LOW | tools/run_dynamic_text_generation_server.py | 21 | def add_text_generation_server_args(parser: argparse.ArgumentParser): |
| LOW | tools/run_dynamic_text_generation_server.py | 37 | async def run_text_generation_server( |
| LOW | tools/checkpoint/saver_hf_llava.py | 176 | def receive_vision_projection(self): |
| LOW | tools/checkpoint/saver_hf_llava.py | 352 | def save_state_dict_to_hf_checkpoint(self): |
| LOW | tools/checkpoint/schema_core.py | 10 | def get_core_transformer_block_key(model_key): |
| LOW | tools/checkpoint/schema_hf.py | 173 | def get_language_model_schema( |
| LOW | tools/checkpoint/loader_base.py | 30 | def _maybe_parse_additional_megatron_args(self, margs, checkpoint_args): |
| LOW | tools/checkpoint/loader_base.py | 90 | def _maybe_ensure_additional_required_arguments(self): |
| LOW | tools/checkpoint/loader_base.py | 107 | def ensure_required_arguments(self): |
| LOW | tools/checkpoint/loader_base.py | 193 | def get_models_for_pipeline_stage(count, dtype): |
| LOW | tools/checkpoint/loader_base.py | 425 | def build_checkpoint_metadata(self, true_vocab_size): |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 404 | def initialize_ssm_layer_params( |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 152 | def parse_hybrid_layer_pattern(pattern): |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 183 | def build_layer_index_mapping(layer_types, direction): |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 316 | def validate_pattern_gpt_compatible(layer_types, direction): |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 361 | def validate_source_args_gpt_compatible(source_args, direction): |
| LOW | tools/checkpoint/dist_checkpoint_io.py | 69 | def resolve_checkpoint_subdir(load_dir): |
| LOW | tools/checkpoint/dist_checkpoint_io.py | 113 | def ensure_single_rank_process_group(): |
| LOW | tools/checkpoint/dist_checkpoint_io.py | 159 | def load_dist_checkpoint_full(load_dir): |
| LOW | tools/checkpoint/dist_checkpoint_io.py | 221 | def save_dist_checkpoint_full( |
| LOW | tools/checkpoint/dist_checkpoint_io.py | 260 | def write_latest_iteration_marker(save_dir, iteration): |
| LOW | tools/checkpoint/loader_mixtral_hf.py | 28 | def load_args_from_checkpoint(args): |
| LOW | tools/checkpoint/loader_mixtral_hf.py | 63 | def verify_transformers_version(): |
| LOW | tools/checkpoint/saver_llava.py | 134 | def _maybe_parse_additional_megatron_args(self, margs): |
| LOW | tools/checkpoint/saver_llava.py | 320 | def receive_vision_projection(self): |
| LOW | tools/checkpoint/checkpoint_inspector.py | 288 | def save_checkpoint_with_pickle_protocol(state_dict, output_dir, pickle_protocol=4): |
| LOW | tools/checkpoint/checkpoint_inspector.py | 292 | def transform_object_override(write_item, obj): |
| LOW | tools/checkpoint/checkpoint_inspector.py | 831 | def convert_torch_dist_to_fsdp_dtensor( |
| LOW | tools/checkpoint/saver_base.py | 31 | def _maybe_parse_additional_megatron_args(self, margs): |
| LOW | tools/checkpoint/saver_base.py | 38 | def insert_megatron_path_and_check_te(self): |
| LOW | tools/checkpoint/saver_base.py | 277 | def receive_checkpoint_metadata(self): |
| LOW | tools/checkpoint/saver_base.py | 341 | def save_local_models_to_checkpoint(self): |
| LOW | tools/checkpoint/loader_llava.py | 54 | def _maybe_parse_additional_megatron_args(self, margs, checkpoint_args): |
| LOW | tools/checkpoint/loader_llava.py | 88 | def _maybe_ensure_additional_required_arguments(self): |
| LOW | tools/checkpoint/loader_llava.py | 104 | def build_checkpoint_metadata(self, true_vocab_size): |
| 3408 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 400 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 402 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 132 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 134 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 225 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 227 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 486 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 488 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 530 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 532 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 627 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 629 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 698 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 700 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 722 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/gpt_hybrid_conversion.py | 724 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/checkpoint/saver_llava.py | 225 | #----------- |
| MEDIUM | tools/checkpoint/saver_llava.py | 186 | #----------- |
| MEDIUM | tools/checkpoint/saver_base.py | 377 | #----------- |
| MEDIUM | tools/checkpoint/saver_base.py | 420 | # -------------- |
| MEDIUM | tools/checkpoint/saver_base.py | 432 | # ------------------ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 360 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 362 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 386 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 388 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 410 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 412 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 433 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 435 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 474 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 476 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 496 | # ================================================================ |
| MEDIUM | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 498 | # ================================================================ |
| MEDIUM | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 20 | # ── Parse KEY=VALUE positional args ─────────────────────────────────────────── |
| MEDIUM | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 50 | # ── Read model_config.yaml ──────────────────────────────────────────────────── |
| MEDIUM | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 83 | # ── Build model args (substituting ${CHECKPOINT_LOAD_PATH}) ─────────────────── |
| MEDIUM | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 99 | # ── Make image-bundled extras (mamba-ssm) visible to the cog venv ───────────── |
| MEDIUM | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 128 | # ── Launch the inference server in the background ───────────────────────────── |
| MEDIUM | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 185 | # ── Wait for server readiness ───────────────────────────────────────────────── |
| MEDIUM | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 205 | # ── Benchmark sweep ─────────────────────────────────────────────────────────── |
| MEDIUM | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 224 | # ── Baseline comparison or recording ────────────────────────────────────────── |
| MEDIUM | tests/unit_tests/test_optimizer_param_scheduler.py | 302 | # ── get_canonical_lr_for_logging tests ────────────────────────────────────── |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 63 | # =========================================================================== |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 65 | # =========================================================================== |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 841 | # =========================================================================== |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 843 | # =========================================================================== |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 1288 | # =========================================================================== |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 1290 | # =========================================================================== |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 1575 | # =========================================================================== |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 1577 | # =========================================================================== |
| MEDIUM | tests/unit_tests/test_argument_utils.py | 654 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/unit_tests/test_argument_utils.py | 656 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/unit_tests/tokenizers/test_tokenizer.py | 640 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/unit_tests/tokenizers/test_tokenizer.py | 642 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/unit_tests/tokenizers/test_tokenizer.py | 472 | # ------------------------------------------------------------------------ |
| MEDIUM | tests/unit_tests/tokenizers/test_tokenizer.py | 474 | # ------------------------------------------------------------------------ |
| MEDIUM | tests/unit_tests/tokenizers/test_tokenizer.py | 512 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/unit_tests/tokenizers/test_tokenizer.py | 514 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/unit_tests/fusions/test_bias_dropout_fusion.py | 8 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/unit_tests/fusions/test_bias_dropout_fusion.py | 10 | # --------------------------------------------------------------------------- |
| 636 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | pretrain_gpt.py | 0 | build the train test and validation datasets. args: train_val_test_num_samples : a list containing the number of samples |
| HIGH | pretrain_hybrid.py | 0 | build the train test and validation datasets. args: train_val_test_num_samples : a list containing the number of samples |
| HIGH | examples/post_training/modelopt/finetune.py | 0 | build the train test and validation datasets. args: train_val_test_num_samples : a list containing the number of samples |
| HIGH | examples/t5/pretrain_t5.py | 0 | build the train test and validation datasets. args: train_val_test_num_samples : a list containing the number of samples |
| HIGH | megatron/elastification/pretrain_hybrid_flex.py | 0 | build the train test and validation datasets. args: train_val_test_num_samples : a list containing the number of samples |
| HIGH | tools/checkpoint/saver_hf_llava.py | 0 | required top-level function that creates the saver and calls its .save(). |
| HIGH | tools/checkpoint/saver_core.py | 0 | required top-level function that creates the saver and calls its .save(). |
| HIGH | tools/checkpoint/saver_llava.py | 0 | required top-level function that creates the saver and calls its .save(). |
| HIGH | tools/checkpoint/loader_base.py | 0 | orchestrates loading a megatron checkpoint and sending model parameters over a given multiprocessing queue. args: args: |
| HIGH | tools/checkpoint/loader_core.py | 0 | orchestrates loading a megatron checkpoint and sending model parameters over a given multiprocessing queue. args: args: |
| HIGH | tools/checkpoint/loader_llava.py | 0 | orchestrates loading a megatron checkpoint and sending model parameters over a given multiprocessing queue. args: args: |
| HIGH | tools/checkpoint/loader_base.py | 0 | parse megatron arguments by forcibly overwriting sys.argv. populates self.margs and self.checkpoint_args. |
| HIGH | tools/checkpoint/saver_base.py | 0 | parse megatron arguments by forcibly overwriting sys.argv. populates self.margs and self.checkpoint_args. |
| HIGH | tools/checkpoint/loader_llava.py | 0 | parse megatron arguments by forcibly overwriting sys.argv. populates self.margs and self.checkpoint_args. |
| HIGH | tools/checkpoint/loader_base.py | 0 | construct a sys.argv list for megatron's argument parser. this centralizes the hack of overwriting sys.argv. |
| HIGH | tools/checkpoint/loader_core.py | 0 | construct a sys.argv list for megatron's argument parser. this centralizes the hack of overwriting sys.argv. |
| HIGH | tools/checkpoint/saver_base.py | 0 | construct a sys.argv list for megatron's argument parser. this centralizes the hack of overwriting sys.argv. |
| HIGH | tools/checkpoint/loader_llava.py | 0 | construct a sys.argv list for megatron's argument parser. this centralizes the hack of overwriting sys.argv. |
| HIGH | tests/unit_tests/test_fp8_param.py | 0 | dp_overlap: (overlap_param_gather, overlap_grad_reduce) |
| HIGH | tests/unit_tests/test_fp8_param.py | 0 | dp_overlap: (overlap_param_gather, overlap_grad_reduce) |
| HIGH | tests/unit_tests/test_fp8_param.py | 0 | dp_overlap: (overlap_param_gather, overlap_grad_reduce) |
| HIGH | tests/unit_tests/test_hyper_comm_grid.py | 0 | set up distributed environment for the entire test class. |
| HIGH | …it_tests/pipeline_parallel/test_bridge_communicator.py | 0 | set up distributed environment for the entire test class. |
| HIGH | …sts/pipeline_parallel/test_multimodule_communicator.py | 0 | set up distributed environment for the entire test class. |
| HIGH | tests/unit_tests/test_argument_utils.py | 0 | config with argparse_meta metadata for testing overrides. |
| HIGH | tests/unit_tests/test_argument_utils.py | 0 | config with argparse_meta metadata for testing overrides. |
| HIGH | tests/unit_tests/test_argument_utils.py | 0 | config with argparse_meta metadata for testing overrides. |
| HIGH | …sts/unit_tests/transformer/test_submodule_callables.py | 0 | runs the model in reference mode and captures outputs and gradients. args: model: the transformer model to run. input_te |
| HIGH | …sts/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py | 0 | runs the model in reference mode and captures outputs and gradients. args: model: the transformer model to run. input_te |
| HIGH | …sts/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py | 0 | runs the model in reference mode and captures outputs and gradients. args: model: the transformer model to run. input_te |
| HIGH | …sts/unit_tests/transformer/test_submodule_callables.py | 0 | runs the model with all-to-all overlap optimization and captures outputs and gradients. args: model: the transformer mod |
| HIGH | …sts/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py | 0 | runs the model with all-to-all overlap optimization and captures outputs and gradients. args: model: the transformer mod |
| HIGH | …sts/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py | 0 | runs the model with all-to-all overlap optimization and captures outputs and gradients. args: model: the transformer mod |
| HIGH | …it_tests/pipeline_parallel/test_bridge_communicator.py | 0 | destroy all tracked grids and bridge communicator pgs. |
| HIGH | …_tests/pipeline_parallel/test_multimodule_schedules.py | 0 | destroy all tracked grids and bridge communicator pgs. |
| HIGH | tests/unit_tests/models/test_mimo_1f1b_schedule.py | 0 | destroy all tracked grids and bridge communicator pgs. |
| HIGH | tests/unit_tests/pipeline_parallel/test_helpers.py | 0 | helper method to get the model chunk id given the iteration number. |
| HIGH | tests/unit_tests/pipeline_parallel/test_helpers.py | 0 | helper method to get the model chunk id given the iteration number. |
| HIGH | megatron/core/pipeline_parallel/schedules.py | 0 | helper method to get the model chunk id given the iteration number. |
| HIGH | tests/unit_tests/pipeline_parallel/test_helpers.py | 0 | helper method to get the microbatch_id within model chunk given the iteration number. |
| HIGH | tests/unit_tests/pipeline_parallel/test_helpers.py | 0 | helper method to get the microbatch_id within model chunk given the iteration number. |
| HIGH | megatron/core/pipeline_parallel/schedules.py | 0 | helper method to get the microbatch_id within model chunk given the iteration number. |
| HIGH | tests/unit_tests/pipeline_parallel/test_helpers.py | 0 | check if an iteration is the first for a model chunk. |
| HIGH | tests/unit_tests/pipeline_parallel/test_helpers.py | 0 | check if an iteration is the first for a model chunk. |
| HIGH | megatron/core/pipeline_parallel/schedules.py | 0 | check if an iteration is the first for a model chunk. |
| HIGH | tests/unit_tests/pipeline_parallel/test_helpers.py | 0 | check if an iteration is the last for a model chunk. |
| HIGH | tests/unit_tests/pipeline_parallel/test_helpers.py | 0 | check if an iteration is the last for a model chunk. |
| HIGH | megatron/core/pipeline_parallel/schedules.py | 0 | check if an iteration is the last for a model chunk. |
| HIGH | tests/unit_tests/models/test_mimo_1f1b_schedule.py | 0 | return a transformerconfig for the vision projection mlp. |
| HIGH | examples/mimo/configs/llava_vlm.py | 0 | return a transformerconfig for the vision projection mlp. |
| HIGH | examples/mimo/configs/llava_avlm.py | 0 | return a transformerconfig for the vision projection mlp. |
| HIGH | …unit_tests/dist_checkpointing/models/test_gpt_model.py | 0 | test model loading with different vocab size (caused by tp padding). |
| HIGH | …nit_tests/dist_checkpointing/models/test_bert_model.py | 0 | test model loading with different vocab size (caused by tp padding). |
| HIGH | tests/unit_tests/dist_checkpointing/models/common.py | 0 | test model loading with different vocab size (caused by tp padding). |
| HIGH | …ultimodal/evaluation/evaluate_video_phys_game_bench.py | 0 | merge input files to a format compatible with the evaluator. |
| HIGH | examples/multimodal/evaluation/evaluate_realworldqa.py | 0 | merge input files to a format compatible with the evaluator. |
| HIGH | examples/multimodal/evaluation/evaluate_ai2d.py | 0 | merge input files to a format compatible with the evaluator. |
| HIGH | …es/multimodal/evaluation/evaluate_video_motionbench.py | 0 | merge input files to a format compatible with the evaluator. |
| HIGH | examples/multimodal/evaluation/evaluate_spdocvqa.py | 0 | merge input files to a format compatible with the evaluator. |
| HIGH | examples/multimodal/evaluation/evaluate_textvqa.py | 0 | merge input files to a format compatible with the evaluator. |
| 108 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | train_rl.py | 4 | |
| LOW | mamba_builders.py | 14 | |
| LOW | mamba_builders.py | 15 | |
| LOW | gpt_builders.py | 19 | |
| LOW | tools/merge_datasets.py | 3 | |
| LOW | tools/preprocess_data_nmt.py | 6 | |
| LOW | tools/run_inference_performance_test.py | 11 | |
| LOW | tools/run_inference_performance_test.py | 12 | |
| LOW | tools/run_inference_performance_test.py | 30 | |
| LOW | tools/run_inference_performance_test.py | 36 | |
| LOW | tools/run_inference_performance_test.py | 40 | |
| LOW | tools/run_inference_performance_test.py | 41 | |
| LOW | tools/linter.py | 11 | |
| LOW | tools/preprocess_mmdata.py | 12 | |
| LOW | tools/checkpoint/saver_core.py | 3 | |
| LOW | tools/checkpoint/saver_core.py | 5 | |
| LOW | tools/checkpoint/saver_core.py | 7 | |
| LOW | tools/checkpoint/saver_core.py | 8 | |
| LOW | tools/checkpoint/remap_gpt_dsa_to_mamba.py | 38 | |
| LOW | tools/checkpoint/remap_gpt_dsa_to_mamba.py | 39 | |
| LOW | tools/checkpoint/remap_gpt_dsa_to_mamba.py | 108 | |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 122 | |
| LOW | tools/checkpoint/dist_checkpoint_io.py | 20 | |
| LOW | tools/checkpoint/dist_checkpoint_io.py | 31 | |
| LOW | tools/checkpoint/utils.py | 3 | |
| LOW | tools/checkpoint/hybrid_conversion.py | 10 | |
| LOW | tools/checkpoint/loader_mixtral_hf.py | 3 | |
| LOW | tools/checkpoint/loader_mixtral_hf.py | 167 | |
| LOW | tools/checkpoint/saver_llava.py | 2 | |
| LOW | tools/checkpoint/saver_llava.py | 4 | |
| LOW | tools/checkpoint/saver_llava.py | 173 | |
| LOW | tools/checkpoint/saver_llava.py | 173 | |
| LOW | tools/checkpoint/checkpoint_inspector.py | 37 | |
| LOW | tools/checkpoint/loader_core.py | 3 | |
| LOW | tools/checkpoint/loader_core.py | 4 | |
| LOW | tools/checkpoint/loader_core.py | 5 | |
| LOW | tools/checkpoint/loader_core.py | 6 | |
| LOW | tools/checkpoint/loader_core.py | 7 | |
| LOW | tools/checkpoint/saver_base.py | 2 | |
| LOW | tools/checkpoint/loader_llava.py | 3 | |
| LOW | tools/checkpoint/loader_llava.py | 7 | |
| LOW | tools/bert_embedding/embed.py | 3 | |
| LOW | tools/bert_embedding/embed.py | 8 | |
| LOW | tools/bert_embedding/embed.py | 15 | |
| LOW | tools/bert_embedding/embed.py | 16 | |
| LOW | tools/bert_embedding/embed.py | 19 | |
| LOW | tools/bert_embedding/embed.py | 21 | |
| LOW | tools/bert_embedding/embed.py | 21 | |
| LOW | tools/bert_embedding/__init__.py | 3 | |
| LOW | tools/bert_embedding/__init__.py | 3 | |
| LOW | …formance_tests/shell_test_utils/compare_to_baseline.py | 23 | |
| LOW | tests/unit_tests/test_fp8_param.py | 3 | |
| LOW | tests/unit_tests/test_fp8_param.py | 38 | |
| LOW | tests/unit_tests/test_inference.py | 3 | |
| LOW | tests/unit_tests/test_basic.py | 2 | |
| LOW | tests/unit_tests/conftest.py | 4 | |
| LOW | tests/unit_tests/test_emerging_optimizers.py | 11 | |
| LOW | tests/unit_tests/test_imports.py | 14 | |
| LOW | tests/unit_tests/test_fp4_param.py | 5 | |
| LOW | tests/unit_tests/test_fp4_param.py | 11 | |
| 735 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | gpt_builders.py | 24 | |
| LOW | tasks/finetune_utils.py | 147 | |
| LOW | tasks/eval_utils.py | 65 | |
| LOW | .gitlab/scripts/check_imports.py | 66 | |
| LOW | .gitlab/scripts/check_imports.py | 123 | |
| LOW | tools/preprocess_data.py | 310 | |
| LOW | tools/run_vlm_text_generation.py | 88 | |
| LOW | tools/run_text_generation_server.py | 115 | |
| LOW | tools/checkpoint/saver_hf_llava.py | 69 | |
| LOW | tools/checkpoint/loader_base.py | 254 | |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 631 | |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 726 | |
| LOW | tools/checkpoint/dist_checkpoint_io.py | 131 | |
| LOW | tools/checkpoint/hybrid_conversion.py | 59 | |
| LOW | tools/checkpoint/hybrid_conversion.py | 114 | |
| LOW | tools/checkpoint/hybrid_conversion.py | 170 | |
| LOW | tools/checkpoint/hybrid_conversion.py | 222 | |
| LOW | tools/checkpoint/saver_llava.py | 183 | |
| LOW | tools/checkpoint/checkpoint_inspector.py | 74 | |
| LOW | tools/checkpoint/checkpoint_inspector.py | 335 | |
| LOW | tools/checkpoint/checkpoint_inspector.py | 951 | |
| LOW | tools/checkpoint/saver_base.py | 365 | |
| LOW | tools/checkpoint/loader_llava.py | 161 | |
| LOW | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 63 | |
| LOW | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 109 | |
| LOW | …formance_tests/shell_test_utils/compare_to_baseline.py | 36 | |
| LOW | tests/unit_tests/test_fp8_param.py | 214 | |
| LOW | tests/unit_tests/test_optimizer.py | 574 | |
| LOW | tests/unit_tests/test_utils.py | 311 | |
| LOW | tests/unit_tests/test_imports.py | 63 | |
| LOW | tests/unit_tests/test_layer_wise_optimizer.py | 254 | |
| LOW | tests/unit_tests/test_layer_wise_optimizer.py | 724 | |
| LOW | tests/unit_tests/ssm/ops/test_ssd_bmm.py | 68 | |
| LOW | tests/unit_tests/tokenizers/test_tokenizer.py | 290 | |
| LOW | …t_tests/tools/checkpoint/test_gpt_hybrid_conversion.py | 461 | |
| LOW | tests/unit_tests/extension/test_kitchen_sdpa.py | 60 | |
| LOW | …egatron_fsdp/test_mcore_fully_sharded_data_parallel.py | 1067 | |
| LOW | …egatron_fsdp/test_mcore_fully_sharded_data_parallel.py | 1139 | |
| LOW | …egatron_fsdp/test_mcore_fully_sharded_data_parallel.py | 1142 | |
| LOW | …ts/distributed/megatron_fsdp/test_mfsdp_fully_shard.py | 176 | |
| LOW | …ts/distributed/megatron_fsdp/test_mfsdp_fully_shard.py | 888 | |
| LOW | …ts/distributed/megatron_fsdp/test_mfsdp_fully_shard.py | 1058 | |
| LOW | tests/unit_tests/transformer/test_cuda_graphs.py | 450 | |
| LOW | tests/unit_tests/transformer/test_cuda_graphs.py | 642 | |
| LOW | tests/unit_tests/transformer/test_transformer_block.py | 405 | |
| LOW | …mental_attention_variant/test_attention_variant_dsa.py | 411 | |
| LOW | …mental_attention_variant/test_attention_variant_dsa.py | 997 | |
| LOW | …mental_attention_variant/test_attention_variant_dsa.py | 1031 | |
| LOW | …mental_attention_variant/test_attention_variant_dsa.py | 1117 | |
| LOW | …mental_attention_variant/test_attention_variant_dsa.py | 1245 | |
| LOW | …mental_attention_variant/test_attention_variant_dsa.py | 1282 | |
| LOW | …mental_attention_variant/test_attention_variant_dsa.py | 1496 | |
| LOW | tests/unit_tests/training/config/test_container_base.py | 248 | |
| LOW | …it_tests/pipeline_parallel/test_bridge_communicator.py | 311 | |
| LOW | …sts/pipeline_parallel/test_multimodule_communicator.py | 170 | |
| LOW | …sts/pipeline_parallel/test_multimodule_communicator.py | 609 | |
| LOW | …_tests/pipeline_parallel/test_multimodule_schedules.py | 258 | |
| LOW | tests/unit_tests/pipeline_parallel/test_schedules.py | 89 | |
| LOW | …ne_parallel/test_fine_grained_activation_offloading.py | 335 | |
| LOW | …ts/unit_tests/models/test_dsa_gpt_mamba_equivalence.py | 221 | |
| 484 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| CRITICAL | tools/checkpoint/loader_mixtral_hf.py | 69 | model.embedding.word_embeddings.weight.data.copy_( |
| CRITICAL | tools/checkpoint/loader_mixtral_hf.py | 74 | model.decoder.final_layernorm.weight.data.copy_(hf_model.model.norm.weight) |
| CRITICAL | tools/checkpoint/loader_mixtral_hf.py | 103 | layer.mlp.router.weight.data.copy_(hf_layer.block_sparse_moe.gate.weight) |
| CRITICAL | tools/checkpoint/loader_mixtral_hf.py | 127 | layer.self_attention.linear_qkv.layer_norm_weight.data.copy_(hf_layer.input_layernorm.weight) |
| CRITICAL | tools/checkpoint/saver_llava.py | 199 | model.vision_model.conv1.weight.data.copy_(vit_embeddings_msg["conv1 weight"]) |
| CRITICAL | tools/checkpoint/saver_llava.py | 201 | model.vision_model.conv1.bias.data.copy_(vit_embeddings_msg["conv1 bias"]) |
| CRITICAL | tools/checkpoint/saver_llava.py | 202 | model.vision_model.position_embeddings.weight.data.copy_(vit_embeddings_msg["position embeddings"]) |
| CRITICAL | tools/checkpoint/saver_llava.py | 208 | model.vision_model.embedder.weight.data.copy_(embedder_weight[tp_rank]) |
| CRITICAL | tools/checkpoint/saver_llava.py | 210 | model.vision_model.embedder.bias.data.copy_(embedder_bias[tp_rank]) |
| CRITICAL | tools/checkpoint/saver_llava.py | 214 | model.vision_model.ln_pre.weight.data.copy_(vit_embeddings_msg["ln pre weight"]) |
| CRITICAL | tools/checkpoint/saver_llava.py | 215 | model.vision_model.ln_pre.bias.data.copy_(vit_embeddings_msg["ln pre bias"]) |
| CRITICAL | tools/checkpoint/saver_llava.py | 218 | model.vision_model.ln_post.weight.data.copy_(vit_embeddings_msg["ln post weight"]) |
| CRITICAL | tools/checkpoint/saver_llava.py | 219 | model.vision_model.ln_post.bias.data.copy_(vit_embeddings_msg["ln post bias"]) |
| CRITICAL | tools/checkpoint/saver_llava.py | 343 | model.vision_projection.encoder.linear_fc1.weight.data.copy_( |
| CRITICAL | tools/checkpoint/saver_llava.py | 345 | model.vision_projection.encoder.linear_fc2.weight.data.copy_( |
| CRITICAL | tools/checkpoint/saver_llava.py | 348 | model.vision_projection.encoder.linear_fc1.layer_norm_weight.data.copy_( |
| CRITICAL | tools/checkpoint/saver_llava.py | 351 | model.vision_projection.encoder.linear_fc1.layer_norm_bias.data.copy_( |
| CRITICAL | tools/checkpoint/saver_llava.py | 354 | model.vision_projection.encoder.linear_fc1.bias.data.copy_( |
| CRITICAL | tools/checkpoint/saver_llava.py | 356 | model.vision_projection.encoder.linear_fc2.bias.data.copy_(vision_projection_l1_bias) |
| CRITICAL | tests/unit_tests/transformer/moe/test_routers.py | 115 | assert self.sequential_mlp.router.weight.grad.abs().sum() == 0 |
| CRITICAL | tests/unit_tests/transformer/moe/test_routers.py | 121 | assert self.sequential_mlp.router.weight.grad.abs().sum() > 0 |
| CRITICAL | tests/unit_tests/transformer/moe/test_routers.py | 126 | self.sequential_mlp.router.weight.grad.fill_(0) |
| CRITICAL | tests/unit_tests/transformer/moe/test_routers.py | 129 | assert self.sequential_mlp.router.weight.grad.abs().sum() > 0 |
| CRITICAL | tests/unit_tests/models/test_gpt_model.py | 87 | assert self.gpt_model.embedding.word_embeddings.weight.std().cpu().item() == approx( |
| CRITICAL | tests/unit_tests/models/test_gpt_model.py | 90 | assert self.gpt_model.embedding.word_embeddings.weight.mean().cpu().item() == approx( |
| CRITICAL | megatron/core/parallel_state.py | 2158 | and torch.distributed.distributed_c10d._world.pg_map.get(_DATA_PARALLEL_GROUP_GLOO, None) |
| CRITICAL | megatron/core/parallel_state.py | 2167 | and torch.distributed.distributed_c10d._world.pg_map.get( |
| CRITICAL | megatron/core/parallel_state.py | 2206 | and torch.distributed.distributed_c10d._world.pg_map.get( |
| CRITICAL | megatron/core/parallel_state.py | 2220 | and torch.distributed.distributed_c10d._world.pg_map.get( |
| CRITICAL | megatron/core/transformer/attention.py | 1731 | self.linear_qkv.weight.main_param.data.copy_( |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | gpt_builders.py | 43 | # Define the decoder block spec |
| MEDIUM | gpt_builders.py | 55 | # Define the decoder layer spec |
| MEDIUM | gpt_builders.py | 68 | # Define the decoder block spec |
| MEDIUM | .gitlab/stages/05.publish.yml | 149 | # Define the full refspec for the branch |
| MEDIUM | tools/run_inference_performance_test.py | 112 | # Create a list of valid token IDs |
| MEDIUM | tools/common_pile_dataset/setup_common_pile_dataset.sh | 87 | # Create a virtual environment to avoid system package conflicts |
| MEDIUM | tools/common_pile_dataset/setup_common_pile_dataset.sh | 122 | # Create the output directory |
| MEDIUM | tests/unit_tests/test_optimizer.py | 790 | # Create a new state_dict with all params set to 3. |
| MEDIUM | tests/unit_tests/test_optimizer.py | 879 | # Create a simple model for testing |
| MEDIUM | tests/unit_tests/test_optimizer.py | 961 | # Create a simple model |
| MEDIUM | tests/unit_tests/test_utils.py | 454 | # Create a straggler_detector with enabled set to false. |
| MEDIUM | tests/unit_tests/test_hyper_comm_grid.py | 431 | # Create a process group |
| MEDIUM | tests/unit_tests/test_hyper_comm_grid.py | 434 | # Create a tensor for communication test |
| MEDIUM | tests/unit_tests/test_hyper_comm_grid.py | 535 | # Create a unique tensor based on rank |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 70 | # Create a simple linear model for testing |
| MEDIUM | tests/unit_tests/test_emerging_optimizers.py | 640 | # Create a model with QKV-like parameter |
| MEDIUM | tests/unit_tests/test_training.py | 143 | # Create a mock state_dict with gradients (use deterministic values for reproducibility). |
| MEDIUM | tests/unit_tests/ssm/ops/test_ssm_kernel.py | 83 | # Create the Mixer instance directly |
| MEDIUM | tests/unit_tests/fusions/test_torch_softmax.py | 197 | # Create a padding mask |
| MEDIUM | …/unit_tests/post_training/test_modelopt_module_spec.py | 238 | # Define the expected signature |
| MEDIUM | …/unit_tests/post_training/test_modelopt_module_spec.py | 277 | # Define the expected signature |
| MEDIUM | …tests/distributed/test_torch_fully_sharded_parallel.py | 56 | # Create a dummy model and configs. |
| MEDIUM | …tests/distributed/test_torch_fully_sharded_parallel.py | 63 | # Create the sharded model. |
| MEDIUM | …tests/distributed/test_torch_fully_sharded_parallel.py | 84 | # Create a dummy model and configs. |
| MEDIUM | …tests/distributed/test_torch_fully_sharded_parallel.py | 90 | # Create a custom process group (using the default world for testing) |
| MEDIUM | …tests/distributed/test_torch_fully_sharded_parallel.py | 93 | # Create the sharded model with explicit process group |
| MEDIUM | …distributed/megatron_fsdp/test_mfsdp_uneven_dtensor.py | 675 | # Create a manual uneven sharding along dim 1 with possible zero-length local on some ranks |
| MEDIUM | …/unit_tests/transformer/test_multi_token_prediction.py | 774 | # Create a dummy loss tensor |
| MEDIUM | …tests/transformer/test_transformer_block_custom_pgs.py | 259 | # Create a transformer block with default process groups |
| MEDIUM | …tests/transformer/test_transformer_block_custom_pgs.py | 285 | # Create a transformer block with custom process groups |
| MEDIUM | …tests/transformer/test_transformer_block_custom_pgs.py | 737 | # Create a single transformer block |
| MEDIUM | tests/unit_tests/transformer/test_cuda_graphs.py | 718 | # Create the CUDA graphs - this is where the is_last_layer logic is tested |
| MEDIUM | tests/unit_tests/transformer/test_cuda_graphs.py | 965 | # Create a mapping of sample_keys to indices |
| MEDIUM | tests/unit_tests/transformer/test_transformer_block.py | 545 | # Create a new build_layers method that uses interleaved attention |
| MEDIUM | …ts/unit_tests/transformer/moe/test_token_dispatcher.py | 172 | # Create the answer. |
| MEDIUM | tests/unit_tests/transformer/moe/test_shared_experts.py | 96 | # Create a dummy input tensor. |
| MEDIUM | tests/unit_tests/transformer/moe/test_aux_loss.py | 211 | # Create a new config with updated parameters |
| MEDIUM | tests/unit_tests/transformer/moe/test_aux_loss.py | 214 | # Create the router with the updated config |
| MEDIUM | …s/unit_tests/training/config/test_instantiate_utils.py | 334 | # Create a mock that raises an error when used with functools.partial |
| MEDIUM | tests/unit_tests/training/config/test_yaml_utils.py | 215 | # Create a mock torch dtype |
| MEDIUM | tests/unit_tests/training/config/test_yaml_utils.py | 262 | # Create a mock GenerationConfig |
| MEDIUM | tests/unit_tests/utils/test_experimental_log_once.py | 23 | # Define a fresh function with the decorator so it has its own closure state. |
| MEDIUM | tests/unit_tests/models/test_mimo_audio_submodules.py | 212 | # Create a time array |
| MEDIUM | tests/unit_tests/models/test_mimo_audio_submodules.py | 215 | # Create a simple sine wave at 440 Hz (A4) |
| MEDIUM | tests/unit_tests/models/test_gpt_model.py | 237 | # Define the expected signature |
| MEDIUM | …sts/unit_tests/models/test_mimo_embedding_alignment.py | 22 | # Create a minimal MimoModelConfig |
| MEDIUM | …sts/unit_tests/models/test_mimo_embedding_alignment.py | 73 | # Create a simple batch |
| MEDIUM | …sts/unit_tests/models/test_mimo_embedding_alignment.py | 249 | # Create a test case with 2 batches: |
| MEDIUM | …sts/unit_tests/models/test_mimo_embedding_alignment.py | 285 | # Create the unflattened embeddings that would come from a vision encoder |
| MEDIUM | tests/unit_tests/models/test_fastconformer_model.py | 45 | # Create a parameter with the target dtype so ``next(self.parameters()).dtype`` |
| MEDIUM | tests/unit_tests/models/test_mimo_submodules.py | 92 | # Create the main module spec |
| MEDIUM | tests/unit_tests/models/test_mimo_submodules.py | 351 | # Create a data batch without images |
| MEDIUM | tests/unit_tests/dist_checkpointing/test_fp8.py | 33 | # Create a quantizer for FP8 conversion |
| MEDIUM | …s/unit_tests/dist_checkpointing/test_fully_parallel.py | 601 | # Create a mock that will do what it's supposed to do, |
| MEDIUM | …/unit_tests/inference/contexts/test_dynamic_context.py | 317 | # Initialize all variables |
| MEDIUM | …/unit_tests/inference/contexts/test_dynamic_context.py | 896 | # Create an active_requests_mask where requests 0, 2, and 4 are finished (0), |
| MEDIUM | …/unit_tests/inference/contexts/test_dynamic_context.py | 986 | # Create an active_requests_mask where all requests are finished |
| MEDIUM | …ts/unit_tests/inference/engines/test_dynamic_engine.py | 1894 | # Create a deterministic mock forward pass that returns logits |
| MEDIUM | …ts/unit_tests/inference/engines/test_dynamic_engine.py | 1590 | # Create a request with length 513 |
| MEDIUM | tests/unit_tests/data/test_builder.py | 93 | # Define the class here to avoid pytest warnings |
| 71 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tasks/finetune_utils.py | 61 | except Exception: |
| LOW | tasks/eval_utils.py | 114 | except Exception: |
| MEDIUM | tasks/eval_utils.py | 111 | def correct_answers_forward_step(batch, model): |
| LOW | .gitlab/scripts/check_imports.py | 117 | except Exception: |
| LOW | tools/check_copyright.py | 22 | except Exception as e: |
| MEDIUM | tools/check_copyright.py | 23 | print(f"Error reading {file_path}: {e}") |
| MEDIUM | tools/text_generation_cli.py | 21 | print(f"Error {response.status_code}: {response.json()['message']}") |
| LOW | tools/checkpoint/saver_hf_llava.py | 400 | except Exception as e: |
| LOW | tools/checkpoint/loader_base.py | 78 | except Exception as e: |
| MEDIUM | tools/checkpoint/loader_base.py | 79 | print(f"Error validating Megatron arguments: {e}") |
| LOW | tools/checkpoint/saver_core.py | 70 | except Exception as e: |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 384 | except Exception: |
| LOW | tools/checkpoint/dist_checkpoint_io.py | 215 | except Exception: |
| LOW | tools/checkpoint/hybrid_conversion.py | 322 | except Exception: |
| LOW | tools/checkpoint/loader_mixtral_hf.py | 340 | except Exception: |
| MEDIUM | tools/checkpoint/loader_mixtral_hf.py | 337 | def load_checkpoint(queue, args): |
| LOW | tools/checkpoint/saver_llava.py | 404 | except Exception as e: |
| LOW | tools/checkpoint/loader_core.py | 90 | except Exception as e: |
| LOW | tools/checkpoint/loader_llava.py | 360 | except Exception as e: |
| LOW | tools/bert_embedding/embed.py | 217 | except Exception: |
| LOW | tools/bert_embedding/embed.py | 223 | except Exception: |
| LOW | tests/unit_tests/conftest.py | 48 | except Exception: |
| LOW | tests/unit_tests/conftest.py | 96 | except Exception as e: |
| LOW | tests/unit_tests/test_hyper_comm_grid.py | 325 | except Exception as e: |
| LOW | tests/unit_tests/test_api_backwards_compat_setup.py | 106 | except Exception as e: |
| LOW | tests/unit_tests/test_api_backwards_compat_setup.py | 134 | except Exception as e: |
| LOW | tests/unit_tests/test_imports.py | 58 | except Exception: |
| LOW | tests/unit_tests/test_imports.py | 77 | except Exception: |
| LOW | tests/unit_tests/test_utilities.py | 107 | except Exception: |
| LOW | tests/unit_tests/tokenizers/test_tokenizer.py | 649 | except Exception: |
| LOW | tests/unit_tests/tokenizers/test_tokenizer.py | 18 | except Exception: |
| LOW | tests/unit_tests/tokenizers/test_tokenizer.py | 151 | except Exception: |
| LOW | tests/unit_tests/tokenizers/test_tokenizer.py | 177 | except Exception: |
| LOW | tests/unit_tests/tokenizers/test_tokenizer.py | 319 | except Exception: |
| LOW | …egatron_fsdp/test_mcore_fully_sharded_data_parallel.py | 1156 | except Exception: |
| LOW | …ts/distributed/megatron_fsdp/test_mfsdp_fully_shard.py | 73 | except Exception as e: |
| LOW | tests/unit_tests/transformer/moe/test_aux_loss.py | 38 | except Exception: # pragma: no cover - defensive |
| LOW | tests/unit_tests/transformer/moe/test_routers.py | 25 | except Exception: # pragma: no cover - defensive |
| LOW | tests/unit_tests/models/test_mimo_audio_submodules.py | 123 | except Exception as e: |
| LOW | tests/unit_tests/models/test_mimo_audio_submodules.py | 197 | except Exception as e: |
| LOW | tests/unit_tests/models/test_hybrid_moe_model.py | 405 | except Exception: |
| LOW | tests/unit_tests/models/test_mimo_submodules.py | 55 | except Exception as e: |
| LOW | tests/unit_tests/models/test_mimo_submodules.py | 105 | except Exception as e: |
| LOW | tests/unit_tests/models/test_mimo_submodules.py | 229 | except Exception as e: |
| LOW | tests/unit_tests/models/test_mimo_submodules.py | 284 | except Exception as e: |
| MEDIUM | tests/unit_tests/models/test_mimo_model.py | 147 | def setup_method(self, method): |
| MEDIUM | tests/unit_tests/models/test_mimo_model.py | 163 | def teardown_method(self, method): |
| MEDIUM | tests/unit_tests/models/test_mimo_model.py | 528 | def setup_method(self, method): |
| MEDIUM | tests/unit_tests/models/test_mimo_model.py | 542 | def teardown_method(self, method): |
| LOW | tests/unit_tests/models/test_mimo_model.py | 150 | except Exception: |
| LOW | tests/unit_tests/models/test_mimo_model.py | 166 | except Exception: |
| LOW | tests/unit_tests/models/test_mimo_model.py | 531 | except Exception: |
| LOW | tests/unit_tests/models/test_mimo_model.py | 545 | except Exception: |
| LOW | tests/unit_tests/dist_checkpointing/test_fp8.py | 24 | except Exception as e: |
| LOW | tests/unit_tests/dist_checkpointing/test_async_save.py | 30 | except Exception as e: |
| LOW | tests/unit_tests/resharding/test_model_swap.py | 34 | except Exception: |
| LOW | tests/unit_tests/resharding/test_model_swap.py | 44 | except Exception: |
| LOW | tests/unit_tests/resharding/test_model_swap.py | 365 | except Exception: |
| LOW | tests/unit_tests/resharding/test_model_swap.py | 492 | except Exception: |
| LOW | tests/unit_tests/resharding/test_model_swap.py | 644 | except Exception: |
| 132 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | train_rl.py | 62 | # Check if fp8_model_init supports preserve_high_precision_init_val |
| LOW | tools/check_copyright.py | 20 | # Check if the expected header is at the start of the file |
| LOW | tools/common_pile_dataset/setup_common_pile_dataset.sh | 50 | # Check if create_common_pile_ci_dataset.py was scp'd alongside this script |
| LOW | docker/common/install_source_wheels.sh | 25 | # Check if required arguments are provided |
| LOW | tests/unit_tests/test_optimizer.py | 34 | # Check if FP8 block scaling is available. |
| LOW | tests/unit_tests/test_utils.py | 457 | # Check if configuration was success. |
| LOW | tests/unit_tests/test_utils.py | 460 | # Check if the instance is in disabled state. |
| LOW | tests/unit_tests/test_utils.py | 464 | # Check if all ranks have straggler detector enabled. |
| LOW | tests/unit_tests/conftest.py | 83 | # Check if data directory exists and has content |
| LOW | tests/unit_tests/fusions/test_torch_softmax.py | 145 | # Check if output is a valid probability distribution |
| LOW | tests/unit_tests/transformer/test_cuda_graphs.py | 516 | # Check if cuda graph is correctly setting is first/last layer |
| LOW | tests/unit_tests/transformer/test_attention.py | 416 | # Check if output and bias have the correct shape |
| LOW | tests/unit_tests/transformer/test_attention.py | 598 | # Check if the output is close |
| LOW | …/unit_tests/transformer/test_multi_latent_attention.py | 1551 | # Check if the output is the same |
| LOW | …it_tests/transformer/moe/test_moe_layer_discrepancy.py | 69 | # Check if parameters are the same |
| LOW | …it_tests/transformer/moe/test_moe_layer_discrepancy.py | 75 | # Check if input is the same across all ranks |
| LOW | …it_tests/transformer/moe/test_moe_layer_discrepancy.py | 150 | # Check if output is the same across all ranks |
| LOW | …it_tests/transformer/moe/test_moe_layer_discrepancy.py | 216 | # Check if output is the same across all ranks |
| LOW | tests/unit_tests/transformer/moe/test_moe_layer.py | 164 | # Check if the moe layer is interleaved correctly |
| LOW | tests/unit_tests/models/test_llava_model.py | 753 | # Check if output shape is as expected |
| LOW | …_tests/dist_checkpointing/test_layer_wise_optimizer.py | 147 | # Check if optimizer is ChainedOptimizer (expected for standard setup) |
| LOW | …/unit_tests/inference/contexts/test_dynamic_context.py | 879 | # Assign blocks to the requests (one block per request) |
| LOW | …/unit_tests/inference/contexts/test_dynamic_context.py | 955 | # Assign blocks to the requests: |
| LOW | …ts/unit_tests/inference/engines/test_dynamic_engine.py | 3183 | # Check if any request was evicted during this step |
| LOW | …ts/test_utils/python_scripts/download_golden_values.py | 282 | # Check if we should skip based on only_failing flag |
| LOW | …nal_tests/test_cases/common/ckpt_converter/__main__.py | 895 | # Print results. |
| LOW | …unctional_tests/shell_test_utils/run_batch_ci_tests.sh | 173 | # Check if file is empty (job still running or not started) |
| LOW | tests/functional_tests/shell_test_utils/run_ci_test.sh | 11 | # Set umask to 0002 to allow group read/write permissions |
| LOW | tests/functional_tests/shell_test_utils/run_ci_test.sh | 257 | ## Loop over the list of model configs in the params file and run each one in sequence, collecting |
| LOW | docs/conf.py | 43 | # Check if we should skip autodoc generation |
| LOW | examples/post_training/modelopt/finetune.py | 271 | # Check if this is OpenAI chat data? |
| LOW | examples/mimo/utils/logging.py | 40 | # Print output projections |
| LOW | examples/gptoss/02_train.sh | 55 | # Check if checkpoint path exists |
| LOW | examples/gptoss/02_train.sh | 62 | # Check if tensorboard logs path exists |
| LOW | examples/multimodal/dataset_helpers.py | 121 | # Check if all samples fit in the knapsack capacity. |
| LOW | examples/inference/utils.py | 235 | # Check if we have any prompts (from command line or JSONL) |
| LOW | examples/inference/advanced/gpt_dynamic_inference.py | 263 | # Check if all requests are finished. |
| LOW | examples/rl/benchmark_refit.py | 216 | # Print results |
| LOW | examples/rl/benchmark_refit.py | 312 | # Print results |
| LOW | examples/rl/environments/countdown/countdown.py | 34 | # Check if all numbers in equation are available |
| LOW | scripts/check_api_backwards_compatibility.py | 240 | # Check if this breakage kind should be ignored globally (not a signature change) |
| LOW | scripts/check_api_backwards_compatibility.py | 253 | # Check if this is a breakage kind we ignore for __init__ methods |
| LOW | scripts/check_api_backwards_compatibility.py | 263 | # Check if it's a child of a filtered object |
| LOW | scripts/check_api_backwards_compatibility.py | 350 | # Print results |
| LOW | .github/actions/check-nvidia-sso-membership/action.yml | 94 | # Check if SSO file is available |
| LOW | .github/actions/check-nvidia-sso-membership/action.yml | 103 | # Check if username exists as a key in the JSON object |
| LOW | megatron/post_training/model_builder.py | 225 | # Set num_layers to 0 for base model in offline mode |
| LOW | megatron/core/parallel_state.py | 836 | # Set NCCL_COLLNET_ENABLE to 1 to enable SHARP for the dp group. |
| LOW | megatron/core/parallel_state.py | 1275 | # Set NCCL_COLLNET_ENABLE to 1 to enable SHARP for the dp_replica group. |
| LOW | megatron/core/parallel_state.py | 1304 | # Set NCCL_COLLNET_ENABLE to 0 to restrict SHARP application to the dp_replica group. |
| LOW | megatron/core/fp4_utils.py | 14 | # Check if Transformer Engine is installed |
| LOW | megatron/core/fp4_utils.py | 25 | # Check if Transformer Engine has class for fp4 tensors. |
| LOW | megatron/core/fp8_utils.py | 23 | # Check if Transformer Engine is installed |
| LOW | megatron/core/fp8_utils.py | 40 | # Check if Transformer Engine has class for fp8 tensors. |
| LOW | megatron/core/fp8_utils.py | 55 | # Check if Transformer Engine has MXFP8Tensor class |
| LOW | megatron/core/fp8_utils.py | 635 | # Check if fp8_model_init supports setting recipe |
| LOW | megatron/core/fp8_utils.py | 640 | # Check if fp8_model_init supports preserve_high_precision_init_val |
| LOW | megatron/core/utils.py | 2841 | # Check if any deprecated key is present in kwargs |
| LOW | megatron/core/ssm/ops/causal_conv1d_triton.py | 243 | # Check if input is 2D, temporarily treat as 3D for uniform processing |
| LOW | megatron/core/tokenizers/megatron_tokenizer.py | 67 | # Check if metadata file exists |
| 67 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | tools/checkpoint/remap_gpt_dsa_to_mamba.py | 46 | Return the HybridModel state-dict key corresponding to *key* from GPTModel. Args: key: A key from the GPTMo |
| HIGH | …egatron_fsdp/test_mcore_fully_sharded_data_parallel.py | 774 | Run a small deterministic (optional) training loop using a mocked MoE/GPT model and optimizer. This hel |
| HIGH | megatron/core/process_groups_config.py | 668 | Get process group collection for a specific module. Args: module_name: Name of the module. |
| HIGH | megatron/core/hyper_comm_grid.py | 121 | Create a process group based on a list of dimension names Note: The unique key used to store the process group |
| HIGH | megatron/core/utils.py | 120 | Validates the request to the experimental function. Args: func (Callable): Callee max_l |
| HIGH | megatron/core/utils.py | 186 | Validates the request to the experimental function. Args: func (Callable): Callee max_l |
| HIGH | megatron/core/utils.py | 218 | Pass-through to callee attribute if experimental flag is enabled. Args: super (supe |
| HIGH | megatron/core/timers.py | 385 | Returns the output string with logged timer values according to configured options. Args: names (Li |
| HIGH | …ore/tokenizers/text/libraries/huggingface_tokenizer.py | 189 | Adds a dictionary of special tokens (eos, pad, cls...). If special tokens are NOT in the vocabulary, th |
| HIGH | megatron/core/fusions/fused_bias_swiglu.py | 210 | Implementation of biased SwiGLU that handles different input shapes. This function reshapes the input if necessary, |
| HIGH | megatron/core/fusions/fused_bias_geglu.py | 154 | Implementation of biased GEGLU that handles different input shapes. This function reshapes the input if necessary, |
| HIGH | megatron/core/optimizer/optimizer.py | 414 | Filter and reorder state_dict parameter groups to match current optimizer groups. Keys used for matching align w |
| HIGH | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 4187 | Release the specified parameter bucket, freeing its associated buffer storage. This function marks or |
| HIGH | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 4598 | Creates a distributed tensor (DTensor) from a local tensor with support for Megatron-FSDP and Tensor Parallel s |
| HIGH | …e/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py | 140 | Validates the chunk metadata of an uneven DTensor to ensure correctness and boundary coverage. Notes: - `g |
| HIGH | …e/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py | 257 | Gather a DTensor with potentially uneven sharding across ranks into a full tensor. This function handles DTens |
| HIGH | …core/distributed/fsdp/src/megatron_fsdp/fully_shard.py | 446 | Fully shard the optimizer for Megatron-FSDP. This is an in-place operation on the optimizer instance, which mod |
| HIGH | …tron/core/datasets/blended_megatron_dataset_builder.py | 497 | Build the DistributedDataset Return None if and only if the underlying dataset class is not built on the curren |
| HIGH | megatron/core/datasets/object_storage_utils.py | 136 | Ascertain whether the object at the given S3 path exists in S3 Args: client (S3Client): The S3 client |
| HIGH | megatron/core/datasets/indexed_dataset.py | 88 | Get the size of the dtype/code in bytes Args: key (Union[int, Type[numpy.number]]): The dtype or co |
| HIGH | megatron/core/datasets/indexed_dataset.py | 798 | Return from the dataset Args: idx (Union[int, numpy.integer, slice]): The index or index slice into |
| HIGH | …ron/core/pipeline_parallel/multimodule_communicator.py | 496 | Compute the total number of pipeline stages across a multi-module chain. Interprets ``topology`` as a directed |
| HIGH | megatron/core/models/hybrid/hybrid_layer_allocation.py | 161 | Count layers by type across the full hybrid pattern (main + MTP). Parses the pattern to extract main and MTP compon |
| HIGH | megatron/core/models/hybrid/hybrid_layer_allocation.py | 200 | Parse a unified hybrid pattern string into main and MTP components. The pattern uses "/" as a separator between the |
| HIGH | megatron/core/models/hybrid/hybrid_layer_allocation.py | 301 | Validate and convert a single pipeline segment pattern to a layer type list. This is used after the main pattern ha |
| HIGH | megatron/core/models/hybrid/hybrid_layer_allocation.py | 337 | Select and validate the pipeline segment for the given PP rank and VP stage. When the main pattern contains '|' pip |
| HIGH | megatron/core/dist_checkpointing/validation.py | 294 | Raises or logs an error in case missing or unexpected keys are non-empty. Args: missing_keys (Set[str]): mi |
| HIGH | megatron/core/dist_checkpointing/validation.py | 372 | Validate if the ShardedTensors and ShardedObjects from multiple processes define correct sharding. Local ShardedTen |
| HIGH | …tron/core/dist_checkpointing/strategies/async_utils.py | 669 | Finalizes all available calls. This method must be called on all ranks. Args: blocking (bo |
| HIGH | …ng/nvshmem_copy_service/memory/tensor_pointer_utils.py | 19 | Extract the data pointer from a tensor. Args: tensor: Can be torch.Tensor, CuPy array, or |
| HIGH | megatron/core/export/trtllm/trtllm_layers.py | 85 | Helper function to rename model layer names to TRTLLM Layer names We go through each layer (keys) in the model |
| HIGH | megatron/training/arguments.py | 183 | Validate model config arguments from heterogeneous config. This function takes model arguments and validates them b |
| HIGH | megatron/training/config/instantiate_utils.py | 128 | Instantiate an object or callable from a config object. This function takes a configuration object (dictionary, lis |
| HIGH | megatron/training/config/instantiate_utils.py | 232 | Recursively instantiates a node within a configuration structure. This function handles the instantiation of indivi |
| HIGH | megatron/rl/rl_utils.py | 1181 | Pad trajectories and extract the generation masks. Args: rollouts: Rollouts to extract trajectories from. |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | .gitlab/scripts/check_imports.py | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. |
| LOW | tools/bisect.sh | 1 | #!/usr/bin/env bash |
| LOW | tools/trigger_internal_ci.py | 1 | #!/usr/bin/env python3 |
| LOW | tools/checkpoint/convert.py | 1 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. |
| LOW | tools/checkpoint/convert.py | 21 | # full model weights, nothing split. |
| LOW | tools/checkpoint/convert.py | 41 | # consumed_train_samples |
| LOW | tools/checkpoint/convert.py | 61 | # "mlp l0 bias" |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 221 | else: |
| LOW | tools/checkpoint/gpt_hybrid_conversion.py | 241 | # |
| LOW | tools/common_pile_dataset/setup_common_pile_dataset.sh | 1 | #!/bin/bash |
| LOW | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 1 | #!/usr/bin/env bash |
| LOW | …ts/performance_tests/shell_test_utils/run_perf_test.sh | 101 | # from /usr/lib/python3.12/dist-packages — but mamba-ssm + causal-conv1d (and |
| LOW | tests/unit_tests/test_imports.py | 1 | # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. |
| LOW | …s/checkpoint/test_gpt_hybrid_conversion_parallelism.py | 41 | # tiny synthetic DCP checkpoint and round-trips it through the converter on |
| LOW | …unit_tests/transformer/test_fsdp_dtensor_checkpoint.py | 1 | # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. |
| LOW | …s/unit_tests/elastification/test_hybrid_flex_router.py | 81 | tensor_model_parallel_size=1, pipeline_model_parallel_size=1 |
| LOW | …s/unit_tests/pipeline_parallel/test_pipeline_layout.py | 361 | |
| LOW | tests/unit_tests/inference/test_hybrid_moe.py | 61 | # Combinatorial sweep: unordered combinations with repetition of ALL_STATES |
| LOW | tests/test_utils/python_scripts/recipe_parser.py | 21 | # cadence remains the trigger axis. |
| LOW | tests/test_utils/recipes/h100/moe.yaml | 101 | - environment: [lts] |
| LOW | tests/test_utils/recipes/h100/moe.yaml | 221 | # Super important mr, mr-github tests that run for both DEV and LTS per mr, mr-github # |
| LOW | tests/test_utils/recipes/h100/gpt.yaml | 461 | # scope: [nightly] # Requires PyT 2.4: #481 |
| LOW | tests/test_utils/recipes/gb200/moe.yaml | 101 | platforms: [dgx_gb200] |
| LOW | tests/test_utils/recipes/gb200/moe.yaml | 201 | # - test_case: [gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer] |
| LOW | …unctional_tests/test_cases/common/moe_perf/__main__.py | 401 | |
| LOW | …unctional_tests/test_cases/common/moe_perf/__main__.py | 421 | # test_moe_layer_performance(case, debug_mode=True) |
| LOW | …unctional_tests/shell_test_utils/run_batch_ci_tests.sh | 1 | #!/bin/bash |
| LOW | docs/autodoc2_docstrings_parser.py | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | docs/conf.py | 1 | # Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved. |
| LOW | examples/megatron_fsdp/train_llama3_8b_fsdp_h100_fp8.sh | 121 | --ckpt-format fsdp_dtensor |
| LOW | examples/megatron_fsdp/sbatch_checkpoint_convert.sh | 21 | SLURM_LOGS="${OUTPUT_PATH}/slurm_logs" |
| LOW | examples/mimo/train.py | 121 | # iterator exhausted on all ranks |
| LOW | examples/inference/run_inference_server.sh | 1 | #!/bin/bash |
| LOW | examples/inference/run_offline_inference.sh | 1 | #!/bin/bash |
| LOW | .github/workflows/sync-team-usergroups.yml | 1 | # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/cicd-approve-test-queue.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/release-docs.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. |
| LOW | .github/workflows/community-bot.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/nightly-sync-main-to-dev.yml | 1 | # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/release.yaml | 1 | # Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/sync-skills.yml | 1 | # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/close-inactive-issue-pr.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/oncall-assign.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/oncall-rotation.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/cherry-pick-release-commit.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/build-docs.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. |
| LOW | .github/workflows/release-nightly-docs.yml | 1 | # Copyright (c) 2026, NVIDIA CORPORATION. |
| LOW | .github/workflows/cicd-main.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/install-test.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/release-freeze.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/workflows/copyright-check.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. |
| LOW | .github/scripts/oncall_manager.py | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/scripts/sync_team_usergroups.py | 1 | # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. |
| LOW | .github/actions/action.yml | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. |
| LOW | skills/nightly-sync/SKILL.md | 261 | # pyproject.toml is allowed to differ ONLY for git source reconciliation |
| LOW | megatron/core/parallel_state.py | 41 | _DATA_PARALLEL_GROUP_GLOO = None |
| LOW | megatron/core/parallel_state.py | 1021 | global _EMBEDDING_GROUP |
| LOW | megatron/core/parallel_state.py | 1041 | # UCC backend requires CUDA_DEVICE_MAX_CONNECTIONS variable to be larger than 1, |
| LOW | megatron/core/config_logger.py | 1 | # Copyright (c) 2025, NVIDIA CORPORATION. |
| LOW | megatron/core/rerun_state_machine.py | 821 | # caller isn't required to have wrapped its iterator in |
| 52 more matches not shown… | |||

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tools/checkpoint/saver_hf_llava.py | 17 | # Step 1: Reshape back to (num_head, 3*head_dim, -1) |
| LOW | tools/checkpoint/saver_hf_llava.py | 20 | # Step 2: Slice along the head_dim dimension to get q, k, v |
| LOW | tools/checkpoint/saver_hf_llava.py | 25 | # Step 3: Reshape each back to (num_head * head_dim, -1) |
| LOW | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 361 | # Step 1: Get vocabulary files |
| LOW | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 387 | # Step 2: Get raw text data |
| LOW | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 411 | # Step 3: Preprocess for GPT (GPT2BPETokenizer) |
| LOW | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 434 | # Step 4: Preprocess for BERT (BertWordPieceLowerCase + split-sentences) |
| LOW | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 475 | # Step 5: Preprocess for T5 (BertWordPieceCase) |
| LOW | …s/common_pile_dataset/create_common_pile_ci_dataset.py | 497 | # Step 6: Clean up and verify |
| LOW | …/unit_tests/inference/contexts/test_dynamic_context.py | 1796 | # Step 1: Forward pass for all 3 requests |
| LOW | …/unit_tests/inference/contexts/test_dynamic_context.py | 1805 | # Step 2: Forward pass where req 10 finishes, req 11 continues. Req 999 is NOT scheduled. |
| LOW | …/unit_tests/inference/contexts/test_dynamic_context.py | 1827 | # Step 3: Add the next chunk. It should sit exactly at the boundary (index 1) and inherit the state. |
| LOW | …/unit_tests/inference/contexts/test_dynamic_context.py | 1894 | # Step 1: All 3 requests are active, process forward pass |
| LOW | …/unit_tests/inference/contexts/test_dynamic_context.py | 1903 | # Step 2: Both decode requests finish, chunked prefill NOT scheduled this step. |
| LOW | …ts/unit_tests/inference/engines/test_dynamic_engine.py | 2490 | # Step 1: Prefill. Processes the 4 prompt tokens. |
| LOW | …l_tests/python_test_utils/compute_golden_statistics.py | 11 | # Step 1: Run batch tests (from megatron-rl directory): |
| LOW | …l_tests/python_test_utils/compute_golden_statistics.py | 15 | # Step 2: Wait for jobs to complete, then compute statistics: |
| LOW | .github/scripts/oncall_manager.py | 218 | # Step 1: Add new oncall first (include current members to avoid removing anyone yet) |
| LOW | .github/scripts/oncall_manager.py | 228 | # Step 2: Now set the usergroup to contain only the new oncall |
| LOW | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 1374 | # Step 0: Register new FSDP unit modules. |
| LOW | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 1407 | # Step 1: Group the parameters according to their execution order and attributes. |
| LOW | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 1445 | # Step 2: Bucket the parameters based on the guide bucket size. |
| LOW | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 1494 | # Step 3: Split parameter groups to meet communication segmentation requirements. |
| LOW | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 1539 | # Step 4: Generate the groups of collective buckets, where each group aggregates |
| LOW | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 4050 | # If use double buffer, we need to check if the next bucket |
| LOW | …e/transformer/custom_layers/batch_invariant_kernels.py | 265 | # Step 1: Find maximum value in the row for numerical stability |
| LOW | …e/transformer/custom_layers/batch_invariant_kernels.py | 277 | # Step 2: Compute sum of exp(x - max_val) |
| LOW | …e/transformer/custom_layers/batch_invariant_kernels.py | 293 | # Step 3: Compute final log_softmax values: x - max_val - log_sum_exp |
| LOW | megatron/core/pipeline_parallel/bridge_communicator.py | 415 | # Step 1: broadcast its shape so receivers can allocate |
| LOW | megatron/core/pipeline_parallel/bridge_communicator.py | 421 | # Step 2: broadcast the actual tensor |
| LOW | …n/core/dist_checkpointing/strategies/fully_parallel.py | 238 | # Step 3: load part of the checkpoint. |
| LOW | …n/core/dist_checkpointing/strategies/fully_parallel.py | 264 | # Step 4: exchange data between ranks |
| LOW | …gatron/core/resharding/nvshmem_copy_service/service.py | 232 | # Step 1: Segment tasks (break large tasks into chunks) |
| LOW | …gatron/core/resharding/nvshmem_copy_service/service.py | 242 | # Step 2: Pack tasks into workload groups |
| LOW | …gatron/core/resharding/nvshmem_copy_service/service.py | 249 | # Step 3: Schedule workloads to iterations |
| LOW | …gatron/core/resharding/nvshmem_copy_service/service.py | 258 | # Step 4: Prepare iteration schedules |
| LOW | …gatron/core/resharding/nvshmem_copy_service/service.py | 264 | # Step 5: Build GPU execution plans |
| LOW | …gatron/core/resharding/nvshmem_copy_service/service.py | 273 | # Step 6: Create double-buffered events |
| LOW | …harding/nvshmem_copy_service/core/pipeline_executor.py | 153 | # Step 1: Pack NEXT iteration (async) |
| LOW | …harding/nvshmem_copy_service/core/pipeline_executor.py | 165 | # Step 2: Unpack PRIOR iteration (async) |
| LOW | …harding/nvshmem_copy_service/core/pipeline_executor.py | 180 | # Step 3: Send CURRENT iteration |
| LOW | …harding/nvshmem_copy_service/core/pipeline_executor.py | 223 | # Step 5: Wait for async pack to complete (double-buffer safety) |
| LOW | …shmem_copy_service/planning/communication_scheduler.py | 39 | # Step 1: Collect all batches across all PE pairs |
| LOW | …shmem_copy_service/planning/communication_scheduler.py | 44 | # Step 2: Assign batches to iterations using greedy conflict-free algorithm |
| LOW | …shmem_copy_service/planning/communication_scheduler.py | 49 | # Step 3: Exchange detailed workload summaries (Task IDs/Sizes) |
| LOW | …shmem_copy_service/planning/communication_scheduler.py | 55 | # Step 4: Build schedule map for this PE |
| LOW | …e/communication/torch_symm_triton/fused_collectives.py | 151 | # Step 1: - reduce-scatter + residual add for this token + collect sq sum |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tasks/finetune_utils.py | 120 | # shuffling so we can just use a simple infinite loop. |
| MEDIUM | tests/unit_tests/test_inference.py | 107 | # we are replicating what lm-eval-harness::TemplateLM::_encode_pair does |
| MEDIUM | …s/checkpoint/test_gpt_hybrid_conversion_parallelism.py | 43 | # harness launched pytest. When that default PG is multi-rank (e.g. Megatron's |
| LOW | …ts/distributed/megatron_fsdp/test_mfsdp_fully_shard.py | 227 | # DP: Only relevant when using HSDP, where we need the flattened DP group for data parallelism. (Otherwise, just pas |
| LOW | …ts/distributed/megatron_fsdp/test_mfsdp_fully_shard.py | 229 | # DP-Shard-CP: Only required if using CP. Otherwise, just pass dp_shard to FSDP. |
| MEDIUM | tests/unit_tests/transformer/test_utils.py | 331 | """Test a comprehensive scenario with multiple configurations.""" |
| MEDIUM | tests/unit_tests/training/models/test_base.py | 171 | """from_dict() reconstructs configs from serialized dicts, handles nested dataclasses, and is robust to unknown keys |
| LOW | …s/unit_tests/dist_checkpointing/models/test_mlp_glu.py | 129 | # Load happens in-place, so we can just use the same tensors |
| MEDIUM | …/unit_tests/inference/contexts/test_dynamic_context.py | 2292 | # 6. Verify seamless append (no legacy offset math needed) |
| MEDIUM | …cipes/h100/gpt-dynamic-inference-with-coordinator.yaml | 92 | # skills/run-performance-tests/SKILL.md for the harness it runs under. |
| MEDIUM | …inference_server_smoke_tp1_pp1_dp8_583m/serve_smoke.py | 43 | # the JET harness expects at ``logs/*/*/attempt_0/*/std*.log``) while still |
| LOW | …sts/functional_tests/shell_test_utils/_run_training.sh | 85 | # If value is "true", just use the key |
| MEDIUM | .github/workflows/claude_review.yml | 94 | # Strict review: comprehensive Megatron-LM focused analysis |
| LOW | megatron/core/optimizer_param_scheduler.py | 235 | # If the learning rate is constant, just return the initial value. |
| LOW | megatron/core/timers.py | 476 | # polutes the runs list, so we just add each as a scalar |
| MEDIUM | megatron/core/ssm/triton_cache_manager.py | 69 | # use temp dir to be robust against program interruptions |
| LOW | …ore/tokenizers/text/parsers/qwen3_coder_tool_parser.py | 16 | # These map to vLLM types but we just use dictionaries for now |
| LOW | megatron/core/tensor_parallel/random.py | 108 | # if not using cuda graphs, just use the builtin pytorch function |
| LOW | megatron/core/tensor_parallel/random.py | 189 | # already graphable, just return it. |
| LOW | megatron/core/tensor_parallel/random.py | 195 | # already non-graphable, just return it. |
| MEDIUM | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 1549 | # Set aggregate buckets by FSDP units, i.e. buckets pertaining to the same |
| MEDIUM | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 2149 | # to leverage NCCL UBR for high-precision gradient reduction with |
| MEDIUM | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 3111 | # for a seamless user experience and coverage for ZeRO-1 and ZeRO-2? |
| MEDIUM | …ibuted/fsdp/src/megatron_fsdp/param_and_grad_buffer.py | 4319 | # TODO(@cspades): Clean up this logic in conjunction with |
| LOW | …atron/core/distributed/fsdp/src/megatron_fsdp/utils.py | 198 | # if not using cuda graphs, just use the builtin pytorch function |
| LOW | megatron/core/transformer/transformer_config.py | 2237 | # so just set both if either is specified. |
| LOW | megatron/core/transformer/transformer_layer.py | 862 | # elements in bias_chunks are the same for all chunks, so we can just use the first one |
| MEDIUM | megatron/core/transformer/mlp.py | 207 | # Weight resharding across TP sizes will have aforementioned problems. |
| MEDIUM | megatron/core/datasets/indexed_dataset.py | 6 | # Essentially re-written in entirety |
| MEDIUM | …e/models/common/embeddings/language_model_embedding.py | 145 | # the original tensor from being garbage collected. Clone to facilitate GC. |
| LOW | megatron/core/models/bert/bert_model.py | 193 | # For local layer spec we just use b1ss |
| LOW | megatron/core/extensions/transformer_engine.py | 687 | # TODO should we ditch normalization config and just use spec to choose LayerNorm vs RMSNorm? |
| LOW | …tron/core/dist_checkpointing/strategies/async_utils.py | 479 | # to simply call `sync_all_async_calls` to check if other ranks complete the writing |
| MEDIUM | …/core/inference/data_parallel_inference_coordinator.py | 386 | # Todo [Siddharth]: Make this more robust to handle invalid messages. |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | …core/transformer/heterogeneous/heterogeneous_config.py | 177 | "n_heads_in_group": null, |
| HIGH | …e/pipeline_parallel/fine_grained_activation_offload.py | 504 | self.push(chunk) |
| HIGH | …e/pipeline_parallel/fine_grained_activation_offload.py | 655 | self.push(cur_chunk) |
| HIGH | megatron/core/inference/inference_request.py | 94 | Each block hash is computed as SHA-256(parent_digest || block_bytes), where |
| HIGH | megatron/core/inference/unified_memory.py | 113 | if (device != prev_device && device >= 0) cudaSetDevice(device); |
| HIGH | megatron/core/inference/unified_memory.py | 149 | if (device != prev_device && prev_device >= 0) cudaSetDevice(prev_device); |
| HIGH | megatron/core/inference/contexts/dynamic_context.py | 3718 | # : [ XX | XX | 16 XX | 12 72 24 88 XX ] (XX = undefined) |
| HIGH | megatron/core/inference/moe/vllm_fused_moe.py | 518 | `input` are undefined). |
| HIGH | megatron/training/arguments.py | 3220 | "n_heads_in_group": null, |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tools/checkpoint/loader_llava.py | 52 | |
| MEDIUM | …nce_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.py | 183 | |
| MEDIUM | …nce_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.py | 184 | |
| MEDIUM | …nce_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.py | 185 | |
| MEDIUM | …nce_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.py | 186 | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | .github/workflows/claude-copy-to-main.yml | 113 | --body "🤖 **This PR was auto-generated by Claude** via the \`/claude copy\` command.\n\nCherry-picked f |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tools/bisect.sh | 5 | # Usage: |
| LOW | tools/common_pile_dataset/setup_common_pile_dataset.sh | 9 | # Usage: |
| LOW | …unctional_tests/shell_test_utils/run_batch_ci_tests.sh | 5 | # Usage: |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | megatron/core/process_groups_config.py | 660 | """Check if this rank has a language model. |
| MEDIUM | megatron/core/process_groups_config.py | 663 | True if this rank has a language model, False otherwise. |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …s/unit_tests/training/config/test_instantiate_utils.py | 52 | def test_function(arg1=None, arg2=None, **kwargs): |
| LOW | tests/unit_tests/training/config/test_yaml_utils.py | 45 | def test_function(): |
| LOW | megatron/core/utils.py | 2544 | def my_function(): |