A high-throughput and memory-efficient inference and serving engine for LLMs
14571 matches across 21 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | setup.py | 52 | def should_require_rust_frontend() -> bool: |
| LOW | setup.py | 445 | def fetch_metadata_for_variant( |
| LOW | setup.py | 479 | def detect_system_cuda_variant() -> str: |
| LOW | setup.py | 533 | def fetch_wheel_from_pypi_index(index_url: str, package: str = "vllm") -> str: |
| LOW | setup.py | 688 | def extract_precompiled_and_patch_package( |
| LOW | setup.py | 799 | def get_base_commit_in_main_branch() -> str: |
| LOW | csrc/cpu/generate_cpu_attn_dispatch.py | 92 | def generate_cases_for_isa_group(isa_list: list[str], include_fp8: bool = False) -> str: |
| LOW | csrc/quantization/machete/generate.py | 334 | def generate_type_option_name(kernel_types: TypeConfig): |
| LOW | csrc/quantization/machete/generate.py | 370 | def unsigned_type_with_bitwidth(num_bits): |
| LOW | tools/generate_versions_json.py | 86 | def generate_bake_native_json(args: dict[str, str]) -> dict: |
| LOW | tools/install_nixl_from_source_ubuntu.py | 65 | def install_system_dependencies(): |
| LOW | tools/install_nixl_from_source_ubuntu.py | 105 | def build_and_install_prerequisites(args): |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1046 | def _expand_flash_attn_variants( |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1150 | def parse_cuda_priority_lists() -> dict[str, list[str]]: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 300 | def parse_mla_prefill_registry() -> dict[str, str]: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 320 | def parse_mla_prefill_priorities() -> dict[str, list[str]]: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 386 | def parse_mla_prefill_backend_file(class_path: str) -> dict[str, Any] | None: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 466 | def parse_mla_prefill_backends() -> list[dict[str, Any]]: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 866 | def parse_flash_attn_features() -> dict[str, dict[str, Any]]: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1003 | def parse_flashinfer_trtllm_features() -> dict[str, dict[str, Any]]: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1098 | def _expand_flashinfer_variants( |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1203 | def _get_backends_from_return(stmts: list) -> list[str]: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1490 | def generate_priority_section(priorities: dict[str, list[str]]) -> str: |
| LOW | tools/profiler/visualize_layerwise_profile.py | 75 | def shorten_plot_legend_strings(legend, max_char_len: int): |
| LOW | tools/profiler/visualize_layerwise_profile.py | 94 | def attempt_to_make_names_unique(entries_and_traces): |
| LOW | tools/profiler/visualize_layerwise_profile.py | 144 | def group_trace_by_operations(trace_df: "pd.DataFrame") -> "pd.DataFrame": |
| LOW | tools/profiler/nsys_profile_tools/gputrc2graph.py | 45 | def gen_nonoverlapped_sum_from_gputrace(self, in_file, out_file): |
| LOW | tools/profiler/nsys_profile_tools/gputrc2graph.py | 66 | def sum_non_overlapping_intervals(self, df): |
| LOW | tools/vllm-rocm/pin_rocm_dependencies.py | 20 | def extract_version_from_wheel(wheel_name: str) -> str: |
| LOW | tools/vllm-rocm/pin_rocm_dependencies.py | 40 | def get_custom_wheel_versions(install_dir: str) -> dict[str, str]: |
| LOW | tools/vllm-rocm/pin_rocm_dependencies.py | 94 | def pin_dependencies_in_requirements(requirements_path: str, versions: dict[str, str]): |
| LOW | tests/test_sequence.py | 9 | def test_sequence_intermediate_tensors_equal(): |
| LOW | tests/test_zen_cpu_platform_detection.py | 35 | def test_is_amd_zen_cpu_returns_false_when_cpuinfo_missing(): |
| LOW | tests/test_version.py | 36 | def test_prev_minor_version_was(version_tuple, version_str, expected): |
| LOW | tests/test_ray_env_utils.py | 36 | def test_arbitrary_var_propagated(self): |
| LOW | tests/test_ray_env_utils.py | 42 | def test_worker_specific_excluded(self): |
| LOW | tests/test_ray_env_utils.py | 50 | def test_non_carry_over_blacklist(self): |
| LOW | tests/test_fxgraphcache_pickle_patch.py | 20 | def test_valueerror_converted_to_bypass(self): |
| LOW | tests/test_fxgraphcache_pickle_patch.py | 30 | def test_original_valueerror_chained(self): |
| LOW | tests/test_fxgraphcache_pickle_patch.py | 44 | def test_non_valueerror_propagates(self): |
| LOW | tests/test_fxgraphcache_pickle_patch.py | 54 | def test_normal_return_preserved(self): |
| LOW | tests/test_fxgraphcache_pickle_patch.py | 76 | def test_sentinel_attribute_set(self): |
| LOW | tests/test_fxgraphcache_pickle_patch.py | 90 | def test_patch_applied_in_current_environment(): |
| LOW | tests/conftest.py | 217 | def init_test_http_connection(): |
| LOW | tests/conftest.py | 264 | def should_do_global_cleanup_after_test(request) -> bool: |
| LOW | tests/conftest.py | 738 | def _hidden_states_to_seq_logprobs( |
| LOW | tests/conftest.py | 761 | def _hidden_states_to_logprobs( |
| LOW | tests/conftest.py | 788 | def generate_greedy_logprobs_limit( |
| LOW | tests/conftest.py | 1027 | def _final_steps_generate_w_logprobs( |
| LOW | tests/conftest.py | 1137 | def generate_prompt_perplexity( |
| LOW | tests/conftest.py | 1250 | def _wait_for_rocm_memory_release(self, gpu_memory_utilization: float) -> None: |
| LOW | tests/conftest.py | 1295 | def temporary_enable_log_propagate(): |
| LOW | tests/conftest.py | 1499 | def pytest_collection_modifyitems(config, items): |
| LOW | tests/conftest.py | 1516 | def cli_config_file_with_model(): |
| LOW | tests/conftest.py | 1654 | def clean_gpu_memory_between_tests(): |
| LOW | tests/test_inputs.py | 21 | def test_preprocessor_always_mm_code_path(model_id, prompt): |
| LOW | tests/test_ray_env.py | 54 | def test_pythonhashseed_in_result(self): |
| LOW | tests/test_ray_env.py | 118 | def test_worker_specific_host_vars_are_excluded(self): |
| LOW | tests/test_ray_env.py | 128 | def test_non_carry_over_blacklist(self): |
| LOW | tests/test_ray_env.py | 142 | def test_additional_vars_passthrough(self): |
| 8133 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1041 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1043 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1145 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1147 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 24 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 26 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 75 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 77 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 263 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 265 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 536 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 538 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 822 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 824 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1283 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1289 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1382 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1384 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1618 | # --------------------------------------------------------------------------- |
| MEDIUM | tools/pre_commit/generate_attention_backend_docs.py | 1620 | # --------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 106 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 108 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 117 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 121 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 135 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 138 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 151 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 157 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 65 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 67 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 78 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 81 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 93 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 95 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 175 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 179 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 192 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 199 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 214 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 217 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 240 | # ----------------------------------------------------------------------------- |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 243 | # ----------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 11 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 13 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 46 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 48 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 61 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 63 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 97 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 99 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 133 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 135 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 147 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_ray_env.py | 149 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_jit_monitor.py | 21 | # ------------------------------------------------------------------ |
| MEDIUM | tests/test_jit_monitor.py | 23 | # ------------------------------------------------------------------ |
| MEDIUM | tests/test_jit_monitor.py | 39 | # ------------------------------------------------------------------ |
| MEDIUM | tests/test_jit_monitor.py | 41 | # ------------------------------------------------------------------ |
| MEDIUM | tests/test_jit_monitor.py | 172 | # ------------------------------------------------------------------ |
| MEDIUM | tests/test_jit_monitor.py | 174 | # ------------------------------------------------------------------ |
| 1162 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | tests/test_envs.py | 0 | test that callable choices raise error for invalid values. |
| HIGH | tests/test_envs.py | 0 | test that callable choices raise error for invalid values. |
| HIGH | tests/test_envs.py | 0 | test that callable choices raise error for invalid values. |
| HIGH | tests/test_logger.py | 0 | this test calls _configure_vllm_root_logger again to test custom logging config behavior, however mocks are used to ensu |
| HIGH | tests/test_logger.py | 0 | this test calls _configure_vllm_root_logger again to test custom logging config behavior, however mocks are used to ensu |
| HIGH | tests/test_logger.py | 0 | this test calls _configure_vllm_root_logger again to test custom logging config behavior, however mocks are used to ensu |
| HIGH | tests/test_logger.py | 0 | this test calls _configure_vllm_root_logger again to test custom logging config behavior, however it fails before any ch |
| HIGH | tests/test_logger.py | 0 | this test calls _configure_vllm_root_logger again to test custom logging config behavior, however it fails before any ch |
| HIGH | tests/test_logger.py | 0 | this test calls _configure_vllm_root_logger again to test custom logging config behavior, however it fails before any ch |
| HIGH | tests/v1/attention/test_mla_prefill_selector.py | 0 | clear lru cache to ensure each test case runs without caching. |
| HIGH | tests/kernels/attention/test_attention_selector.py | 0 | clear lru cache to ensure each test case runs without caching. |
| HIGH | tests/kernels/attention/test_mha_attn.py | 0 | clear lru cache to ensure each test case runs without caching. |
| HIGH | tests/kernels/attention/test_rocm_attention_selector.py | 0 | clear lru cache to ensure each test case runs without caching. |
| HIGH | tests/v1/logits_processors/utils.py | 0 | fake logit processor to support unit testing and examples |
| HIGH | docs/features/custom_logitsprocs.md | 0 | fake logit processor to support unit testing and examples |
| HIGH | examples/features/logits_processor/custom.py | 0 | fake logit processor to support unit testing and examples |
| HIGH | tests/v1/logits_processors/utils.py | 0 | the request-level logits processor masks out all logits except the token id identified by `target_token` |
| HIGH | docs/features/custom_logitsprocs.md | 0 | the request-level logits processor masks out all logits except the token id identified by `target_token` |
| HIGH | examples/features/logits_processor/custom_req.py | 0 | the request-level logits processor masks out all logits except the token id identified by `target_token` |
| HIGH | examples/features/logits_processor/custom_req_init.py | 0 | the request-level logits processor masks out all logits except the token id identified by `target_token` |
| HIGH | tests/v1/logits_processors/utils.py | 0 | example of wrapping a fake request-level logit processor to create a batch-level logits processor |
| HIGH | docs/features/custom_logitsprocs.md | 0 | example of wrapping a fake request-level logit processor to create a batch-level logits processor |
| HIGH | examples/features/logits_processor/custom_req.py | 0 | example of wrapping a fake request-level logit processor to create a batch-level logits processor |
| HIGH | tests/v1/logits_processors/utils.py | 0 | this method returns a new request-level logits processor, customized to the `target_token` value associated with a parti |
| HIGH | docs/features/custom_logitsprocs.md | 0 | this method returns a new request-level logits processor, customized to the `target_token` value associated with a parti |
| HIGH | examples/features/logits_processor/custom_req.py | 0 | this method returns a new request-level logits processor, customized to the `target_token` value associated with a parti |
| HIGH | …s/v1/kv_connector/nixl_integration/toy_proxy_server.py | 0 | lifespan context manager to handle startup and shutdown events. |
| HIGH | …cache/disagg_prefill_lmcache_v1/disagg_proxy_server.py | 0 | lifespan context manager to handle startup and shutdown events. |
| HIGH | …regated/mooncake_connector/mooncake_connector_proxy.py | 0 | lifespan context manager to handle startup and shutdown events. |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | <seed:think>the user\'s current thinking budget is 512.</seed:cot_budget_reflect>\nlet me analyze the |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | <seed:think>the user\'s current thinking budget is 512.</seed:cot_budget_reflect>\nlet me analyze the |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | <seed:think>the user\'s current thinking budget is 512.</seed:cot_budget_reflect>\nlet me analyze the |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | <seed:think>the user\'s current thinking budget is 512.</seed:cot_budget_reflect>\nlet me analyze the |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | question. the user wants to know the weather in barcelona, spain. looking at the functions available, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | question. the user wants to know the weather in barcelona, spain. looking at the functions available, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | question. the user wants to know the weather in barcelona, spain. looking at the functions available, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | question. the user wants to know the weather in barcelona, spain. looking at the functions available, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | there\'s a get_weather function that can retrieve the current temperature for a given location. \n\nfirst, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | there\'s a get_weather function that can retrieve the current temperature for a given location. \n\nfirst, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | there\'s a get_weather function that can retrieve the current temperature for a given location. \n\nfirst, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | there\'s a get_weather function that can retrieve the current temperature for a given location. \n\nfirst, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | check the parameters required by get_weather: location is mandatory (needs city and country), and unit is |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | check the parameters required by get_weather: location is mandatory (needs city and country), and unit is |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | check the parameters required by get_weather: location is mandatory (needs city and country), and unit is |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | check the parameters required by get_weather: location is mandatory (needs city and country), and unit is |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | optional. the user provided "barcelona spain" as the location, which fits the required format (city, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | optional. the user provided "barcelona spain" as the location, which fits the required format (city, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | optional. the user provided "barcelona spain" as the location, which fits the required format (city, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | optional. the user provided "barcelona spain" as the location, which fits the required format (city, |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | country). \n<seed:cot_budget_reflect>i have used 131 tokens, and there are 381 tokens remaining for use. |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | country). \n<seed:cot_budget_reflect>i have used 131 tokens, and there are 381 tokens remaining for use. |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | country). \n<seed:cot_budget_reflect>i have used 131 tokens, and there are 381 tokens remaining for use. |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | country). \n<seed:cot_budget_reflect>i have used 131 tokens, and there are 381 tokens remaining for use. |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | </seed:cot_budget_reflect>\n since the unit isn\'t specified, the function will default to celsius, which |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | </seed:cot_budget_reflect>\n since the unit isn\'t specified, the function will default to celsius, which |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | </seed:cot_budget_reflect>\n since the unit isn\'t specified, the function will default to celsius, which |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | </seed:cot_budget_reflect>\n since the unit isn\'t specified, the function will default to celsius, which |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | is fine. \n\nthere\'s no need to ask for more information because the location is clear. so i should call |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | is fine. \n\nthere\'s no need to ask for more information because the location is clear. so i should call |
| HIGH | tests/tool_parsers/test_seed_oss_tool_parser.py | 0 | is fine. \n\nthere\'s no need to ask for more information because the location is clear. so i should call |
| 429 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …c/cutlass_extensions/vllm_cutlass_library_extension.py | 6 | |
| LOW | tests/conftest.py | 75 | |
| LOW | tests/conftest.py | 75 | |
| LOW | tests/conftest.py | 76 | |
| LOW | tests/utils.py | 63 | |
| LOW | tests/utils.py | 1951 | |
| LOW | tests/v1/attention/test_mla_backends.py | 69 | |
| LOW | tests/v1/attention/test_attention_backends.py | 46 | |
| LOW | tests/v1/logits_processors/utils.py | 13 | |
| LOW | tests/v1/cudagraph/test_cudagraph_mode.py | 38 | |
| LOW | tests/v1/cudagraph/test_breakable_cudagraph.py | 7 | |
| LOW | tests/v1/kv_connector/unit/test_tp_mapping.py | 10 | |
| LOW | tests/v1/kv_connector/unit/test_hf3fs_client.py | 17 | |
| LOW | tests/v1/kv_connector/unit/test_hf3fs_client.py | 17 | |
| LOW | tests/v1/kv_connector/unit/test_hf3fs_client.py | 17 | |
| LOW | tests/v1/kv_connector/unit/test_hf3fs_client.py | 17 | |
| LOW | tests/v1/kv_connector/unit/test_hf3fs_client.py | 17 | |
| LOW | …/v1/kv_connector/unit/offloading_connector/conftest.py | 3 | |
| LOW | tests/v1/spec_decode/test_backup_token_async_spec.py | 9 | |
| LOW | tests/v1/sample/test_topk_topp_sampler.py | 27 | |
| LOW | tests/v1/engine/conftest.py | 20 | |
| LOW | tests/v1/engine/conftest.py | 20 | |
| LOW | tests/v1/simple_kv_offload/test_scheduler.py | 5 | |
| LOW | tests/tool_use/test_gemma4_responses_adjust_request.py | 25 | |
| LOW | tests/renderers/test_chat_utils_prompt_embeds.py | 6 | |
| LOW | tests/kernels/core/test_vit_fp8_attn.py | 20 | |
| LOW | tests/kernels/core/test_fused_q_kv_rmsnorm.py | 11 | |
| LOW | tests/kernels/ir/test_ir_ops.py | 11 | |
| LOW | tests/kernels/ir/test_layernorm.py | 7 | |
| LOW | tests/kernels/mamba/test_ssu_dispatch.py | 24 | |
| LOW | tests/kernels/moe/test_moe.py | 18 | |
| LOW | tests/distributed/test_eplb_spec_decode.py | 3 | |
| LOW | …add_dummy_platform/vllm_add_dummy_platform/__init__.py | 10 | |
| LOW | tests/cuda/scripts/check_device_count_respects_env.py | 14 | |
| LOW | tests/model_executor/test_oink_integration.py | 37 | |
| LOW | tests/models/language/pooling/embed_utils.py | 8 | |
| LOW | tests/models/language/pooling/test_reward.py | 18 | |
| LOW | tests/models/multimodal/generation/test_pixtral.py | 24 | |
| LOW | tests/vllm_test_utils/vllm_test_utils/__init__.py | 8 | |
| LOW | tests/vllm_test_utils/vllm_test_utils/__init__.py | 8 | |
| LOW | tests/vllm_test_utils/vllm_test_utils/__init__.py | 9 | |
| LOW | tests/vllm_test_utils/vllm_test_utils/__init__.py | 9 | |
| LOW | tests/compile/test_compile_ranges.py | 10 | |
| LOW | tests/compile/test_config.py | 32 | |
| LOW | tests/compile/test_structured_logging.py | 10 | |
| LOW | tests/compile/test_graph_partition.py | 21 | |
| LOW | tests/compile/test_decorator.py | 21 | |
| LOW | tests/compile/fullgraph/test_toy_llama.py | 34 | |
| LOW | tests/compile/fullgraph/test_multiple_graphs.py | 28 | |
| LOW | tests/compile/passes/ir/test_lowering.py | 7 | |
| LOW | …ranscription/test_transcription_inter_chunk_spacing.py | 11 | |
| LOW | tests/entrypoints/openai/test_dp_supervisor.py | 17 | |
| LOW | tests/entrypoints/openai/responses/conftest.py | 3 | |
| LOW | tests/entrypoints/openai/responses/test_mcp_tools.py | 5 | |
| LOW | tests/entrypoints/openai/responses/test_harmony.py | 5 | |
| LOW | tests/standalone_tests/lazy_imports.py | 22 | |
| LOW | vllm/__init__.py | 7 | |
| LOW | vllm/__init__.py | 7 | |
| LOW | vllm/__init__.py | 14 | |
| LOW | vllm/__init__.py | 42 | |
| 1099 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tools/install_deepgemm.sh | 83 | # Create a temporary directory for the build |
| MEDIUM | tools/report_build_time_ninja.py | 201 | # Create a list that is in order by time stamp and has entries for the |
| MEDIUM | tools/pre_commit/update-dockerfile-graph.sh | 26 | # Define the target file path |
| MEDIUM | tests/conftest.py | 551 | # Create a copy to avoid modifying the original dict |
| MEDIUM | tests/utils.py | 727 | # Create a dedicated process group so we can kill |
| MEDIUM | tests/utils.py | 1493 | # Create a unique temporary file to store exception info from child |
| MEDIUM | tests/test_access_log_filter.py | 259 | # Create a logger with our filter (simulating uvicorn.access) |
| MEDIUM | tests/test_access_log_filter.py | 266 | # Create a custom handler that tracks messages |
| MEDIUM | tests/test_config.py | 694 | # Create a new mock and run the method with the same S3 URL |
| MEDIUM | tests/test_logger.py | 274 | # Create a mock logger to capture log calls |
| MEDIUM | tests/v1/test_tensor_ipc_queue.py | 193 | # Create a CPU tensor |
| MEDIUM | tests/v1/test_tensor_ipc_queue.py | 511 | # Create a CPU tensor |
| MEDIUM | tests/v1/test_tensor_ipc_queue.py | 642 | # Create a CPU tensor |
| MEDIUM | tests/v1/test_tensor_ipc_queue.py | 905 | # Create a tensor queue |
| MEDIUM | tests/v1/test_serial_utils.py | 189 | # Create a sample Python object |
| MEDIUM | tests/v1/test_serial_utils.py | 207 | # Create a sample tensor |
| MEDIUM | tests/v1/test_serial_utils.py | 227 | # Create a sample numpy array |
| MEDIUM | tests/v1/test_serial_utils.py | 313 | # Create a request with a non-multimodal tensor |
| MEDIUM | tests/v1/test_serial_utils.py | 354 | # Create a request with None for the tensor field |
| MEDIUM | tests/v1/kv_offload/test_file_mapper.py | 43 | # Create a copy of the mock config to avoid modifying the global one |
| MEDIUM | tests/v1/metrics/test_ray_metrics.py | 58 | # Create the actor and call the async method |
| MEDIUM | tests/v1/attention/test_mla_backends.py | 279 | # Create a realistic slot mapping that corresponds to the block table |
| MEDIUM | tests/v1/attention/test_mla_backends.py | 1198 | # Create a summary for the single-line failure message |
| MEDIUM | tests/v1/attention/test_attention_backends.py | 187 | # Create a realistic slot mapping that corresponds to the block table |
| MEDIUM | tests/v1/logits_processors/test_correctness.py | 807 | # Define a shuffled batch of requests which individually use a different |
| MEDIUM | tests/v1/logits_processors/test_custom_offline.py | 28 | # Create a mixture of requests which do and don't utilize the dummy logitproc |
| MEDIUM | tests/v1/logits_processors/test_custom_offline.py | 63 | # Create a vLLM instance and load custom logitproc |
| MEDIUM | tests/v1/logits_processors/test_custom_offline.py | 70 | # Create a reference vLLM instance without custom logitproc |
| MEDIUM | tests/v1/core/test_kv_cache_utils.py | 252 | # Create a list of KVCacheBlock objects |
| MEDIUM | tests/v1/core/test_kv_cache_utils.py | 255 | # Create a FreeKVCacheBlockQueue with these blocks |
| MEDIUM | tests/v1/core/test_kv_cache_utils.py | 420 | # Create a list of KVCacheBlock objects |
| MEDIUM | tests/v1/core/test_kv_cache_utils.py | 423 | # Create a FreeKVCacheBlockQueue with these blocks |
| MEDIUM | tests/v1/core/test_kv_cache_utils.py | 298 | # Create an empty FreeKVCacheBlockQueue with these blocks |
| MEDIUM | tests/v1/core/test_kv_cache_utils.py | 346 | # Create an empty FreeKVCacheBlockQueue |
| MEDIUM | tests/v1/core/test_kv_cache_utils.py | 363 | # Create an empty FreeKVCacheBlockQueue with these blocks |
| MEDIUM | tests/v1/core/test_kv_cache_utils.py | 1315 | # Create a VllmConfig |
| MEDIUM | tests/v1/core/test_kv_cache_utils.py | 1351 | # Create a VllmConfig |
| MEDIUM | tests/v1/core/test_scheduler.py | 2638 | # Create a request and schedule it |
| MEDIUM | tests/v1/core/test_scheduler.py | 2665 | # Create a high priority request and schedule it |
| MEDIUM | tests/v1/core/test_scheduler.py | 3424 | # Create a request and schedule it (and to be preempted) |
| MEDIUM | tests/v1/core/test_scheduler.py | 3474 | # Create a high priority request and schedule it |
| MEDIUM | tests/v1/core/test_scheduler.py | 4263 | # Create a text-only request (no mm_features). |
| MEDIUM | tests/v1/cudagraph/test_cudagraph_dispatch.py | 55 | # Create a real LoRAConfig with specialize_active_lora enabled |
| MEDIUM | tests/v1/kv_connector/unit/test_nixl_connector.py | 1912 | # Create a request that triggers do_remote_decode so that |
| MEDIUM | tests/v1/kv_connector/unit/test_lmcache_connector.py | 216 | # Create a mock object that is not LMCacheKVEvents |
| MEDIUM | tests/v1/kv_connector/unit/test_moriio_connector.py | 173 | # Define a fake remote engine id for testing |
| MEDIUM | …ts/v1/kv_connector/unit/test_decode_bench_connector.py | 145 | # Create a request with multiple blocks worth of tokens |
| MEDIUM | …ts/v1/kv_connector/unit/test_decode_bench_connector.py | 189 | # Create a request |
| MEDIUM | …ts/v1/kv_connector/unit/test_decode_bench_connector.py | 211 | # Create a request with just 1 token |
| MEDIUM | …ts/v1/kv_connector/unit/test_decode_bench_connector.py | 229 | # Create a request with 2 tokens |
| MEDIUM | …ts/v1/kv_connector/unit/test_decode_bench_connector.py | 255 | # Create a request with many blocks |
| MEDIUM | …ts/v1/kv_connector/unit/test_decode_bench_connector.py | 338 | # Create a request that doesn't align to block boundaries |
| MEDIUM | tests/v1/kv_connector/unit/test_nixl_connector_hma.py | 75 | # Create a mock worker with just the required attributes |
| MEDIUM | tests/v1/kv_connector/unit/test_example_connector.py | 146 | # Create the LLM instance |
| MEDIUM | …r/extract_hidden_states_integration/test_extraction.py | 48 | # Create a minimal Llama config with small dimensions |
| MEDIUM | …r/extract_hidden_states_integration/test_extraction.py | 63 | # Create a simple tokenizer |
| MEDIUM | tests/v1/determinism/test_batch_invariance.py | 102 | # Create a batch of size `max_batch_size` and insert the needle at |
| MEDIUM | tests/v1/distributed/test_external_lb_dp.py | 154 | # Create a client for each server |
| MEDIUM | tests/v1/distributed/test_hybrid_lb_dp.py | 182 | # Create a client for each node (each node has its own API endpoint) |
| MEDIUM | tests/v1/streaming_input/test_async_llm_streaming.py | 20 | # Create a minimal mock without initializing the full engine |
| 262 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | use_existing_torch.py | 21 | |
| LOW | setup.py | 943 | |
| LOW | setup.py | 987 | |
| LOW | setup.py | 173 | |
| LOW | setup.py | 589 | |
| LOW | setup.py | 688 | |
| LOW | csrc/cpu/generate_cpu_attn_dispatch.py | 92 | |
| LOW | csrc/quantization/marlin/generate_kernels.py | 173 | |
| LOW | csrc/moe/marlin_moe_wna16/generate_kernels.py | 173 | |
| LOW | tools/report_build_time_ninja.py | 151 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 115 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 140 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 161 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 206 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 320 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 386 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 550 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 594 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 866 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1150 | |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1203 | |
| LOW | tools/pre_commit/validate_config.py | 73 | |
| LOW | tools/pre_commit/check_boolean_context_manager.py | 21 | |
| LOW | tools/pre_commit/check_spdx_header.py | 27 | |
| LOW | tools/pre_commit/check_spdx_header.py | 65 | |
| LOW | tools/pre_commit/check_spdx_header.py | 109 | |
| LOW | tools/vllm-rocm/pin_rocm_dependencies.py | 40 | |
| LOW | tools/vllm-rocm/pin_rocm_dependencies.py | 94 | |
| LOW | tests/conftest.py | 525 | |
| LOW | tests/conftest.py | 941 | |
| LOW | tests/utils.py | 1150 | |
| LOW | tests/utils.py | 1484 | |
| LOW | tests/utils.py | 1610 | |
| LOW | tests/utils.py | 535 | |
| LOW | tests/utils.py | 1490 | |
| LOW | tests/utils.py | 1633 | |
| LOW | tests/v1/utils.py | 12 | |
| LOW | tests/v1/kv_offload/cpu/test_gpu_worker.py | 223 | |
| LOW | tests/v1/attention/test_mla_backends.py | 721 | |
| LOW | tests/v1/attention/test_sparse_mla_backends.py | 531 | |
| LOW | tests/v1/logits_processors/test_correctness.py | 304 | |
| LOW | tests/v1/logits_processors/test_correctness.py | 456 | |
| LOW | tests/v1/core/test_scheduler.py | 1923 | |
| LOW | tests/v1/core/utils.py | 176 | |
| LOW | tests/v1/kv_connector/unit/test_nixl_connector.py | 2561 | |
| LOW | tests/v1/kv_connector/unit/utils.py | 317 | |
| LOW | tests/v1/kv_connector/unit/test_mooncake_connector.py | 315 | |
| LOW | tests/v1/kv_connector/unit/test_mooncake_connector.py | 704 | |
| LOW | tests/v1/determinism/test_batch_invariance.py | 27 | |
| LOW | tests/v1/determinism/test_batch_invariance.py | 154 | |
| LOW | tests/v1/determinism/test_batch_invariance.py | 645 | |
| LOW | tests/v1/determinism/test_nvfp4_batch_invariant.py | 45 | |
| LOW | tests/v1/spec_decode/test_eagle.py | 48 | |
| LOW | tests/v1/spec_decode/test_acceptance_length.py | 178 | |
| LOW | tests/v1/spec_decode/test_acceptance_length.py | 228 | |
| LOW | tests/v1/sample/test_logprobs.py | 129 | |
| LOW | tests/v1/sample/test_logprobs.py | 494 | |
| LOW | tests/v1/sample/test_logprobs.py | 866 | |
| LOW | tests/v1/sample/test_logprobs.py | 910 | |
| LOW | tests/v1/sample/test_logprobs.py | 956 | |
| 995 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | CMakeLists.txt | 1 | cmake_minimum_required(VERSION 3.26) |
| LOW | CMakeLists.txt | 41 | set(PYTHON_SUPPORTED_VERSIONS "3.10" "3.11" "3.12" "3.13" "3.14") |
| LOW | CMakeLists.txt | 1081 | |
| LOW | csrc/torch_utils.h | 1 | #pragma once |
| LOW | csrc/torch_bindings.cpp | 1 | // Provides torch::Tensor for ops.h (previously included transitively via |
| LOW | csrc/torch_bindings.cpp | 61 | "Tensor q_in, Tensor kv, Tensor! k_cache, " |
| LOW | csrc/cumem_allocator_compat.h | 101 | } // extern "C" |
| LOW | csrc/cub_helpers.h | 1 | #pragma once |
| LOW | csrc/launch_bounds_utils.h | 1 | #pragma once |
| LOW | csrc/cumem_allocator.cpp | 1 | // A CUDAPluggableAllocator based on cumem* APIs. |
| LOW | csrc/cuda_compat.h | 41 | #define VLLM_LDG(arg) __ldg(arg) |
| LOW | csrc/cuda_compat.h | 61 | #endif |
| LOW | csrc/cuda_utils.h | 1 | #pragma once |
| LOW | csrc/spinloop.cpp | 1 | #include <Python.h> |
| LOW | csrc/attention/attention_dtypes.h | 1 | #pragma once |
| LOW | csrc/core/scalar_type.hpp | 1 | #pragma once |
| LOW | csrc/core/registration.h | 1 | #pragma once |
| LOW | csrc/cpu/cpu_attn_neon.hpp | 1 | #ifndef CPU_ATTN_NEON_HPP |
| LOW | csrc/cpu/utils.cpp | 1 | #ifndef VLLM_NUMA_DISABLED |
| LOW | csrc/cpu/cpu_fused_moe.cpp | 1 | #include "cpu/cpu_types.hpp" |
| LOW | csrc/cpu/cpu_types.hpp | 1 | #ifndef CPU_TYPES_HPP |
| LOW | csrc/cpu/cpu_types.hpp | 21 | #include "cpu_types_scalar.hpp" |
| LOW | csrc/cpu/cpu_types_riscv.hpp | 1 | #ifndef CPU_TYPES_RISCV_HPP |
| LOW | csrc/cpu/cpu_attn_impl.hpp | 901 | static constexpr int64_t head_dim = attention_impl_t::HeadDim; |
| LOW | csrc/cpu/cpu_attn_impl.hpp | 921 | // BlockSizeAlignment |
| LOW | csrc/cpu/cpu_attn_impl.hpp | 1741 | blocksize_alignment); |
| LOW | csrc/cpu/cpu_attn_impl.hpp | 1761 | float* curr_partial_q_buffer = |
| LOW | csrc/cpu/cpu_attn_vxe.hpp | 381 | } // namespace cpu_attention |
| LOW | csrc/cpu/generate_cpu_attn_dispatch.py | 141 | #ifdef CPU_CAPABILITY_AMXBF16 |
| LOW | csrc/cpu/cpu_arch_macros.h | 61 | #endif |
| LOW | csrc/cpu/cpu_arch_macros.h | 161 | #include <riscv_vector.h> |
| LOW | csrc/cpu/cpu_attn_rvv.hpp | 1 | // SPDX-License-Identifier: Apache-2.0 |
| LOW | csrc/cpu/cpu_attn_fp8.hpp | 1 | // SPDX-License-Identifier: Apache-2.0 |
| LOW | csrc/cpu/cpu_types_vxe.hpp | 1 | |
| LOW | csrc/cpu/cpu_types_vxe.hpp | 21 | |
| LOW | csrc/cpu/cpu_types_x86.hpp | 1 | |
| LOW | csrc/cpu/shm.cpp | 1 | #include "cpu/cpu_types.hpp" |
| LOW | csrc/cpu/cpu_types_riscv_impl.hpp | 1 | #ifndef CPU_TYPES_RISCV_IMPL_HPP |
| LOW | csrc/cpu/cpu_types_riscv_impl.hpp | 921 | #define CPU_KERNEL_GUARD_IN(NAME) |
| LOW | csrc/cpu/cpu_types_riscv_defs.hpp | 1 | #ifndef CPU_TYPES_RISCV_DEFS_HPP |
| LOW | csrc/cpu/cpu_types_riscv_defs.hpp | 21 | #define LMUL_256 m1 |
| LOW | csrc/cpu/cpu_types_arm.hpp | 21 | #define VLLM_DISPATCH_CASE_FLOATING_TYPES(...) \ |
| LOW | csrc/cpu/cpu_attn_vsx.hpp | 1 | // SPDX-License-Identifier: Apache-2.0 |
| LOW | csrc/cpu/sgl-kernels/gemm.cpp | 81 | constexpr int BLOCK_N = block_size_n(); |
| LOW | csrc/cpu/sgl-kernels/fla.cpp | 1 | // Adapted from |
| LOW | csrc/cpu/sgl-kernels/gemm.h | 1 | // Adapted from |
| LOW | csrc/cpu/sgl-kernels/vec.h | 401 | __m512i vec_zero = _mm512_setzero_epi32(); |
| LOW | csrc/cpu/sgl-kernels/moe.cpp | 1 | // Adapted from |
| LOW | csrc/cpu/sgl-kernels/moe.cpp | 21 | // allocates 2 intermediate_caches instead of 3 |
| LOW | csrc/cpu/sgl-kernels/moe.cpp | 1261 | // unlike triton kernel, we fuse silu with gemm1 so only need 2 intermediate_caches: |
| LOW | csrc/cpu/sgl-kernels/moe_int4.cpp | 61 | // num_threads * BLOCK_M * K + |
| LOW | csrc/cpu/sgl-kernels/common.h | 1 | // Adapted from |
| LOW | csrc/cpu/sgl-kernels/common.h | 201 | } |
| LOW | csrc/cpu/sgl-kernels/common.h | 281 | return std::max(1, (actual_nth >> 1) * 2); |
| LOW | csrc/cpu/sgl-kernels/conv.cpp | 141 | Unroll<ROWS * COLS>{}(loadb); |
| LOW | csrc/cpu/sgl-kernels/conv.cpp | 641 | seqlen, |
| LOW | csrc/libtorch_stable/torch_utils.h | 1 | #pragma once |
| LOW | csrc/libtorch_stable/torch_bindings.cpp | 1 | #include "ops.h" |
| LOW | …e/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp | 41 | |
| LOW | csrc/libtorch_stable/mamba/selective_scan.h | 1 | /****************************************************************************** |
| 725 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | setup.py | 110 | def find_tcmalloc() -> Path | None: |
| MEDIUM | setup.py | 799 | def get_base_commit_in_main_branch() -> str: |
| LOW | setup.py | 118 | except Exception: |
| LOW | setup.py | 207 | except Exception as e: |
| LOW | setup.py | 497 | except Exception: |
| LOW | setup.py | 508 | except Exception: |
| LOW | setup.py | 635 | except Exception as e: |
| LOW | setup.py | 853 | except Exception as err: |
| LOW | setup.py | 924 | except Exception: |
| MEDIUM | tools/generate_cmake_presets.py | 168 | print(f"Error writing file: {e}") |
| LOW | tools/install_nixl_from_source_ubuntu.py | 30 | except Exception: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 311 | except Exception: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 332 | except Exception: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 401 | except Exception: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 743 | except Exception: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 757 | except Exception as e: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 840 | except Exception: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 877 | except Exception: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1014 | except Exception: |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1171 | except Exception: |
| LOW | tools/profiler/nsys_profile_tools/gputrc2graph.py | 239 | except Exception: |
| LOW | tools/vllm-rocm/pin_rocm_dependencies.py | 79 | except Exception as e: |
| LOW | tests/conftest.py | 1281 | except Exception: |
| LOW | tests/conftest.py | 1543 | except Exception as e: |
| MEDIUM | tests/utils.py | 117 | def _nvml(): |
| LOW | tests/utils.py | 81 | except Exception as e: |
| LOW | tests/utils.py | 274 | except Exception: |
| LOW | tests/utils.py | 555 | except Exception as e: |
| LOW | tests/utils.py | 641 | except Exception: |
| LOW | tests/utils.py | 1522 | except Exception as e: |
| LOW | tests/utils.py | 1545 | except Exception: |
| LOW | tests/utils.py | 1739 | except Exception as e: |
| LOW | tests/utils.py | 1954 | except Exception: |
| LOW | tests/v1/test_tensor_ipc_queue.py | 83 | except Exception as e: |
| LOW | tests/v1/test_tensor_ipc_queue.py | 120 | except Exception as e: |
| LOW | tests/v1/test_tensor_ipc_queue.py | 308 | except Exception as e: |
| LOW | tests/v1/test_tensor_ipc_queue.py | 409 | except Exception as e: |
| LOW | tests/v1/test_tensor_ipc_queue.py | 443 | except Exception as e: |
| LOW | tests/v1/test_tensor_ipc_queue.py | 532 | except Exception as e: |
| LOW | tests/v1/test_tensor_ipc_queue.py | 570 | except Exception as e: |
| LOW | tests/v1/test_tensor_ipc_queue.py | 721 | except Exception as e: |
| LOW | tests/v1/utils.py | 64 | except Exception as e: |
| LOW | tests/v1/kv_offload/cpu/test_shared_offload_region.py | 164 | except Exception as e: |
| LOW | tests/v1/kv_offload/cpu/test_shared_offload_region.py | 123 | except Exception as e: |
| LOW | tests/v1/shutdown/test_forward_error.py | 81 | except Exception as e: |
| LOW | tests/v1/shutdown/test_processor_error.py | 39 | except Exception as e: |
| LOW | tests/v1/kv_connector/unit/test_hf3fs_client.py | 28 | except Exception: |
| LOW | tests/v1/kv_connector/unit/test_multi_connector.py | 383 | except Exception as e: |
| LOW | tests/v1/kv_connector/unit/utils.py | 352 | except Exception as e: |
| LOW | …/v1/kv_connector/unit/test_mooncake_store_connector.py | 575 | except Exception: |
| LOW | …/v1/kv_connector/unit/test_mooncake_store_connector.py | 612 | except Exception: |
| LOW | tests/v1/kv_connector/unit/test_rixl_gpu_mem_diag.py | 37 | except Exception: |
| LOW | …s/v1/kv_connector/nixl_integration/toy_proxy_server.py | 253 | except Exception as e: |
| MEDIUM | …s/v1/kv_connector/nixl_integration/toy_proxy_server.py | 258 | print(f"Error occurred in disagg prefill proxy server - {api} endpoint") |
| MEDIUM | …/kv_connector/nixl_integration/test_disagg_accuracy.py | 159 | print(f"Error writing to file: {e}") |
| MEDIUM | …/kv_connector/nixl_integration/test_disagg_accuracy.py | 168 | print(f"Error writing to file: {e}") |
| LOW | tests/v1/determinism/test_online_batch_invariance.py | 42 | except Exception as e: # pragma: no cover |
| LOW | tests/v1/distributed/test_external_lb_dp.py | 91 | except Exception as e: |
| LOW | tests/v1/distributed/test_external_lb_dp.py | 118 | except Exception as e: |
| MEDIUM | tests/v1/distributed/test_external_lb_dp.py | 119 | print(f"Error stopping servers: {e}") |
| 564 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | setup.py | 821 | # Check if the upstream_main_commit exists in the local repo |
| LOW | tools/install_torchcodec_rocm.sh | 15 | # Check if torchcodec is already installed and working |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 348 | # Check if it's a capability.major == 10 check (Blackwell) |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 766 | # Check if this is an MLA backend by parent class or naming |
| LOW | tools/pre_commit/generate_attention_backend_docs.py | 1188 | # Check if this is the "if use_mla:" branch |
| LOW | tools/pre_commit/check_forbidden_imports.py | 99 | # Check if it's allowed |
| LOW | tools/pre_commit/update-dockerfile-graph.sh | 10 | # Check if docker/Dockerfile is among the provided files |
| LOW | tools/pre_commit/update-dockerfile-graph.sh | 14 | # Check if Docker is installed and running |
| LOW | tools/pre_commit/update-dockerfile-graph.sh | 71 | # Check if the graph has changed |
| LOW | tools/vllm-rocm/pin_rocm_dependencies.py | 148 | # Check if this line is for one of our custom packages |
| LOW | tests/conftest.py | 385 | # Set this to avoid hanging issue |
| LOW | tests/conftest.py | 897 | # Set this to avoid hanging issue |
| LOW | tests/utils.py | 503 | os.kill(spid, 0) # Check if still alive |
| LOW | tests/test_config.py | 478 | # Check if LONGCHAT_ROPE_PARAMETERS entries are in longchat_model_config |
| LOW | tests/test_logger.py | 358 | # Set max_log_len to 10 |
| LOW | tests/v1/attention/test_mla_backends.py | 830 | # Set num_speculative_tokens to query_len - 1 |
| LOW | tests/v1/attention/test_sparse_mla_backends.py | 611 | # Set some to -1 to test masking |
| LOW | tests/v1/attention/test_sparse_mla_backends.py | 615 | # Set some to out of bounds |
| LOW | tests/v1/attention/test_sparse_mla_backends.py | 671 | # Set some to -1 to test masking |
| LOW | tests/v1/attention/test_sparse_mla_backends.py | 675 | # Set some to out of bounds |
| LOW | tests/v1/core/test_scheduler.py | 1987 | # Verify if position length is identical |
| LOW | tests/v1/core/test_scheduler.py | 2883 | # Check if scheduled_encoder_inputs is empty as expected |
| LOW | tests/v1/core/test_scheduler.py | 3235 | # Set up to test different encoder cache existence scenario after preemption |
| LOW | tests/v1/core/test_scheduler.py | 3565 | # Set up to test different encoder cache existence scenario after preemption |
| LOW | tests/v1/core/utils.py | 231 | # Verify if position length is identical |
| LOW | …extract_hidden_states_integration/predictable_llama.py | 81 | # Check if we need auxiliary hidden states |
| LOW | …nnector/nixl_integration/config_sweep_accuracy_test.sh | 97 | # Check if cross-layers is enabled (non-empty) |
| LOW | tests/v1/determinism/test_batch_invariance.py | 279 | # Check if tokens match first |
| LOW | tests/v1/determinism/test_batch_invariance.py | 558 | # Check if tokens match first |
| LOW | tests/v1/determinism/test_batch_invariance.py | 787 | # Check if tokens match |
| LOW | tests/v1/determinism/test_batch_invariance.py | 805 | # Check if logprobs match bitwise |
| LOW | tests/v1/spec_decode/test_acceptance_length.py | 102 | # Check if get_valid_backends is actually defined in the platform class |
| LOW | …1/ec_connector/integration/run_epd_correctness_test.sh | 25 | # Set 1 to use multimodal prompts; else to use text-only |
| LOW | …ts/v1/ec_connector/integration/test_epd_correctness.py | 218 | # Check if server is ready |
| LOW | tests/v1/e2e/spec_decode/test_async_spec_decode.py | 34 | # Increment counter |
| LOW | tests/v1/engine/test_engine_core_client.py | 284 | # Check if all request IDs in outputs have finished |
| LOW | tests/v1/engine/utils.py | 131 | # Check if the sampled_token_id occurs in choice_tensor[1:] |
| LOW | tests/utils_/test_network_utils.py | 79 | # Check if IPv6 is supported by trying to create an IPv6 socket |
| LOW | tests/tool_parsers/test_minimax_tool_parser.py | 494 | # Check if function name is sent (should happen only once) |
| LOW | tests/tool_parsers/test_minimax_tool_parser.py | 500 | # Check if arguments are sent incrementally |
| LOW | tests/tool_parsers/test_mistral_tool_parser.py | 135 | # Check if the slice from the current position matches the target sequence |
| LOW | tests/kernels/moe/test_cpu_int4_moe.py | 17 | # Check if the dynamic_4bit_int_moe op is available |
| LOW | tests/kernels/moe/test_cpu_int4_moe.py | 21 | # Check if KleidiAI ops are available |
| LOW | tests/kernels/moe/test_moe_layer.py | 1791 | # Check if enough GPUs available |
| LOW | tests/kernels/moe/test_rocm_aiter_topk.py | 26 | # Check if aiter package is installed |
| LOW | tests/kernels/moe/test_rocm_aiter_topk.py | 35 | # Check if the op exists in torch.ops.vllm |
| LOW | tests/kernels/moe/test_rocm_aiter_topk.py | 38 | # Check if the op is callable |
| LOW | tests/kernels/moe/test_rocm_aiter_topk.py | 44 | # Check if the op exists in torch.ops.vllm |
| LOW | tests/kernels/moe/test_rocm_aiter_topk.py | 47 | # Check if the op is callable |
| LOW | tests/evals/gsm8k/gsm8k_eval.py | 297 | # Print results to terminal |
| LOW | tests/distributed/test_eplb_execute.py | 150 | # Check if the weights are correct |
| LOW | tests/model_executor/test_eagle_quantization.py | 112 | # Check if get_cache_scale is called and returns expected value |
| LOW | tests/model_executor/test_qwen3_omni.py | 29 | # Check if it's a special token that should be compressed |
| LOW | …model_loader/runai_streamer_loader/test_runai_utils.py | 56 | # Read the file in chunks to handle large files efficiently |
| LOW | tests/models/multimodal/generation/test_maverick.py | 59 | # Print the outputs |
| LOW | tests/quantization/test_gptq_v2.py | 43 | # Check if gptq_v2 format is correctly loaded |
| LOW | tests/quantization/test_gptq_v2.py | 105 | # Print the output sequences if failed |
| LOW | tests/compile/test_config.py | 50 | # Check if get_raw_stream exists in builtins |
| LOW | tests/compile/fusions_e2e/conftest.py | 44 | # Print the outputs. |
| LOW | tests/compile/fullgraph/test_full_graph.py | 251 | # Print the outputs. |
| 252 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | csrc/quantization/machete/generate.py | 435 | # For now, we can just use the first accumulator type seen since |
| MEDIUM | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 145 | # More robust: count lines matching our UID. |
| MEDIUM | tests/v1/logits_processors/test_custom_offline.py | 28 | # Create a mixture of requests which do and don't utilize the dummy logitproc |
| LOW | tests/v1/cudagraph/test_breakable_cudagraph.py | 235 | # Outside capture: decorator should just call through. |
| LOW | tests/tool_parsers/test_mistral_tool_parser.py | 141 | # Otherwise, just add the current token and move to the next one |
| MEDIUM | tests/tool_parsers/test_glm4_moe_tool_parser.py | 295 | # depending on how robust we want the parsing to be |
| MEDIUM | tests/renderers/test_sparse_tensor_validation.py | 58 | # explicitly so this fixture is robust to process-wide invariant-check state |
| MEDIUM | tests/kernels/mamba/test_mamba_mixer2.py | 106 | # - utilize mock patching to disable TP when |
| MEDIUM | tests/distributed/test_pynccl.py | 382 | # Essentially this is an all-gather operation. |
| MEDIUM | tests/distributed/test_context_parallel.py | 42 | # .buildkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml |
| MEDIUM | tests/distributed/test_context_parallel.py | 44 | # .buildkite/lm-eval-harness/configs/Qwen2.5-1.5B-Instruct.yaml |
| LOW | tests/model_executor/test_qwen3_omni.py | 41 | # Regular token, just add it |
| LOW | vllm/env_override.py | 356 | # functions just return True. |
| LOW | vllm/v1/attention/backends/flash_attn.py | 291 | # but for now just set it to `UNIFORM_BATCH` to get use to drop down |
| LOW | vllm/v1/attention/backends/utils.py | 263 | # then we can simply use a cdiv for the rest. |
| MEDIUM | vllm/v1/attention/ops/triton_decode_attention.py | 362 | # explicitly facilitate overlapping load/compute |
| MEDIUM | vllm/v1/core/encoder_cache_manager.py | 321 | # utilize the cache and this class will fold into EncoderCacheManager, as |
| LOW | vllm/v1/spec_decode/llm_base_proposer.py | 1661 | # Therefore, we can just return the logits. |
| MEDIUM | vllm/v1/worker/gpu_worker.py | 638 | # CUDAGraph memory size and may not utilize all gpu memory. |
| MEDIUM | vllm/v1/worker/gpu_worker.py | 888 | # Generate the trace name by combining prefix with comprehensive rank suffix |
| LOW | vllm/v1/worker/gpu/model_runner.py | 1175 | # For piecewise and eager mode, just call model(). |
| LOW | vllm/v1/engine/parallel_sampling.py | 116 | # If streaming, just return the current output |
| MEDIUM | vllm/tool_parsers/llama_tool_parser.py | 262 | # re-set stuff pertaining to progress in the current tool |
| MEDIUM | vllm/tool_parsers/jamba_tool_parser.py | 218 | # re-set stuff pertaining to progress in the current tool |
| MEDIUM | vllm/tool_parsers/granite_tool_parser.py | 186 | # re-set stuff pertaining to progress in the current tool |
| LOW | vllm/tool_parsers/xlam_tool_parser.py | 558 | # If we encounter an error, just return the delta text as regular content |
| MEDIUM | vllm/tool_parsers/granite_20b_fc_tool_parser.py | 203 | # re-set stuff pertaining to progress in the current tool |
| MEDIUM | vllm/tool_parsers/step3_tool_parser.py | 51 | # Explicit state flags for robust streaming |
| LOW | vllm/tokenizers/mistral.py | 535 | # if underlying tokenizer is sentencepiece, we just add "�". |
| LOW | vllm/platforms/cuda.py | 586 | # users can just use IR op priority directly |
| MEDIUM | vllm/distributed/utils.py | 302 | """A robust barrier to synchronize all ranks. |
| MEDIUM | …distributed/kv_transfer/kv_connector/v1/nixl/worker.py | 615 | # we can leverage host_buffer for permute |
| MEDIUM | …ransfer/kv_connector/v1/mooncake/mooncake_connector.py | 756 | # Tasks can await async events, so a surplus (2x is a robust heuristic) |
| MEDIUM | vllm/config/model.py | 310 | """Enable the custom cumem allocator to leverage advanced GPU memory |
| MEDIUM | vllm/config/parallel.py | 580 | # To make the initialization more robust we retry a few times |
| LOW | vllm/model_executor/layers/mamba/ops/causal_conv1d.py | 185 | # first chunk and does not have prior-token, so just set to 0 |
| MEDIUM | vllm/model_executor/layers/fused_moe/modular_kernel.py | 47 | # The goal is to be able to utilize different communication mechanisms with |
| LOW | …m/model_executor/layers/fused_moe/runner/moe_runner.py | 259 | # Once the MK can be created upfront, we can just pass in the proper |
| MEDIUM | …xecutor/layers/fused_moe/prepare_finalize/deepep_ll.py | 40 | # TODO (varun) : Optimize leverage num_tokens_per_expert counts |
| MEDIUM | …executor/layers/fused_moe/experts/fused_humming_moe.py | 233 | # Neighboring nodes are required to utilize distinct workspaces. |
| MEDIUM | vllm/model_executor/layers/quantization/fp8.py | 281 | # For GPUs that lack FP8 hardware support, we can leverage the Marlin |
| MEDIUM | vllm/model_executor/layers/quantization/modelopt.py | 317 | # Normalize quant_algo for robust matching (ModelOpt may emit lowercase). |
| MEDIUM | vllm/model_executor/layers/quantization/fbgemm_fp8.py | 53 | # For GPUs that lack FP8 hardware support, we can leverage the Marlin |
| MEDIUM | …executor/layers/quantization/utils/marlin_utils_fp4.py | 101 | # to fully utilize the E4M3 dynamic range (e.g., global_scale=1). |
| MEDIUM | …executor/layers/quantization/utils/marlin_utils_fp4.py | 161 | # For GPUs that lack FP4 hardware support, we can leverage the |
| MEDIUM | …executor/layers/quantization/utils/marlin_utils_fp8.py | 53 | # For GPUs that lack FP8 hardware support, we can leverage the |
| LOW | …odel_executor/layers/quantization/utils/quant_utils.py | 406 | # Unquantized layer: just return base weights |
| MEDIUM | …/model_executor/layers/quantization/quark/quark_moe.py | 166 | # For GPUs that lack FP8 hardware support, we can leverage the Marlin |
| MEDIUM | …rs/quantization/compressed_tensors/transform/module.py | 85 | # do not fold into weight in order to utilize FWHT |
| MEDIUM | vllm/model_executor/models/grok1.py | 93 | # Check for Grok2-specific attributes (both for robust detection) |
| MEDIUM | vllm/model_executor/models/deepseek_ocr.py | 133 | """Example of overriding the wrapper class `__init__()` in order to utilize |
| MEDIUM | vllm/model_executor/models/deepseek_ocr.py | 133 | """Example of overriding the wrapper class `__init__()` in order to utilize |
| LOW | vllm/model_executor/models/qwen3_asr.py | 464 | # No audio features, just return linear positions |
| LOW | vllm/model_executor/models/transformers/multimodal.py | 197 | # NOTE: we can't just set caching=False because base class method |
| MEDIUM | …dels/deepseek_v4/nvidia/ops/fused_indexer_q_cutedsl.py | 167 | # all threads in a warp to be active since we utilize warp shuffle later. |
| LOW | vllm/reasoning/granite_reasoning_parser.py | 198 | # corrected; just return the delta text as normal content. |
| LOW | vllm/reasoning/hunyuan_a13b_reasoning_parser.py | 91 | # this id is not part of content, so just return [] here. |
| LOW | vllm/reasoning/olmo3_reasoning_parser.py | 271 | # this id is not part of content, so just return [] here. |
| MEDIUM | vllm/multimodal/evs.py | 255 | # exact timestamp count. This is robust even when early |
| MEDIUM | vllm/benchmarks/datasets/datasets.py | 2910 | # leverage CustomDataset sample |
| 53 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/v1/logits_processors/test_correctness.py | 1180 | # Step 1: think-start token appears. |
| LOW | tests/v1/kv_connector/unit/test_lmcache_connector.py | 567 | # Step 1: Get events from lmcache engine |
| LOW | tests/v1/kv_connector/unit/test_lmcache_connector.py | 576 | # Step 2: Update connector output (simulate receiving from worker) |
| LOW | tests/v1/kv_connector/unit/test_lmcache_connector.py | 582 | # Step 3: Take events |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 433 | # Step 2: 5 blocks are in use (2 new for remote blocks). |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 441 | # Step 3: finish recving (5 blocks in use) |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 450 | # Step 4: try to schedule, remote request is put to running list |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 460 | # Step 5: Remote request will be put back to waiting list |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 468 | # Step 6: finish the request, free it. |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 477 | # Step 7: now we can schedule (with 2 blocks computed), |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 489 | # Step 8: free everything. |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 536 | # Step 2: 3 blocks are in use, |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 547 | # Step 3: finish the request, free it. |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 556 | # Step 4: now we can initiate KV transfer (with 2 blocks computed). |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 564 | # Step 5: finish recving (5 blocks in use) |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 573 | # Step 6: schedule remote request |
| LOW | …/v1/kv_connector/unit/test_remote_prefill_lifecycle.py | 580 | # Step 7: free everything. |
| LOW | tests/v1/determinism/test_batch_invariance.py | 700 | # Step 1: Run decode and collect logprobs |
| LOW | tests/v1/determinism/test_batch_invariance.py | 719 | # Step 2: For each token position, run prefill and compare |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 373 | # Step 2: Schedule creates NewRequestData |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 447 | # Step 7: Schedule again - now request uses cached state |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 514 | # Step 12: Add new streaming request with seq_id=1 |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 388 | # Step 3: Simulate model runner caching the prompt_token_ids |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 421 | # Step 6: Verify request state after Cycle 1 |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 459 | # Step 8: Calculate num_tokens like gpu_model_runner.py:1284 does |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 495 | # Step 11: Verify request transitioned to WAITING_FOR_STREAMING_REQ |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 526 | # Step 13: Scheduler schedules the updated session |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 544 | # Step 14: Model runner caches NEW prompt_token_ids reference |
| LOW | tests/v1/streaming_input/test_scheduler_streaming.py | 557 | # Step 15: FINAL CRITICAL VERIFICATION |
| LOW | …/streaming_input/test_gpu_model_runner_v2_streaming.py | 84 | # Step 1: Add initial request with 3 prompt tokens, all computed |
| LOW | …/streaming_input/test_gpu_model_runner_v2_streaming.py | 100 | # Step 2: Create streaming update with extended prompt |
| LOW | …/streaming_input/test_gpu_model_runner_v2_streaming.py | 116 | # Step 3: Verify no free_indices leak (old slot recycled) |
| LOW | …/streaming_input/test_gpu_model_runner_v2_streaming.py | 155 | # Step 1: Add initial request with one audio feature |
| LOW | …/streaming_input/test_gpu_model_runner_v2_streaming.py | 175 | # Step 2: Create streaming update with additional multimodal feature |
| LOW | …/streaming_input/test_gpu_model_runner_v2_streaming.py | 192 | # Step 3: Verify no free_indices leak |
| LOW | …/v1/streaming_input/test_gpu_model_runner_streaming.py | 59 | # Step 1: Create initial request state with some computed tokens |
| LOW | …/v1/streaming_input/test_gpu_model_runner_streaming.py | 77 | # Step 2: Create new request data with extended prompt |
| LOW | …/v1/streaming_input/test_gpu_model_runner_streaming.py | 96 | # Step 3: Update the request |
| LOW | …/v1/streaming_input/test_gpu_model_runner_streaming.py | 101 | # Step 4: Verify the request state was updated correctly |
| LOW | …/v1/streaming_input/test_gpu_model_runner_streaming.py | 131 | # Step 1: Create initial request state with one multimodal feature |
| LOW | …/v1/streaming_input/test_gpu_model_runner_streaming.py | 156 | # Step 2: Create new request data with additional multimodal feature |
| LOW | …/v1/streaming_input/test_gpu_model_runner_streaming.py | 176 | # Step 3: Update the request |
| LOW | …/v1/streaming_input/test_gpu_model_runner_streaming.py | 181 | # Step 4: Verify the request state was updated correctly |
| LOW | …1/ec_connector/integration/run_epd_correctness_test.sh | 459 | # Step 1: Run baseline |
| LOW | …1/ec_connector/integration/run_epd_correctness_test.sh | 462 | # Step 2: Test 1E + 1PD |
| LOW | …1/ec_connector/integration/run_epd_correctness_test.sh | 465 | # Step 3: Test baseline 1P + 1D |
| LOW | …1/ec_connector/integration/run_epd_correctness_test.sh | 468 | # Step 4: Test 1E + 1P + 1D |
| LOW | tests/kernels/test_fused_inv_rope_fp8_quant.py | 683 | # Step 1: In-place CUDA RoPE (same as production) |
| LOW | tests/kernels/test_fused_inv_rope_fp8_quant.py | 695 | # Step 2: Reshape + quant + reshape (same as production) |
| LOW | …ultimodal/generation/test_vit_backend_functionality.py | 415 | # Step 1: Backend filtering |
| LOW | …ultimodal/generation/test_vit_backend_functionality.py | 425 | # Step 2: Apply GPU marks dynamically |
| LOW | …ultimodal/generation/test_vit_backend_functionality.py | 430 | # Step 3: Route to appropriate handler |
| LOW | …ts/entrypoints/llm/test_mm_cache_external_injection.py | 91 | # Step 1: Normal requests to populate the cache |
| LOW | …ts/entrypoints/llm/test_mm_cache_external_injection.py | 99 | # Step 2: Use a second image to get valid expanded tokens and |
| LOW | …ntrypoints/weight_transfer/test_weight_transfer_llm.py | 263 | # Step 1: Initialize weight transfer engine |
| LOW | …ntrypoints/weight_transfer/test_weight_transfer_llm.py | 268 | # Step 2: Start weight update |
| LOW | …ntrypoints/weight_transfer/test_weight_transfer_llm.py | 271 | # Step 3: Update weights |
| LOW | …ntrypoints/weight_transfer/test_weight_transfer_llm.py | 282 | # Step 4: Finish weight update |
| LOW | tests/entrypoints/openai/responses/test_harmony.py | 1069 | # Step 1: Get a function call from the model |
| LOW | tests/entrypoints/openai/responses/test_harmony.py | 1095 | # Step 2: Build full conversation history |
| 87 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | setup.py | 656 | "build_tag": null, |
| HIGH | setup.py | 660 | "variant": null, |
| HIGH | csrc/cpu/generate_cpu_attn_dispatch.py | 156 | (__riscv_v_min_vlen == 128 || __riscv_v_min_vlen == 256) |
| HIGH | csrc/cpu/generate_cpu_attn_dispatch.py | 228 | "&& (__riscv_v_min_vlen == 128 || __riscv_v_min_vlen == 256)", |
| HIGH | csrc/quantization/machete/generate.py | 512 | "M > 256 && K <= 16384 && N <= 4096": ((128, 128), (2, 1, 1)), |
| HIGH | csrc/quantization/machete/generate.py | 515 | "M > 128 && K <= 4096 && N <= 4096": ((128, 64), (2, 1, 1)), |
| HIGH | csrc/quantization/machete/generate.py | 516 | "M > 128 && K <= 8192 && N <= 8192": ((128, 128), (2, 1, 1)), |
| HIGH | csrc/quantization/machete/generate.py | 519 | "M > 64 && K <= 4069 && N <= 4069": ((128, 32), (2, 1, 1)), |
| HIGH | csrc/quantization/machete/generate.py | 520 | "M > 64 && K <= 4069 && N <= 8192": ((128, 64), (2, 1, 1)), |
| HIGH | csrc/quantization/machete/generate.py | 521 | "M > 64 && K >= 8192 && N >= 12288": ((256, 128), (2, 1, 1)), |
| HIGH | csrc/quantization/machete/generate.py | 524 | "M > 32 && K <= 6144 && N <= 6144": ((128, 16), (1, 1, 1)), |
| HIGH | csrc/quantization/machete/generate.py | 525 | "M > 32 && K >= 16384 && N >= 12288": ((256, 64), (2, 1, 1)), |
| HIGH | csrc/quantization/machete/generate.py | 528 | "M > 16 && K <= 12288 && N <= 8192": ((128, 32), (2, 1, 1)), |
| HIGH | tests/v1/attention/test_trtllm_attention_integration.py | 140 | # Randomly permute blocks (starting from block 1; block 0 is null). |
| HIGH | tests/v1/attention/test_mla_backends.py | 256 | # Permute the context blocks (excluding block 0 which is null) |
| HIGH | tests/v1/attention/test_attention_backends.py | 164 | # Permute the context blocks (excluding block 0 which is null) |
| HIGH | tests/v1/core/test_scheduler.py | 2630 | num_blocks=5, # Can hold 64 tokens (first block is null) |
| HIGH | tests/v1/core/test_scheduler.py | 3412 | num_blocks=15, # can hold 244 tokens with 14 blocks (first block is null) |
| HIGH | tests/v1/core/test_scheduler.py | 3653 | num_blocks=11, # Can hold 160 tokens (first block is null) |
| HIGH | tests/v1/cudagraph/test_cudagraph_mode.py | 67 | # when above code raises, `llm` may be undefined, so we need to catch that |
| HIGH | tests/v1/cudagraph/test_cudagraph_mode.py | 123 | # when above code raises, `llm` may be undefined, so we need to catch that |
| HIGH | tests/tool_parsers/test_granite_tool_parser.py | 36 | "null_field": null, |
| HIGH | tests/tool_parsers/test_gemma4_tool_parser.py | 91 | # instead of `{"param": null}` for nullable tool parameters. |
| HIGH | tests/tool_parsers/test_gemma4_tool_parser.py | 94 | assert json.dumps(result) == '{"param": null}' |
| HIGH | tests/tool_parsers/test_granite_20b_fc_tool_parser.py | 36 | "null_field": null, |
| HIGH | tests/tool_parsers/test_olmo3_tool_parser.py | 34 | "role=null, " |
| HIGH | tests/tool_parsers/test_olmo3_tool_parser.py | 43 | '"role": null, ' |
| HIGH | tests/tool_parsers/test_pythonic_tool_parser.py | 35 | '"role": null, ' |
| HIGH | tests/tool_parsers/test_deepseekv3_tool_parser.py | 43 | """"bool_field": true, "null_field": null, """ |
| HIGH | tests/tool_parsers/test_internlm2_tool_parser.py | 54 | "null_field": null, |
| HIGH | tests/tool_parsers/test_llama4_pythonic_tool_parser.py | 35 | '"role": null, ' |
| HIGH | tests/tool_parsers/test_phi4mini_tool_parser.py | 51 | "null_field": null, |
| HIGH | tests/tool_parsers/test_lfm2_tool_parser.py | 38 | '"role": null, ' |
| HIGH | tests/tool_parsers/test_longcat_tool_parser.py | 54 | "null_field": null, |
| HIGH | tests/tool_parsers/test_hunyuan_a13b_tool_parser.py | 45 | '<tool_calls>[{"name": "get_weather", "arguments": {"city": "San Francisco", "metric": "celsius"}}, {"name": |
| HIGH | tests/tool_parsers/test_qwen3coder_tool_parser.py | 633 | # Multi non-null: anyOf[string, integer, null] → first non-null is string |
| HIGH | tests/tool_parsers/test_qwen3coder_tool_parser.py | 189 | [{"question": "Pick a color", "multiSelect": false, "answer": null}] |
| HIGH | …entrypoints/serve/disagg/test_return_routed_experts.py | 36 | '{"sliding_window": null}', |
| HIGH | tests/entrypoints/openai/test_return_routed_experts.py | 33 | '{"sliding_window": null}', |
| HIGH | vllm/_custom_ops.py | 1812 | assert k_times_2 % 2 == 0, "input width must be even (gate || up layout)" |
| HIGH | vllm/_custom_ops.py | 1921 | assert k_times_2 % 2 == 0, "input width must be even (gate || up layout)" |
| HIGH | vllm/_custom_ops.py | 1792 | input_tensor: The input tensor with gate || up layout [m_topk, k*2] |
| HIGH | vllm/v1/core/single_type_kv_cache_manager.py | 777 | # result [null] [null] ... [null] [hit block 1 (1st block contain |
| HIGH | vllm/v1/core/single_type_kv_cache_manager.py | 338 | every (non-null) block — the default for full attention. |
| HIGH | vllm/v1/core/single_type_kv_cache_manager.py | 731 | [null, null, block 3], otherwise, we return [null, null] |
| HIGH | vllm/v1/core/single_type_kv_cache_manager.py | 736 | we return 4 blocks[null, null, null, null] |
| HIGH | vllm/tool_parsers/utils.py | 280 | (null, true, false) that some models produce instead of Python |
| HIGH | …ibuted/kv_transfer/kv_connector/v1/flexkv_connector.py | 47 | cd FlexKV && bash build.sh |
| HIGH | …isaggregated/flexkv_connector/prefix_caching_flexkv.py | 12 | 2. cd FlexKV && bash build.sh |
| HIGH | .buildkite/scripts/generate-nightly-index.py | 209 | "build_tag": null, |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | vllm/v1/attention/backends/registry.py | 208 | Register or override a backend implementation. Args: backend: The AttentionBackendEnum member to register |
| HIGH | vllm/v1/attention/backends/mla/prefill/registry.py | 102 | Register or override an MLA prefill backend implementation. Args: backend: The MLAPrefillBackendEnum member |
| HIGH | vllm/v1/core/single_type_kv_cache_manager.py | 718 | For chunked local attention, we need to find the longest cache hit prefix of the blocks that is not lon |
| HIGH | vllm/v1/structured_output/utils.py | 290 | Check if grammar appears to use Lark syntax. Args: grammar_str: Input grammar string Returns: |
| HIGH | vllm/v1/structured_output/utils.py | 322 | Convert a Lark grammar string to EBNF format. EBNF reference: https://github.com/ggerganov/llama.cpp/blob/ |
| HIGH | vllm/v1/worker/utils.py | 264 | Select a block size that is supported by all backends and is a factor of kv_manager_block_size. If kv_mana |
| HIGH | vllm/tool_parsers/apertus_tool_parser.py | 137 | Buffers incoming delta chunks to prevent fragmentation of multi-token special tags. If a chunk |
| HIGH | vllm/tool_parsers/apertus_tool_parser.py | 181 | Extracts tool calls from a completely generated model response (Non-Streaming). Args: mode |
| HIGH | vllm/tool_parsers/apertus_tool_parser.py | 283 | Handles streaming chunks Args: previous_text: The complete model text generated prior to t |
| HIGH | vllm/tool_parsers/apertus_tool_parser.py | 494 | Calculates the exact string difference to safely append new tool parameters. This ensures characters l |
| HIGH | …ed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py | 738 | Get number of new tokens that can be loaded from the external KV cache beyond the num_computed_tokens. |
| HIGH | vllm/distributed/kv_transfer/kv_connector/v1/base.py | 459 | Get number of new tokens that can be loaded from the external KV cache beyond the num_computed_tokens. |
| HIGH | …ed/kv_transfer/kv_connector/v1/moriio/moriio_engine.py | 205 | Get remote allocation info for a request. Args: transfer_id:TransferId The request ID Retu |
| HIGH | …nector/v1/lmcache_integration/multi_process_adapter.py | 198 | Submit a new lookup request to LMCache if there is no ongoing request. Supports both token-based and h |
| HIGH | …nector/v1/lmcache_integration/multi_process_adapter.py | 601 | Check and get the finished store and retrieve requests. Args: finished_req_ids_from_engine |
| HIGH | …/kv_transfer/kv_connector/v1/p2p/tensor_memory_pool.py | 107 | Allocates a memory block of at least the requested size. Args: size (int): Minimum size of memory t |
| HIGH | …/kv_transfer/kv_connector/v1/p2p/tensor_memory_pool.py | 184 | Stores a CUDA tensor in pinned host memory. Args: tensor (torch.Tensor): CUDA tensor to store |
| HIGH | …/kv_transfer/kv_connector/v1/p2p/tensor_memory_pool.py | 229 | Loads a tensor from pinned host memory to the specified device. Args: addr (int): Address where ten |
| HIGH | vllm/distributed/weight_transfer/factory.py | 82 | Create a weight transfer engine instance. Args: config: Weight transfer configuration containing th |
| HIGH | vllm/distributed/weight_transfer/base.py | 86 | Construct typed init info from dict with validation. Args: init_dict: Dictionary containin |
| HIGH | vllm/distributed/weight_transfer/base.py | 106 | Construct typed update info from dict with validation. Args: update_dict: Dictionary conta |
| HIGH | vllm/logging_utils/formatter.py | 22 | Shortens a file path for logging display: - Removes leading 'vllm' folder if present. |
| HIGH | vllm/model_executor/kernels/linear/__init__.py | 441 | Choose a _KernelT that can implement the given config for the given compute capability. Attempts to choose the |
| HIGH | vllm/model_executor/kernels/linear/__init__.py | 619 | Choose an MPLinearKernel that can implement the given config for the given compute capability. Attempts to cho |
| HIGH | …/model_executor/layers/fused_moe/expert_map_manager.py | 337 | Map global expert ID to local expert ID. Args: global_id: Global expert ID (0 to global_nu |
| HIGH | vllm/model_executor/layers/fla/ops/chunk.py | 153 | Args: q (torch.Tensor): Queries of shape `[B, T, H, K]`. k (torch.Tensor): |
| HIGH | vllm/model_executor/layers/fla/ops/fused_recurrent.py | 530 | Args: q (torch.Tensor): queries of shape `[B, T, H, K]`. k (torch.Tensor): |
| HIGH | vllm/model_executor/models/keye_vl1_5.py | 79 | Return num_patches per video. Args: grid_thw: Tensor with shape [N, 3] containing temporal, height, wi |
| HIGH | vllm/model_executor/models/isaac.py | 246 | Apply pixel shuffle to a packed vision sequence without unpacking per image. Args: x (`torch.Tensor`): |
| HIGH | vllm/parser/parser_manager.py | 37 | Retrieve a registered or lazily registered Parser class. Args: name: The registered name o |
| HIGH | vllm/multimodal/audio.py | 90 | Normalize audio to the specified format. This function handles channel reduction for multi-channel audio, suppo |
| HIGH | vllm/benchmarks/lib/ready_checker.py | 25 | Wait for an endpoint to become available before starting benchmarks. Args: request_func: The async req |
| HIGH | vllm/entrypoints/chat_utils.py | 1440 | Parses a given multi-modal content part based on its type. Args: part: A dict containing the content p |
| HIGH | vllm/transformers_utils/gguf_utils.py | 174 | Extract vision config parameters from mmproj.gguf metadata. Reads vision encoder configuration from GGUF metadata f |
| HIGH | vllm/transformers_utils/processors/isaac.py | 192 | Convert normalized images into flattened ViT-style patches. Args: image (`torch.Tensor`): Tenso |
| HIGH | vllm/lora/resolver.py | 72 | Get a registered resolver instance by name. Args: resolver_name: Name of the resolver to get. |
| HIGH | …ications/chatbot/streamlit_openai_chatbot_webserver.py | 111 | Generate and stream LLM response with optional reasoning process. Args: messages (list): List of conversati |
| HIGH | benchmarks/benchmark_long_document_qa_throughput.py | 68 | Repeat each prompt in the list for a specified number of times. The order of prompts in the output list depends |
| HIGH | benchmarks/attention_benchmarks/batch_spec.py | 74 | Parse batch specification string into list of BatchRequest objects. Grammar: (<count>?) q<q_len>(k?) (s<seq_le |
| Severity | File | Line | Snippet |
|---|---|---|---|
| CRITICAL | rust/src/cmd/src/cli/unsupported.rs | 138 | /// - `vllm.entrypoints.cli.serve.ServeSubcommand.subparser_init(...)` |
| CRITICAL | tests/v1/e2e/general/test_mamba_prefix_cache.py | 773 | assert engine.llm_engine.engine_core.engine_core.scheduler.reset_prefix_cache() |
| CRITICAL | tests/distributed/test_torchrun_example_moe.py | 68 | llm.llm_engine.model_executor.driver_worker.worker.model_runner.model.parameters() |
| CRITICAL | tests/distributed/test_torchrun_example.py | 59 | llm.llm_engine.model_executor.driver_worker.worker.model_runner.model.parameters() |
| CRITICAL | tests/models/language/generation/test_gemma.py | 19 | lambda self: self.model_runner.model.language_model.model.normalizer.cpu().item() # noqa: E501 |
| CRITICAL | tests/models/language/generation/test_gemma.py | 24 | lambda self: self.model_runner.model.model.normalizer.cpu().item() |
| CRITICAL | vllm/v1/spec_decode/llm_base_proposer.py | 1311 | self.model.model.embed_tokens.weight.cpu(), |
| CRITICAL | vllm/model_executor/layers/fla/ops/utils.py | 170 | triton.runtime.driver.active.utils.get_device_properties(i)[ |
| CRITICAL | docs/training/layerwise.md | 124 | model = llm.llm_engine.engine_core.engine_core.model_executor.driver_worker.worker.get_model() |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/tool_parsers/test_kimi_k2_tool_parser.py | 85 | '{"to": "user@example.com", "subject": "Daily Update"}', |
| LOW | tests/tool_parsers/test_kimi_k2_tool_parser.py | 92 | {"to": "user@example.com", "subject": "Daily Update"}, |
| LOW | tests/tool_parsers/test_mistral_tool_parser.py | 303 | "name": "John Doe", |
| LOW | tests/tool_parsers/test_mistral_tool_parser.py | 693 | "name": "John Doe", |
| LOW | tests/tool_parsers/test_mistral_tool_parser.py | 1056 | "name": "John Doe", |
| LOW | tests/tool_parsers/test_mistral_tool_parser.py | 296 | """[TOOL_CALLS] [{"arguments":{"name": "John Doe"}, "name": "get_age"}]""", # noqa: E501 |
| LOW | tests/tool_parsers/test_mistral_tool_parser.py | 686 | """[TOOL_CALLS] [{"arguments": {"name": "John Doe"}, "name": "get_age"}]""", # noqa: E501 |
| LOW | tests/tool_parsers/test_mistral_tool_parser.py | 1049 | """[TOOL_CALLS] [{"arguments": {"name": "John Doe"}, "name": "get_age"}]""", # noqa: E501 |
| LOW | tests/tool_parsers/test_olmo3_tool_parser.py | 23 | "register_user(name='John Doe', " |
| LOW | tests/tool_parsers/test_olmo3_tool_parser.py | 31 | "register_user(name='John Doe', " |
| LOW | tests/tool_parsers/test_olmo3_tool_parser.py | 40 | arguments='{"name": "John Doe", ' |
| LOW | tests/tool_parsers/test_pythonic_tool_parser.py | 23 | "register_user(name='John Doe', " |
| LOW | tests/tool_parsers/test_pythonic_tool_parser.py | 32 | arguments='{"name": "John Doe", ' |
| LOW | tests/tool_parsers/test_lfm2_tool_parser.py | 26 | "register_user(name='John Doe', " |
| LOW | tests/tool_parsers/test_lfm2_tool_parser.py | 35 | arguments='{"name": "John Doe", ' |
| LOW | tests/tool_parsers/test_lfm2_tool_parser.py | 339 | "deliveryAddress='123 Main St')]" |
| LOW | tests/tool_parsers/test_hunyuan_a13b_tool_parser.py | 45 | '<tool_calls>[{"name": "get_weather", "arguments": {"city": "San Francisco", "metric": "celsius"}}, {"name": |
| LOW | tests/tool_parsers/test_hunyuan_a13b_tool_parser.py | 53 | "name": "John Doe", |
| LOW | tests/benchmarks/test_txt_slices_dataset.py | 20 | Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor |
| LOW | tests/benchmarks/test_txt_slices_dataset.py | 20 | Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tools/setup_deepgemm_pythons.sh | 6 | # Usage: |
| LOW | tools/vllm-rocm/generate-rocm-wheels-root-index.sh | 11 | # Usage: |
| LOW | docker/docker-bake.hcl | 5 | # Usage: |
| LOW | docker/entrypoints/test_vllm_nonroot_entrypoint.sh | 9 | # Usage: |
| LOW | …/nixl_integration/run_multi_connector_accuracy_test.sh | 15 | # Usage: |
| LOW | …nector/nixl_integration/spec_decode_acceptance_test.sh | 13 | # Usage: |
| LOW | …nixl_integration/run_multi_connector_edge_case_test.sh | 15 | # Usage: |
| LOW | examples/tool_calling/chat_with_tools_offline.py | 44 | # Usage: |
| LOW | examples/ray_serving/run_cluster.sh | 8 | # Usage: |
| LOW | examples/ray_serving/multi-node-serving.sh | 11 | # Example usage: |
| LOW | examples/generate/multimodal/mistral-small_offline.py | 51 | # Usage: |
| LOW | …nchmarks/attention_benchmarks/configs/mla_prefill.yaml | 14 | # Usage: |
| LOW | .buildkite/scripts/cache-rocm-base-wheels.sh | 10 | # Usage: |
| LOW | .buildkite/scripts/tool_call/run-bfcl-eval.sh | 5 | # Usage: |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | …les/pooling/embed/openai_embedding_long_text/client.py | 22 | --api-key your-api-key |
| HIGH | …les/pooling/embed/openai_embedding_long_text/client.py | 32 | --api-key your-api-key |
| HIGH | …les/pooling/embed/openai_embedding_long_text/client.py | 44 | API_KEY = "your-api-key" # Replace with your actual API key |
| HIGH | …es/pooling/embed/openai_embedding_long_text/service.sh | 19 | API_KEY=${API_KEY:-"your-api-key"} |
| HIGH | .github/ISSUE_TEMPLATE/400-bug-report.yml | 20 | Consider redacting or replacing sensitive values with placeholders like `<YOUR_TOKEN_HERE>` when sharing configura |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/models/multimodal/generation/test_common.py | 64 | # model arch happens to be a substring of another one, you can add a |
| MEDIUM | tests/models/multimodal/generation/test_common.py | 85 | # NOTE you can add --collect-only to any of the above commands to see |
| LOW | …distributed/kv_transfer/kv_connector/v1/nixl/worker.py | 1974 | # while processing the next batch, we make sure to only set an |
| MEDIUM | vllm/model_executor/models/interfaces.py | 224 | as a language model component. |
| MEDIUM | …ache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh | 24 | # secure random value. This is set to a fixed value for demonstration purposes only. |
| MEDIUM | examples/rl/rlhf_ipc.py | 17 | for demonstration purposes we simply zero out the weights. |
| MEDIUM | .buildkite/test-amd.yaml | 1 | # In this file, you can add more tests to run either by adding a new step or |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | …tic_prefix_caching/automatic_prefix_caching_offline.py | 26 | # A prompt containing a large markdown table. The table is randomly generated by GPT-4. |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …gated/disaggregated_serving/moriio_toy_proxy_server.py | 227 | async def handle_request(api: str, request: Request): |
| LOW | …aggregated/p2p_nccl_xpyd/disagg_proxy_p2p_nccl_xpyd.py | 125 | async def handle_request(): |
| LOW | …marks/disagg_benchmarks/disagg_prefill_proxy_server.py | 243 | async def handle_request(): |
| LOW | benchmarks/disagg_benchmarks/round_robin_proxy.py | 16 | async def handle_request(self, request): |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/v1/e2e/general/test_streaming_input.py | 502 |