Repository Analysis

sgl-project/sglang

SGLang is a high-performance serving framework for large language models and multimodal models.

15.1 · Moderate AI signal · View on GitHub
15.1
Adjusted Score
15.1
Raw Score
100%
Time Factor
2026-05-30
Last Push
28,461
Stars
Python
Language
1,524,079
Lines of Code
5242
Files
17209
Pattern Hits
2026-05-31
Scan Date

Score History

Severity Breakdown

CRITICAL 6 · HIGH 472 · MEDIUM 1918 · LOW 14813

Pattern Findings

17209 matches across 20 categories. Click a row to expand file-level details.

Hyper-Verbose Identifiers — 9004 hits · 8835 pts
Severity | File | Line | Snippet
LOWbenchmark/dspy/bench_dspy_intro.py136 def validate_context_and_answer(example, pred, trace=None):
LOW…ing_window_attention_triton/bench_triton_swa_kernel.py11def extend_attention_fwd_torch(
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py122def cleanup_flashinfer_workspace(ipc_handles):
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py85def setup_flashinfer_workspace(
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py152 def get_trtllm_fused_allreduce_kwargs(self):
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py162def flashinfer_fused_allreduce_rmsnorm(
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py287def standard_allreduce_rmsnorm(
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py397def standard_allreduce_rmsnorm_native(
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py478def standard_allreduce_rmsnorm_native_compiled(
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py894def prepare_results_with_speedups(results_dict):
LOW…uler_batch/benchmark_write_req_to_token_pool_triton.py12def write_req_to_token_pool_triton(
LOW…uler_batch/benchmark_write_req_to_token_pool_triton.py49def write_req_to_token_pool_triton_optimize(
LOW…uler_batch/benchmark_write_req_to_token_pool_triton.py91def write_req_to_token_pool_reference(
LOW…uler_batch/benchmark_write_req_to_token_pool_triton.py114def test_write_req_to_token_pool():
LOWbenchmark/kernels/fused_moe_triton/common_utils.py23def calculate_shard_intermediate_size(
LOWbenchmark/kernels/fused_moe_triton/common_utils.py172def get_rocm_configs_compute_bound() -> List[Dict[str, int]]:
LOWbenchmark/kernels/fused_moe_triton/common_utils.py195def get_configs_compute_bound() -> List[Dict[str, int]]:
LOW…pseek/benchmark_deepgemm_dsv3_router_gemm_blackwell.py137def get_benchmark_plot_friendly(tp_sizes):
LOW…nels/deepseek/benchmark_deepgemm_fp8_gemm_blackwell.py231def get_benchmark_plot_friendly(tp_size):
LOW…hmark/kernels/quantization/tuning_block_wise_kernel.py148def get_rocm_configs_compute_bound():
LOW…hmark/kernels/quantization/tuning_block_wise_kernel.py171def get_configs_compute_bound():
LOW…s/decoding_attention_triton/triton_flashinfer_cudnn.py98def decode_attention_flashinfer(dtype, head_num_q, head_num_kv):
LOWbenchmark/mmmu/bench_sglang.py127async def process_sample_with_semaphore(
LOWbenchmark/mmmu/eval_utils.py292def _parse_explicit_multi_choice_answer(response, all_choices):
LOWbenchmark/mmmu/eval_utils.py303def parse_multi_choice_response(response, all_choices, index2ans):
LOW…hmark/bench_pynccl_allocator/bench_segment_tracking.py56def bench_register_segments_with_comm(
LOW…hmark/bench_pynccl_allocator/bench_segment_tracking.py114def bench_with_various_segment_counts(
LOWbenchmark/scheduler/bench_token_storage.py84def _batch_tensor_from_pyarrays(parts: list[array]) -> torch.Tensor:
LOWbenchmark/scheduler/bench_token_storage.py256def microbench_torch_tensor_paths(
LOWbenchmark/hicache/bench_mix.py299async def async_request_sglang_generate(
LOWbenchmark/hicache/bench_warm_cache.py90def _create_bench_client_session() -> aiohttp.ClientSession:
LOWbenchmark/hicache/bench_warm_cache.py97async def async_request_sglang_generate(
LOWbenchmark/hicache/bench_warm_cache.py413def maybe_write_summary_jsonl(
LOWbenchmark/hicache/bench_warm_cache.py472async def benchmark_shared_prefix_pct(
LOWbenchmark/hicache/bench_serving.py71async def async_request_openai_completions(
LOWbenchmark/hicache/data_processing.py150def sample_ultrachat_requests(
LOWbenchmark/hicache/data_processing.py434def sample_generated_shared_prefix_requests(
LOWbenchmark/prefill_only/bench_score.py65def create_score_request_builder():
LOWbenchmark/prefill_only/bench_score.py80 def generate_text_with_token_count_local(num_toks):
LOWbenchmark/prefill_only/bench_score.py120def build_warmup_score_request() -> dict:
LOWbenchmark/prefill_only/util.py46def generate_text_with_token_count(
LOWbenchmark/prefill_only/util.py106def prepare_all_requests_parallel(
LOWbenchmark/prefill_only/util.py398async def perform_global_warmup_and_freeze(
LOWbenchmark/prefill_only/bench_embeddings.py100def validate_embeddings_response(response_data: dict) -> bool:
LOWbenchmark/prefill_only/bench_embeddings.py110def build_warmup_embeddings_request() -> dict:
LOW…nchmark/bench_in_batch_prefix/bench_in_batch_prefix.py76def test_batch_by_batch_with_hint(all_prompts, gen_len):
LOWbenchmark/asr/bench_sglang.py55def run_asr_transcription_sync(client, model_name, y, sr, language=None):
LOWbenchmark/asr/bench_sglang.py75def run_asr_transcription_stream_sync(
LOWbenchmark/benchmark_batch/benchmark_tokenizer.py179def generate_random_token_ids(*, num_prompts, num_tokens, tokenizer):
LOWbenchmark/lora/lora_bench.py48async def async_request_openai_completions(
LOW…ark/bench_linear_attention/bench_cutedsl_kda_decode.py155def run_prefill_then_decode_baseline(inp):
LOW…ark/bench_linear_attention/bench_cutedsl_kda_decode.py191def run_prefill_then_decode_cutedsl(inp):
LOWbenchmark/generative_agents/agent_functions.py56def generate_event_triple_prompt(persona_name, action):
LOWbenchmark/generative_agents/agent_functions.py89def generate_pronunciatio_prompt(action):
LOWbenchmark/generative_agents/agent_functions.py158def action_location_sector_prompt(
LOWbenchmark/generative_agents/agent_functions.py260def action_location_object_prompt(
LOWbenchmark/llm_judge/bench_other.py52async def multi_dimension_judge_async(article, generate):
LOW…l/sgl-router/tests/scripts/generate_parity_fixtures.py63def load_tokenizer_with_fallback(primary, fallback, slug):
LOWexperimental/sgl-router/tests/e2e/conftest.py169def build_smoke_router_config(
LOW…sgl-router/tests/e2e/k8s_integration/test_lifecycle.py54 def test_router_routes_after_scale_up(self, router_url):
8944 more matches not shown…
Decorative Section Separators — 1489 hits · 4868 pts
Severity | File | Line | Snippet
MEDIUM…hmark/kernels/quantization/tuning_block_wise_kernel.py13# ==============================================================================
MEDIUMbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py128# ---------------------------------------------------------------------------
MEDIUMbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py130# ---------------------------------------------------------------------------
MEDIUMbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py181# ---------------------------------------------------------------------------
MEDIUMbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py183# ---------------------------------------------------------------------------
MEDIUMbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py333# ---------------------------------------------------------------------------
MEDIUMbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py335# ---------------------------------------------------------------------------
MEDIUMbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py390# ---------------------------------------------------------------------------
MEDIUMbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py392# ---------------------------------------------------------------------------
MEDIUMbenchmark/lora/lora_bench.py13# ==============================================================================
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py39# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py41# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py89# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py91# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py155# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py157# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py206# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py208# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py299# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_decode.py301# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py38# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py40# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py68# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py70# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py117# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py119# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py173# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py175# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py232# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py234# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py323# ---------------------------------------------------------------------------
MEDIUM…rk/bench_linear_attention/bench_gdn_prefill_cutedsl.py325# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py37# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py39# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py96# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py98# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py161# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py163# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py247# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py249# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py350# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py352# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py448# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_linear_attention/bench_gdn_prefill.py450# ---------------------------------------------------------------------------
MEDIUMbenchmark/bench_rope/benchmark_rope_index.py21# -----------------------------
MEDIUMbenchmark/bench_rope/benchmark_rope_index.py23# -----------------------------
MEDIUMbenchmark/bench_rope/benchmark_rope_index.py39# -----------------------------
MEDIUMbenchmark/bench_rope/benchmark_rope_index.py41# -----------------------------
MEDIUMexperimental/sgl-router/tests/e2e/conftest.py246# ---------------------------------------------------------------------------
MEDIUMexperimental/sgl-router/tests/e2e/conftest.py248# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh37# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh39# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh49# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh51# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh74# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh76# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh81# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh83# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh88# ---------------------------------------------------------------------------
MEDIUM…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh92# ---------------------------------------------------------------------------
1429 more matches not shown…
Cross-File Repetition — 424 hits · 2120 pts
Severity | File | Line | Snippet
HIGH…hmark/kernels/quantization/tuning_block_wise_kernel.py0this function performs matrix multiplication with block-wise quantization. it takes two input tensors `a` and `b` with s
HIGHpython/sglang/srt/layers/quantization/int8_kernel.py0this function performs matrix multiplication with block-wise quantization. it takes two input tensors `a` and `b` with s
HIGHpython/sglang/srt/layers/quantization/fp8_kernel.py0this function performs matrix multiplication with block-wise quantization. it takes two input tensors `a` and `b` with s
HIGHbenchmark/tree_of_thought_v0/bench_sglang.py0please generate a high-level plan for solving the following question. as the first step, just say what method and idea y
HIGHbenchmark/tree_of_thought_v0/bench_other.py0please generate a high-level plan for solving the following question. as the first step, just say what method and idea y
HIGHbenchmark/tree_of_thought_deep/bench_sglang.py0please generate a high-level plan for solving the following question. as the first step, just say what method and idea y
HIGHbenchmark/tree_of_thought_deep/lmql_funcs.py0please generate a high-level plan for solving the following question. as the first step, just say what method and idea y
HIGHbenchmark/tree_of_thought_deep/bench_other.py0please generate a high-level plan for solving the following question. as the first step, just say what method and idea y
HIGHbenchmark/tree_of_thought_v0/bench_sglang.py0the plan looks good! now, use real numbers and do the calculation. please solve the question step-by-step according to t
HIGHbenchmark/tree_of_thought_v0/bench_other.py0the plan looks good! now, use real numbers and do the calculation. please solve the question step-by-step according to t
HIGHbenchmark/tree_of_thought_deep/bench_sglang.py0the plan looks good! now, use real numbers and do the calculation. please solve the question step-by-step according to t
HIGHbenchmark/tree_of_thought_deep/lmql_funcs.py0the plan looks good! now, use real numbers and do the calculation. please solve the question step-by-step according to t
HIGHbenchmark/tree_of_thought_deep/bench_other.py0the plan looks good! now, use real numbers and do the calculation. please solve the question step-by-step according to t
HIGHbenchmark/tip_suggestion/bench_sglang.py0please expand a tip for a topic into a detailed paragraph. topic: staying healthy tip: regular exercise paragraph: incor
HIGHbenchmark/tip_suggestion/lmql_funcs.py0please expand a tip for a topic into a detailed paragraph. topic: staying healthy tip: regular exercise paragraph: incor
HIGHbenchmark/tip_suggestion/bench_other.py0please expand a tip for a topic into a detailed paragraph. topic: staying healthy tip: regular exercise paragraph: incor
HIGHbenchmark/json_jump_forward/bench_sglang.py0"house": "(gryffindor|slytherin|ravenclaw|hufflepuff)",\n
HIGHbenchmark/json_jump_forward/bench_other.py0"house": "(gryffindor|slytherin|ravenclaw|hufflepuff)",\n
HIGHexamples/frontend_language/usage/json_decode.py0"house": "(gryffindor|slytherin|ravenclaw|hufflepuff)",\n
HIGHbenchmark/json_jump_forward/bench_sglang.py0"blood status": "(pure-blood|half-blood|muggle-born)",\n
HIGHbenchmark/json_jump_forward/bench_other.py0"blood status": "(pure-blood|half-blood|muggle-born)",\n
HIGHexamples/frontend_language/usage/json_decode.py0"blood status": "(pure-blood|half-blood|muggle-born)",\n
HIGHbenchmark/json_jump_forward/bench_sglang.py0"occupation": "(student|teacher|auror|ministry of magic|death eater|order of the phoenix)",\n
HIGHbenchmark/json_jump_forward/bench_other.py0"occupation": "(student|teacher|auror|ministry of magic|death eater|order of the phoenix)",\n
HIGHexamples/frontend_language/usage/json_decode.py0"occupation": "(student|teacher|auror|ministry of magic|death eater|order of the phoenix)",\n
HIGHbenchmark/tree_of_thought_deep/bench_sglang.py0okay. now, evaluate your own solution and give it a score on a scale of 1 to 5. please do rigorous check of the correctn
HIGHbenchmark/tree_of_thought_deep/lmql_funcs.py0okay. now, evaluate your own solution and give it a score on a scale of 1 to 5. please do rigorous check of the correctn
HIGHbenchmark/tree_of_thought_deep/bench_other.py0okay. now, evaluate your own solution and give it a score on a scale of 1 to 5. please do rigorous check of the correctn
HIGHbenchmark/tree_of_thought_deep/bench_sglang.py0based on your reflection, do you change your mind? now, give me the final answer after careful consideration.
HIGHbenchmark/tree_of_thought_deep/lmql_funcs.py0based on your reflection, do you change your mind? now, give me the final answer after careful consideration.
HIGHbenchmark/tree_of_thought_deep/bench_other.py0based on your reflection, do you change your mind? now, give me the final answer after careful consideration.
HIGHtest/srt/cpu/test_qwen3.py0derives `query`, `key` and `value` tensors from `mixed_qkvzba`.
HIGHtest/registered/cpu/test_qwen3.py0derives `query`, `key` and `value` tensors from `mixed_qkvzba`.
HIGHpython/sglang/srt/models/qwen3_5.py0derives `query`, `key` and `value` tensors from `mixed_qkvzba`.
HIGHpython/sglang/srt/models/qwen3_next.py0derives `query`, `key` and `value` tensors from `mixed_qkvzba`.
HIGHtest/srt/cpu/utils.py0matrix multiplication function that supports per-token input quantization and per-column weight quantization
HIGHtest/registered/quant/test_int8_kernel.py0matrix multiplication function that supports per-token input quantization and per-column weight quantization
HIGHtest/registered/cpu/utils.py0matrix multiplication function that supports per-token input quantization and per-column weight quantization
HIGH…t/registered/moe/test_triton_moe_channel_fp8_kernel.py0matrix multiplication function that supports per-token input quantization and per-column weight quantization
HIGHtest/srt/cpu/utils.py0this function performs fused moe with per-column int8 quantization using native torch.
HIGHtest/registered/quant/test_int8_kernel.py0this function performs fused moe with per-column int8 quantization using native torch.
HIGHtest/registered/cpu/utils.py0this function performs fused moe with per-column int8 quantization using native torch.
HIGH…t/registered/moe/test_triton_moe_channel_fp8_kernel.py0this function performs fused moe with per-column int8 quantization using native torch.
HIGHtest/srt/cpu/test_causal_conv1d.py0x: (batch, dim, seqlen) weight: (dim, width) bias: (dim,) initial_states: (batch, dim, width - 1) final_states_out: (bat
HIGHtest/registered/layers/mamba/test_causal_conv1d.py0x: (batch, dim, seqlen) weight: (dim, width) bias: (dim,) initial_states: (batch, dim, width - 1) final_states_out: (bat
HIGHtest/registered/cpu/test_causal_conv1d.py0x: (batch, dim, seqlen) weight: (dim, width) bias: (dim,) initial_states: (batch, dim, width - 1) final_states_out: (bat
HIGHsgl-kernel/tests/test_causal_conv1d.py0x: (batch, dim, seqlen) weight: (dim, width) bias: (dim,) initial_states: (batch, dim, width - 1) final_states_out: (bat
HIGHtest/srt/cpu/test_causal_conv1d.py0x: (batch, dim) or (batch, dim, seqlen) conv_state: (batch, dim, state_len), where state_len >= width - 1 weight: (dim,
HIGHtest/registered/layers/mamba/test_causal_conv1d.py0x: (batch, dim) or (batch, dim, seqlen) conv_state: (batch, dim, state_len), where state_len >= width - 1 weight: (dim,
HIGHtest/registered/cpu/test_causal_conv1d.py0x: (batch, dim) or (batch, dim, seqlen) conv_state: (batch, dim, state_len), where state_len >= width - 1 weight: (dim,
HIGHsgl-kernel/tests/test_causal_conv1d.py0x: (batch, dim) or (batch, dim, seqlen) conv_state: (batch, dim, state_len), where state_len >= width - 1 weight: (dim,
HIGHtest/manual/quant/test_block_fp8.py0this function performs matrix multiplication with block-wise quantization using native torch. it takes two input tensors
HIGHtest/manual/quant/test_block_fp8_deep_gemm_blackwell.py0this function performs matrix multiplication with block-wise quantization using native torch. it takes two input tensors
HIGHtest/registered/quant/test_block_int8.py0this function performs matrix multiplication with block-wise quantization using native torch. it takes two input tensors
HIGHtest/manual/nightly/test_vlms_vit_cuda_graph.py0evaluate a vlm on the mmmu validation set with lmms‑eval. only `model_version` (checkpoint) and `chat_template` vary; we
HIGHtest/manual/nightly/test_vlms_vit_flashinfer_cudnn.py0evaluate a vlm on the mmmu validation set with lmms‑eval. only `model_version` (checkpoint) and `chat_template` vary; we
HIGHtest/manual/nightly/test_vlms_piecewise_cuda_graph.py0evaluate a vlm on the mmmu validation set with lmms‑eval. only `model_version` (checkpoint) and `chat_template` vary; we
HIGHpython/sglang/test/ascend/vlm_utils.py0evaluate a vlm on the mmmu validation set with lmms‑eval. only `model_version` (checkpoint) and `chat_template` vary; we
HIGHtest/manual/nightly/test_vlms_vit_cuda_graph.py0common method to run vlm mmmu benchmark test. args: model: model to test output_path: path for output logs test_name: op
HIGHtest/manual/nightly/test_vlms_vit_flashinfer_cudnn.py0common method to run vlm mmmu benchmark test. args: model: model to test output_path: path for output logs test_name: op
364 more matches not shown…
Unused Imports — 1818 hits · 1706 pts
Severity | File | Line | Snippet
LOWbenchmark/scheduler/bench_token_storage.py25
LOWbenchmark/hicache/perf.py1
LOW…router/tests/scripts/generate_kv_events_hash_parity.py48
LOW…perimental/sgl-router/tests/e2e/test_tokenize_smoke.py5
LOWexperimental/sgl-router/tests/e2e/conftest.py21
LOWexperimental/sgl-router/tests/e2e/test_chat_smoke.py5
LOWexperimental/sgl-router/tests/e2e/test_chat_smoke.py8
LOWexperimental/sgl-router/tests/e2e/infra/gateway.py26
LOWexperimental/sgl-router/tests/e2e/infra/model_specs.py16
LOWexperimental/sgl-router/tests/e2e/infra/model_pool.py19
LOW…al/sgl-router/tests/e2e/k8s_integration/fake_worker.py10
LOW…sgl-router/tests/e2e/k8s_integration/test_lifecycle.py14
LOW…sgl-router/tests/e2e/k8s_integration/test_lifecycle.py16
LOW…sgl-router/tests/e2e/k8s_integration/test_lifecycle.py19
LOW…sgl-router/tests/e2e/k8s_integration/test_lifecycle.py20
LOW…ental/sgl-router/tests/e2e/k8s_integration/conftest.py12
LOW…sgl-router/tests/e2e/k8s_integration/test_discovery.py8
LOW…sgl-router/tests/e2e/k8s_integration/test_discovery.py11
LOW…sgl-router/tests/e2e/k8s_integration/test_discovery.py12
LOW…uter/tests/e2e/k8s_integration/test_cross_namespace.py15
LOW…uter/tests/e2e/k8s_integration/test_cross_namespace.py18
LOW…outer/tests/e2e/k8s_integration/test_reconciliation.py16
LOW…outer/tests/e2e/k8s_integration/test_reconciliation.py18
LOW…l-router/tests/e2e/chat_completions/test_validation.py14
LOW…ts/e2e/chat_completions/test_two_router_convergence.py38
LOWtest/manual/test_ray_engine.py25
LOWtest/manual/quant/test_block_fp8.py679
LOWtest/manual/layers/moe/bench_mxfp4_sm90_kernels.py21
LOWtest/manual/hicache/test_pp_with_hicache.py79
LOW…ion/unittests/dual_chunk/test_dual_chunk_flash_attn.py48
LOW…ed/unit/layers/quantization/test_mxfp4_sm90_cutlass.py14
LOW…ed/unit/layers/quantization/test_mxfp4_sm90_cutlass.py174
LOW…d/unit/hardware_backend/mlx/test_attention_patching.py3
LOW…istered/unit/hardware_backend/mlx/test_quantization.py13
LOWtest/registered/unit/distributed/test_parallel_state.py37
LOW…egistered/unit/utils/test_hf_transformers_fastokens.py19
LOW…t/registered/unit/model_loader/test_modelopt_export.py29
LOW…saggregation/test_disaggregation_decode_radix_cache.py24
LOW…saggregation/test_disaggregation_decode_radix_cache.py32
LOWtest/registered/layers/test_fla_layernorm_guard.py1
LOW…gistered/models_e2e/test_deepseek_v4_flash_fp4_h200.py28
LOW…st/registered/openai_server/basic/test_http2_server.py24
LOWtest/registered/prefill_only/test_serving_rerank.py134
LOWtest/registered/observability/test_tracing.py57
LOWtest/registered/vlm/test_vision_openai_server_a.py12
LOWtest/registered/amd/test_wan22_fp8_mla.py3
LOW…t/registered/debug_utils/comparator/testing_helpers.py3
LOW…gistered/debug_utils/comparator/test_meta_overrider.py3
LOWtest/registered/debug_utils/comparator/test_e2e_demo.py9
LOW…tils/comparator/aligner/token_aligner/test_executor.py1
LOW…ered/breakable_cuda_graph/test_breakable_cuda_graph.py37
LOW…ered/breakable_cuda_graph/test_breakable_cuda_graph.py52
LOW…ered/breakable_cuda_graph/test_breakable_cuda_graph.py198
LOW…ered/breakable_cuda_graph/test_breakable_cuda_graph.py260
LOWpython/sglang/kernel_api_logging.py7
LOWpython/sglang/__init__.py35
LOWpython/sglang/__init__.py36
LOWpython/sglang/__init__.py36
LOWpython/sglang/__init__.py36
LOWpython/sglang/__init__.py36
1758 more matches not shown…
Excessive Try-Catch Wrapping — 1416 hits · 1670 pts
Severity | File | Line | Snippet
LOWbenchmark/bench_adaptive_speculative.py70 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py117 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py130 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py47except Exception: # pragma: no cover - fallback on non-supported platforms
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py650 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py664 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py683 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py700 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py720 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py737 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py762 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py787 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py811 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py828 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py854 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py882 except Exception as e:
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py1083 except Exception as e:
LOWbenchmark/kernels/all_reduce/benchmark_aiter.py227 except Exception as e:
LOWbenchmark/kernels/all_reduce/benchmark_aiter.py237 except Exception as e:
LOWbenchmark/kernels/all_reduce/benchmark_aiter.py253 except Exception as e:
LOWbenchmark/kernels/all_reduce/benchmark_aiter.py265 except Exception as e:
LOWbenchmark/kernels/all_reduce/benchmark_aiter.py305 except Exception:
LOWbenchmark/kernels/all_reduce/benchmark_all_reduce.py245 except Exception as e:
LOWbenchmark/kernels/all_reduce/benchmark_all_reduce.py255 except Exception as e:
LOWbenchmark/kernels/all_reduce/benchmark_all_reduce.py274 except Exception as e:
LOWbenchmark/kernels/all_reduce/benchmark_all_reduce.py284 except Exception as e:
LOWbenchmark/kernels/all_reduce/benchmark_all_reduce.py324 except Exception:
LOWbenchmark/kernels/quantization/bench_fp4_quant.py114 except Exception as e:
LOWbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py228 except Exception:
LOWbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py305 except Exception:
LOWbenchmark/mmmu/bench_sglang.py64 except Exception:
LOWbenchmark/mmmu/bench_hf.py35 except Exception as first_exception:
LOWbenchmark/mmmu/bench_hf.py60 except Exception as second_exception:
LOWbenchmark/mmmu/eval_utils.py195 except Exception as exc:
MEDIUMbenchmark/mmmu/eval_utils.py514 print(f"Error to extract answer from: {pred_i}")
LOWbenchmark/json_schema/bench_sglang.py104 except Exception as e:
MEDIUM…hmark/bench_pynccl_allocator/bench_segment_tracking.py195 print("Error: CUDA is not available. This benchmark requires a GPU.")
LOWbenchmark/hicache/bench_mix.py362 except Exception as e:
LOWbenchmark/hicache/bench_mix.py423 except Exception as e:
MEDIUMbenchmark/hicache/bench_mix.py479 print(f"Error processing response for client {user_data}: {e}")
MEDIUMbenchmark/hicache/bench_mix.py417def handle_request(self, user_data):
LOWbenchmark/hicache/bench_warm_cache.py169 except Exception as exc:
LOWbenchmark/hicache/bench_serving.py194 except Exception:
LOWbenchmark/hicache/bench_serving.py214 except Exception:
LOWbenchmark/hicache/bench_serving.py273 except Exception as e:
MEDIUMbenchmark/hicache/bench_serving.py632 print(f"Error running benchmark for request rate: {request_rate}")
LOWbenchmark/hicache/bench_serving.py739 except Exception as e:
MEDIUMbenchmark/hicache/bench_multiturn.py180 print(f"Error writing to JSONL file: {e}")
LOWbenchmark/hicache/bench_multiturn.py395 except Exception as e:
MEDIUMbenchmark/hicache/bench_multiturn.py543 print(f"Error processing response for client {client_id}: {e}")
LOWbenchmark/hicache/bench_multiturn.py552 except Exception as e:
LOWbenchmark/prefill_only/bench_score.py108 except Exception as e:
MEDIUMbenchmark/prefill_only/bench_score.py109 print(f"Error building request {index}: {e}")
LOWbenchmark/prefill_only/util.py131 except Exception as e:
MEDIUMbenchmark/prefill_only/util.py132 print(f"Error building request {index}: {e}")
LOWbenchmark/prefill_only/util.py160 except Exception as e:
MEDIUMbenchmark/prefill_only/util.py161 print(f"Error processing request result: {e}")
LOWbenchmark/prefill_only/util.py278 except Exception as e:
LOWbenchmark/prefill_only/util.py327 except Exception as e:
MEDIUMbenchmark/prefill_only/util.py328 print(f"Error sending {profile_text} request: {e}")
1356 more matches not shown…
Deep Nesting1463 hits · 1308 pts
SeverityFileLineSnippet
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py605
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py894
LOW…hmark/kernels/all_reduce/benchmark_fused_ar_rms_amd.py367
LOW…/fused_moe_triton/benchmark_torch_compile_fused_moe.py16
LOWbenchmark/kernels/fused_moe_triton/common_utils.py32
LOWbenchmark/kernels/fused_moe_triton/common_utils.py172
LOWbenchmark/kernels/fused_moe_triton/common_utils.py195
LOW…hmark/kernels/quantization/tuning_block_wise_kernel.py148
LOW…hmark/kernels/quantization/tuning_block_wise_kernel.py171
LOWbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py133
LOWbenchmark/kernels/lora_csgmv/tune_lora_csgmv.py151
LOWbenchmark/kernels/deepep/deepep_utils.py157
LOWbenchmark/kernels/deepep/tuning_deepep.py30
LOW…s/decoding_attention_triton/triton_flashinfer_cudnn.py159
LOWbenchmark/mmmu/eval_utils.py154
LOWbenchmark/mmmu/eval_utils.py303
LOWbenchmark/mmmu/eval_utils.py423
LOWbenchmark/mmmu/eval_utils.py528
LOWbenchmark/mmmu/eval_utils.py430
LOWbenchmark/json_schema/bench_sglang.py29
LOWbenchmark/line_retrieval/bench_sglang.py30
LOWbenchmark/hicache/bench_mix.py299
LOWbenchmark/hicache/nextqa.py9
LOWbenchmark/hicache/bench_warm_cache.py97
LOWbenchmark/hicache/bench_serving.py71
LOWbenchmark/hicache/bench_serving.py286
LOWbenchmark/hicache/data_processing.py37
LOWbenchmark/hicache/data_processing.py198
LOWbenchmark/hicache/data_processing.py267
LOWbenchmark/hicache/data_processing.py520
LOWbenchmark/hicache/bench_multiturn.py435
LOWbenchmark/hicache/bench_multiturn.py555
LOWbenchmark/prefill_only/util.py106
LOWbenchmark/prefill_only/util.py284
LOWbenchmark/reasoning_benchmark/answer_extraction.py8
LOWbenchmark/reasoning_benchmark/answer_extraction.py182
LOWbenchmark/reasoning_benchmark/answer_extraction.py214
LOWbenchmark/reasoning_benchmark/eval_utils.py59
LOWbenchmark/asr/bench_sglang.py75
LOWbenchmark/json_jump_forward/bench_other.py132
LOWbenchmark/lora/lora_bench.py48
LOWbenchmark/bench_linear_attention/bench_gdn_decode.py359
LOWbenchmark/bench_linear_attention/bench_gdn_prefill.py520
LOWexperimental/sgl-router/tests/e2e/infra/gateway.py237
LOWexperimental/sgl-router/tests/e2e/infra/gateway.py378
LOWexperimental/sgl-router/tests/e2e/infra/model_pool.py106
LOW…outer/tests/e2e/k8s_integration/test_reconciliation.py85
LOW…l-router/tests/e2e/chat_completions/test_validation.py62
LOWtest/srt/cpu/test_decode.py264
LOWtest/srt/cpu/test_topk.py119
LOWtest/manual/test_quick_allreduce.py106
LOWtest/manual/test_quick_allreduce.py274
LOWtest/manual/test_triton_attention_rocm_mla.py224
LOWtest/manual/test_logprobs.py99
LOWtest/manual/test_logprobs.py302
LOWtest/manual/test_triton_moe_wna16.py16
LOWtest/manual/test_mscclpp.py106
LOWtest/manual/test_kv_events.py28
LOWtest/manual/test_kv_events.py164
LOWtest/manual/test_kv_events.py292
1403 more matches not shown…
Over-Commented Block795 hits · 777 pts
SeverityFileLineSnippet
LOW…hmark/kernels/quantization/tuning_block_wise_kernel.py1# Copyright 2025 SGLang Team
LOWbenchmark/lora/lora_bench.py1# Copyright 2023-2024 SGLang Team
LOW…ental/sgl-router/tests/proxy/pd_bootstrap_injection.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOW…perimental/sgl-router/tests/proxy/graceful_shutdown.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOW…perimental/sgl-router/tests/proxy/pd_pool_isolation.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOWexperimental/sgl-router/tests/proxy/chat_routing.rs701 .await;
LOWexperimental/sgl-router/tests/proxy/chat_routing.rs1201 // Wait long enough for the SSE pump to notice the receiver-drop and
LOWexperimental/sgl-router/tests/proxy/common/streaming.rs21 use http_body_util::BodyExt;
LOW…imental/sgl-router/tests/component/tokenizer/parity.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOW…sgl-router/tests/component/policies/cache_aware_zmq.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOW…/tests/component/policies/kv_events_two_subscribers.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOW…/tests/component/policies/kv_events_two_subscribers.rs141 "router_b shutdown after router_a drained took {elapsed:?}; \
LOW…/tests/component/policies/kv_events_tree_concurrent.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOW…uter/tests/component/policies/kv_events_hash_parity.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOW…sgl-router/tests/component/workers/concurrent_state.rs81 let snapshot = r.workers_for(&model);
LOWexperimental/sgl-router/tests/e2e/conftest.py41# Mirrors SMG's e2e_test/conftest.py sys.path setup.
LOWexperimental/sgl-router/tests/e2e/requirements.txt1httpx==0.27.2
LOWexperimental/sgl-router/benches/tree_lookup.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOWexperimental/sgl-router/src/proxy/sse.rs21///
LOWexperimental/sgl-router/src/proxy/sse.rs41/// The opaque `Box<dyn Send + 'static>` accepts any drop-only payload — most
LOWexperimental/sgl-router/src/config/types.rs61 stale_request_timeout_secs: default_stale_request_timeout_secs(),
LOWexperimental/sgl-router/src/config/types.rs141
LOWexperimental/sgl-router/src/config/types.rs201}
LOWexperimental/sgl-router/src/config/types.rs321/// Two operating modes, distinguished by which selector fields are set:
LOWexperimental/sgl-router/src/config/types.rs341/// `WorkerMode` and `bootstrap_port` for each worker are filled in by
LOWexperimental/sgl-router/src/config/types.rs361///
LOWexperimental/sgl-router/src/config/types.rs421}
LOWexperimental/sgl-router/src/config/types.rs501 // watcher (`watcher::Config::default().labels(&label)`
LOWexperimental/sgl-router/src/tokenizer/mod.rs101 // tiny BPE fixture is byte-level and lossless for ASCII.
LOWexperimental/sgl-router/src/tokenizer/mod.rs141 /// registry's `Arc<Tokenizer>` is `Send + Sync` and that
LOWexperimental/sgl-router/src/health/circuit_breaker.rs61 config,
LOWexperimental/sgl-router/src/discovery/types.rs41/// new worker becomes available, and [`DiscoveryEvent::Removed`] when it
LOWexperimental/sgl-router/src/discovery/types.rs61}
LOWexperimental/sgl-router/src/discovery/k8s.rs81///
LOWexperimental/sgl-router/src/discovery/k8s.rs141 });
LOWexperimental/sgl-router/src/discovery/k8s.rs221 Ok(())
LOWexperimental/sgl-router/src/discovery/k8s.rs741
LOWexperimental/sgl-router/src/discovery/static_urls.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOWexperimental/sgl-router/src/server/error.rs21 /// build error). `source` captures the full anyhow chain for server-side
LOWexperimental/sgl-router/src/server/error.rs41 UpstreamStatus { status: StatusCode },
LOWexperimental/sgl-router/src/server/error.rs61 /// independently of full-model outages.
LOWexperimental/sgl-router/src/server/error.rs81 #[error("stale request expired for model {model}")]
LOWexperimental/sgl-router/src/server/metrics.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOWexperimental/sgl-router/src/server/metrics.rs21//! | `sgl_router_requests_total` | Counter | `worker_url`, `model_id`, `mode`, `outcome` |
LOWexperimental/sgl-router/src/server/routes/chat.rs21/// affinity for a PD-disaggregated request. The router fans the
LOWexperimental/sgl-router/src/server/routes/chat.rs41/// for normal chat traffic (a 200 k-token context tokenized as JSON is well
LOWexperimental/sgl-router/src/server/routes/chat.rs181 // ends, the client disconnects, or the handler returns an error. In
LOWexperimental/sgl-router/src/server/routes/chat.rs201 // Snapshot the labels we need for metrics BEFORE moving the worker
LOWexperimental/sgl-router/src/server/routes/chat.rs221 //
LOWexperimental/sgl-router/src/server/routes/chat.rs341 );
LOWexperimental/sgl-router/src/server/routes/chat.rs421 }
LOWexperimental/sgl-router/src/server/routes/chat.rs441 (body.len() / CHARS_PER_TOKEN_ESTIMATE).max(1)
LOWexperimental/sgl-router/src/server/routes/chat.rs461/// to this address for the KV transfer.
LOWexperimental/sgl-router/src/server/routes/chat.rs501 let bytes = serde_json::to_vec(&obj).map_err(|e| {
LOWexperimental/sgl-router/src/server/routes/chat.rs641 }
LOWexperimental/sgl-router/src/policies/registry.rs1// SPDX-FileCopyrightText: Copyright (c) 2026 The SGLang Authors
LOWexperimental/sgl-router/src/policies/registry.rs21//!
LOWexperimental/sgl-router/src/policies/registry.rs41/// decode peer is considered "too hot" — we fall back to the lowest-load
LOWexperimental/sgl-router/src/policies/registry.rs61 /// empty pool as a transient failure, not as "zero work".
LOWexperimental/sgl-router/src/policies/registry.rs101 /// Returns `Err(NoHealthyWorkers)` only when the model has zero
735 more matches not shown…
Self-Referential Comments142 hits · 432 pts
SeverityFileLineSnippet
MEDIUMbenchmark/dspy/bench_dspy_intro.py95 # Define the predictor.
MEDIUMbenchmark/dspy/bench_dspy_intro.py107 # Define the predictor. Notice we're just changing the class. The signature BasicQA is unchanged.
MEDIUMbenchmark/mmmu/data_utils.py157 # Create a JSON object with the filename as the key and caption as the value
MEDIUMbenchmark/hicache/bench_mix.py24# Create a lock for thread-safe debug log writing
MEDIUMbenchmark/prefill_only/bench_score.py157 # Create the request builder function with shared tokenizer
MEDIUMbenchmark/prefill_only/util.py173 # Create the full request list by cycling through unique requests
MEDIUMbenchmark/multi_turn_chat/long_prompt_multi_turn.py29 # Create a unique cache filename based on the arguments that affect generation
MEDIUMexperimental/sgl-router/tests/e2e/infra/gateway.py45# This file is at `experimental/sgl-router/tests/e2e/infra/gateway.py`,
MEDIUMtest/srt/cpu/test_mamba.py14 """This function is intended to align with the l2norm implementation in the FLA library."""
MEDIUMtest/manual/test_weight_validation.py75 # Create a minimal valid safetensors file
MEDIUMtest/manual/test_weight_validation.py166 # Create a broken symlink for the index file
MEDIUMtest/manual/test_modelopt_fp8kvcache.py15 # Create a ModelOptFp8Config object
MEDIUMtest/manual/test_config_integration.py27 # Create a temporary config file
MEDIUMtest/manual/test_config_integration.py82 # Create a temporary config file
MEDIUMtest/manual/test_config_integration.py113 # Create a temporary config file
MEDIUMtest/manual/test_async_dynamic_batch_tokenizer.py54 # Create a proper BatchEncoding-like object that supports dict operations
MEDIUMtest/manual/test_async_dynamic_batch_tokenizer.py193 # Create a new async tokenizer with a failing tokenizer
MEDIUMtest/manual/attention/test_flashattn_backend.py66 # Create a large enough req_to_token_pool to fit the test usage.
MEDIUMtest/manual/attention/test_flashattn_mla_backend.py54 # Create a proper req_to_token_pool with the req_to_token attribute
MEDIUM…ual/lang_frontend/test_separate_reasoning_execution.py72 # Create a mock backend to avoid AttributeError in __del__
MEDIUM…ual/lang_frontend/test_separate_reasoning_execution.py75 # Create a StreamExecutor with necessary setup
MEDIUM…ual/lang_frontend/test_separate_reasoning_execution.py106 # Create a gen expression and a separate_reasoning expression
MEDIUMtest/manual/debug_utils/run_with_retry.py37 # Create a TestFile with a reasonable estimated time
MEDIUMtest/manual/eval/test_longbench_v2_eval.py73 # Create a temporary JSON file with sample data
MEDIUMtest/registered/quant/test_quant_config_parsing.py23 # Create a raw instance
MEDIUMtest/registered/kernels/test_dsa_indexer.py78 # Create a simple page table for testing
MEDIUMtest/registered/kernels/test_dsa_indexer.py94 # Create a simple page table for testing with page size 1
MEDIUM…st/registered/attention/test_wave_attention_kernels.py186 # Define the varying parameter values
MEDIUM…/registered/attention/test_triton_attention_kernels.py314 # Define the varying parameter values
MEDIUM…egistered/attention/test_normal_decode_set_metadata.py128 # Create a simple SWA KV pool for testing
MEDIUM…egistered/attention/test_normal_decode_set_metadata.py149 # Create a minimal mock that inherits from SWAKVPool to pass isinstance check
MEDIUMtest/registered/unit/distributed/test_parallel_state.py95 # Create a mock group object
MEDIUMtest/registered/unit/distributed/test_parallel_state.py195 # Create a mock group object
MEDIUM…stered/unit/function_call/test_function_call_parser.py517 # Create a concrete implementation of BaseFormatDetector for testing
MEDIUM…tered/unit/mem_cache/test_radix_cache_slru_accuracy.py56 # Create a cache with the memory pools
MEDIUM…ed/unit/mem_cache/test_unified_radix_cache_unittest.py1295 # Create a chain: root -> A -> B -> C (3 levels)
MEDIUM…t/registered/unit/model_loader/test_modelopt_loader.py72 # Create a basic model config with unified quantization flag
MEDIUM…t/registered/unit/model_loader/test_modelopt_loader.py116 # Create a custom load_model method for testing that simulates the real logic
MEDIUM…egistered/unit/entrypoints/openai/test_serving_chat.py524 # Create a mock conversation object that will be returned by generate_chat_conv
MEDIUM…egistered/unit/entrypoints/openai/test_serving_chat.py1211 # Create a mock conversation object
MEDIUMtest/registered/cpu/utils.py465 # Create a new tensor with alternating values
MEDIUMtest/registered/cpu/utils.py488 # Create a tensor with the E2M1 values
MEDIUMtest/registered/cpu/test_mamba.py17 """This function is intended to align with the l2norm implementation in the FLA library."""
MEDIUM…registered/hicache/test_hicache_storage_3fs_backend.py26 # Create a temporary JSON config file for HF3FS
MEDIUMtest/registered/lora/test_chunked_sgmv_backend.py189 # Create a minimal mock ForwardBatch for the test
MEDIUMtest/registered/lora/test_chunked_sgmv_backend.py221 # Create a minimal backend instance to access _get_segments_info
MEDIUMtest/registered/rl/test_update_weights_from_tensor.py136 # Create a small set of parameters for testing
MEDIUMtest/registered/rl/test_update_weights_from_tensor.py150 # Create a flattened bucket
MEDIUMtest/registered/rl/test_update_weights_from_tensor.py157 # Create the dict format expected by _update_weights_from_flattened_bucket
MEDIUMtest/registered/rl/test_update_weights_from_tensor.py167 # Create a list where each rank contains the same serialized data
MEDIUMpython/sglang/bench_serving.py1270 # Create a temporary DatasetRow object for warmup
MEDIUMpython/sglang/bench_serving.py1286 # Create the test input once
MEDIUMpython/sglang/bench_serving.py2309 # Create a mutually exclusive group for profiling URLs
MEDIUMpython/sglang/test/nightly_bench_utils.py72 # Create a combined link or use the first available one
MEDIUMpython/sglang/jit_kernel/ngram_embedding.py93 # Create an empty tensor for ignore_tokens
MEDIUMpython/sglang/srt/layers/attention/flashmla_backend.py524 # Create a dummy forward_mode for draft step
MEDIUM…/sglang/srt/layers/attention/flashattention_backend.py2477 # Create a modified version for local attention that only processes the last token
MEDIUM…/sglang/srt/layers/attention/flashattention_backend.py3262 # Create a block_table for the local attention blocks
MEDIUM…/sglang/srt/layers/attention/dsa/index_buf_accessor.py760 # Define the token range within the block and the K dimension range handled by the thread.
MEDIUMpython/sglang/srt/layers/attention/fla/kda.py4# This file contains code copied from the flash-linear-attention project.
82 more matches not shown…
Redundant / Tautological Comments266 hits · 405 pts
SeverityFileLineSnippet
LOW…shinfer_allreduce_fusion/benchmark_fused_collective.py1153 # Check if running with torchrun (required for collective operations)
LOWbenchmark/json_schema/bench_sglang.py97 # Check if the outputs are valid
LOWbenchmark/ceval/bench_sglang.py112 # Print results
LOWbenchmark/prefill_only/util.py163 # Check if we have any valid requests
LOWbenchmark/boolq/bench_sglang.py89 # Print results
LOWbenchmark/gsm8k/bench_sglang.py143 # Print results
LOWbenchmark/gsm8k/bench_other.py129 # Print results
LOWbenchmark/mmlu/bench_sglang.py179 # Print results
LOWtest/manual/test_schedule_policy.py76 # Check if FCFS keeps the original order
LOWtest/manual/test_schedule_policy.py101 # Check if priority enabled fcfs ordering is applied.
LOWtest/manual/test_schedule_policy.py127 # Check if priority enabled fcfs ordering is applied.
LOWtest/manual/test_schedule_policy.py149 # Check if priority enabled fcfs ordering is applied.
LOWtest/manual/test_schedule_policy.py171 # Check if priority enabled fcfs ordering is applied.
LOWtest/manual/test_schedule_policy.py195 # Check if priority enabled fcfs ordering is applied.
LOW…nual/kv_transfer/test_mooncake_transfer_engine_init.py201 # Check if mooncake should be used
LOW…tered/unit/mem_cache/test_radix_cache_slru_accuracy.py108 # Check if the frequently accessed key-value is still present
LOW…tered/unit/mem_cache/test_radix_cache_slru_accuracy.py114 # Check if the first low-frequency key-value has been evicted
LOW…tered/unit/mem_cache/test_radix_cache_slru_accuracy.py126 # Check if the tensor is empty, which indicates the key was not found (evicted)
LOW…t/registered/unit/model_loader/test_modelopt_export.py27# Check if modelopt is available
LOW…d/unit/batch_invariant_ops/test_batch_invariant_ops.py53 # Check if results are identical
LOW…d/unit/batch_invariant_ops/test_batch_invariant_ops.py193 # Check if results are identical
LOW…registered/disaggregation/test_disaggregation_basic.py206 # Check if servers are still healthy
LOW…/ascend/interface/test_npu_openai_server_ignore_eos.py84 # Check if ignore_eos resulted in more tokens or exactly max_tokens
LOW…nd/basic_function/offloading/test_npu_offload_modes.py70 # Check if server is running (basic functionality test)
LOW…nai_server/validation/test_openai_server_ignore_eos.py75 # Check if ignore_eos resulted in more tokens or exactly max_tokens
LOWtest/registered/observability/test_tracing.py77 # Check if it's a RequestStageConfig (has stage_name and level attributes)
LOW…stered/amd/disaggregation/test_disaggregation_basic.py322 # Check if servers are still healthy
LOW…t/registered/amd/accuracy/mi30x/test_gsm8k_eval_amd.py286 # Print results
LOW…gistered/amd/accuracy/mi30x/test_vlms_mmmu_eval_amd.py255 # Print results
LOW…amd/accuracy/mi35x/test_qwen3_coder_next_eval_mi35x.py218 # Check if model exists
LOW…racy/mi35x/test_deepseek_r1_mxfp4_kv_fp8_eval_mi35x.py197 # Check if model exists
LOW…md/accuracy/mi35x/test_deepseek_r1_mxfp4_eval_mi35x.py194 # Check if model exists
LOW…y/mi35x/test_deepseek_r1_mxfp4_ar_fusion_eval_mi35x.py196 # Check if model exists
LOW…/registered/rl/test_update_weights_from_distributed.py642 # Check if the weights of lm_head are tied with embed_tokens.
LOWpython/sglang/bench_serving.py1309 # Check if at least one warmup request succeeded
LOWpython/sglang/utils.py414 # Check if the cache file already exists
LOWpython/sglang/bench_one_batch.py662 # Print output texts
LOWpython/sglang/benchmark/utils.py106 # Check if the cache file already exists
LOWpython/sglang/test/few_shot_gsm8k_engine.py135 # Print results
LOWpython/sglang/test/bench_one_batch_server_internal.py732 # Print results
LOWpython/sglang/test/send_one.py230 # Print results
LOWpython/sglang/test/runners.py497 # Check if the text is empty or only whitespace.
LOWpython/sglang/test/runners.py792 # Check if the text is empty or only whitespace.
LOWpython/sglang/test/few_shot_gsm8k.py138 # Print results
LOWpython/sglang/test/ci/ci_utils.py238 # Check if we should retry
LOW…/sglang/test/server_fixtures/disaggregation_fixture.py228 # Check if port is active
LOW…sglang/jit_kernel/benchmark/bench_custom_all_reduce.py374 # Print results on rank 0.
LOWpython/sglang/jit_kernel/tests/test_pos_enc.py480 # Print results
LOWpython/sglang/srt/dllm/mixin/scheduler.py245 # Check if batch is full
LOWpython/sglang/srt/disaggregation/decode.py1548 # Check if request was aborted due to corruption
LOWpython/sglang/srt/layers/sampler.py392 # Check if any requests actually need logprobs computation
LOWpython/sglang/srt/layers/attention/xpu_backend.py462 # Check if we should use local attention
LOWpython/sglang/srt/layers/attention/triton_backend.py185 # Set static_kv_splits to False to use deterministic logic instead
LOW…thon/sglang/srt/layers/attention/trtllm_mla_backend.py749 # Check if we're in CUDA graph mode (buffers are pre-allocated)
LOWpython/sglang/srt/layers/attention/dsa_backend.py579 # Check if MHA FP8 dequantization is needed
LOWpython/sglang/srt/layers/attention/dsa_backend.py2249 # Check if sequence meets criteria for MHA_ONE_SHOT
LOW…/sglang/srt/layers/attention/flashattention_backend.py727 # Check if we should use local attention
LOW…/sglang/srt/layers/attention/flashattention_backend.py2981 # Check if we should use the specialized fast path for page_size=1, no SWA
LOW…thon/sglang/srt/layers/attention/flashinfer_backend.py1140 # Check if this specific wrapper's begin_forward has been replaced with fast_decode_plan
LOW…ng/srt/layers/attention/triton_ops/extend_attention.py864 # Check if we can skip this tile
206 more matches not shown…
Verbosity Indicators239 hits · 390 pts
SeverityFileLineSnippet
LOWbenchmark/bench_linear_attention/bench_gdn_decode.py104 # Step 1: split (same as forward_decode)
LOWbenchmark/bench_linear_attention/bench_gdn_decode.py107 # Step 2: view + reshape (same as forward_decode)
LOWbenchmark/bench_linear_attention/bench_gdn_decode.py112 # Step 3: fused gating + recurrent update
LOW…mark/bench_linear_attention/bench_fused_gate_cumsum.py79 # Step 1: gate activation using torch ops
LOW…mark/bench_linear_attention/bench_fused_gate_cumsum.py87 # Step 2: chunk-local cumsum
LOW…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh38# Step 1: Create kind cluster (idempotent)
LOW…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh50# Step 2: Build Docker images (unless SKIP_DOCKER_BUILD=1)
LOW…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh75# Step 3: Load images into kind
LOW…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh82# Step 4: Apply namespace and RBAC
LOW…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh89# Step 5: Deploy 3 fake-worker replicas behind a Service
LOW…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh144# Step 6: Create sgl-router ConfigMap with k8s discovery pointing at the
LOW…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh175# Step 7: Deploy sgl-router
LOWtest/srt/cpu/utils.py72 # Step 2: compute per-block max abs values → scale
LOWtest/manual/test_tokenizer_manager.py336 # Step 1: Detect format
LOWtest/manual/test_tokenizer_manager.py342 # Step 2: Prepare input
LOWtest/manual/test_tokenizer_manager.py348 # Step 3: Extract results (simulated tokenizer output)
LOWtest/manual/test_tokenizer_manager.py367 # Step 1: Detect format
LOWtest/manual/test_tokenizer_manager.py373 # Step 2: Prepare input
LOWtest/manual/test_tokenizer_manager.py379 # Step 3: Extract results (simulated tokenizer output for cross-encoder)
LOWtest/manual/test_tokenizer_manager.py396 # Step 1: Detect format
LOWtest/manual/test_tokenizer_manager.py402 # Step 2: Prepare input
LOWtest/manual/test_tokenizer_manager.py408 # Step 3: Extract results (simulated tokenizer output)
LOWtest/manual/dsv4/test_fused_compress_attn_hip.py312 # Step 1: write current tokens to state (same for both paths)
LOWtest/registered/unit/managers/test_hisparse_unit.py446 # Step 1: load the first TOP_K positions from host (no newest token —
LOWtest/registered/unit/managers/test_hisparse_unit.py455 # Step 2: half overlap (hit) + half new (miss).
LOWtest/registered/unit/parser/test_reasoning_parser.py1147 # Step 1: Send partial end tag when not in reasoning mode
LOWtest/registered/unit/parser/test_reasoning_parser.py1153 # Step 2: Send normal text that doesn't complete the end tag
LOW…egistered/unit/mem_cache/test_decode_radix_lock_ref.py128 # Step 1: inc_lock_ref (pop_preallocated locks the matched node)
LOW…egistered/unit/mem_cache/test_decode_radix_lock_ref.py144 # Step 2: cache_unfinished_req (dec old lock, inc new lock)
LOW…egistered/unit/mem_cache/test_decode_radix_lock_ref.py147 # Step 3: cache_finished_req with is_insert=True (dec lock)
LOW…egistered/unit/mem_cache/test_decode_radix_lock_ref.py176 # Step 1: inc_lock_ref on root (simulates get_new_prebuilt_batch)
LOW…egistered/unit/mem_cache/test_decode_radix_lock_ref.py193 # Step 2: cache_unfinished_req (dec root=no-op, inc new leaf)
LOW…egistered/unit/mem_cache/test_decode_radix_lock_ref.py196 # Step 3: cache_finished_req (dec leaf)
LOWtest/registered/unit/mem_cache/test_mamba_unittest.py684 # Step 1: Insert [1,2,3] to create first node
LOWtest/registered/unit/mem_cache/test_mamba_unittest.py696 # Step 2: Insert [1,2,3,4,5,6,7] with prev_prefix_len=0 (free all matched)
LOWtest/registered/unit/mem_cache/test_mamba_unittest.py713 # Step 3: Insert [1,2,3,4,5,6,7,8] with prev_prefix_len=2
LOWtest/registered/unit/mem_cache/test_mamba_unittest.py731 # Step 4: Insert [1,2,3,4,5,6,7,8,9] with prev_prefix_len=8 (covers all matched)
LOW…ed/unit/mem_cache/test_unified_radix_cache_unittest.py637 # Step 1: insert 1 page
LOW…ed/unit/mem_cache/test_unified_radix_cache_unittest.py641 # Step 2: insert 2 pages with prev_prefix_len=0 → frees overlap of 1 page
LOW…ed/unit/mem_cache/test_unified_radix_cache_unittest.py659 # Step 3: insert 3 pages with prev_prefix_len=len(seq_2p) → nothing freed
LOWtest/registered/cpu/utils.py72 # Step 2: compute per-block max abs values → scale
LOWtest/registered/function_call/test_kimik2_detector.py549 # Step 1: reasoning parser
LOWtest/registered/function_call/test_kimik2_detector.py553 # Step 2: feed normal_text into tool call parser (like serving_chat.py does)
LOWtest/registered/function_call/test_kimik2_detector.py585 # Step 1: reasoning parser
LOWtest/registered/function_call/test_kimik2_detector.py590 # Step 2: tool call parser on normal_text
LOWtest/registered/debug_utils/test_dumper.py4090 # Step 1: graft input. target sends its real q to baseline along
LOWtest/registered/debug_utils/test_dumper.py4099 # Step 2: target runs the (suspected buggy) attention kernel —
LOWtest/registered/debug_utils/test_dumper.py4103 # Step 3: graft output. baseline sends its (good) attn_out to
LOWtest/registered/debug_utils/test_dumper.py4064 # Step 1: graft input. target sends its q to baseline; baseline's
LOWtest/registered/debug_utils/test_dumper.py4075 # Step 2: baseline runs the known-good attention kernel.
LOWtest/registered/debug_utils/test_dumper.py4078 # Step 3: graft output. baseline sends attn_out to target with a
LOW…t/registered/debug_utils/comparator/test_entrypoint.py2206 # Step 0: prefill with 2 sequences (3+2 tokens)
LOW…t/registered/debug_utils/comparator/test_entrypoint.py2215 # Step 1: decode (1 token per sequence)
LOW…t/registered/debug_utils/comparator/test_entrypoint.py2267 # Step 0: prefill — seq A (3 tokens) + seq B (2 tokens)
LOW…t/registered/debug_utils/comparator/test_entrypoint.py4720 # Step 1: Create 4 target ranks where moe_tp is absent from ranks 2-3.
LOW…t/registered/debug_utils/comparator/test_entrypoint.py4723 # Step 2: _is_jointly_determined is called with parent_axes={tp, moe_tp}
LOW…t/registered/debug_utils/comparator/test_entrypoint.py4725 # Step 3: edp remains undeclared → ValueError emitted as error record.
LOW…t/registered/debug_utils/comparator/test_entrypoint.py4822 # Step 1: 4 target ranks with TP=2, CP=2 (replicated), EDP=4.
LOW…t/registered/debug_utils/comparator/test_entrypoint.py4824 # Step 2: _is_jointly_determined is called with parent_axes={tp, cp}, child=edp.
LOW…t/registered/debug_utils/comparator/test_entrypoint.py4826 # Step 3: CP replicated picks one rank per tp group → TP concat → correct shape.
179 more matches not shown…
Docstring Block Structure32 hits · 160 pts
SeverityFileLineSnippet
HIGHpython/sglang/test/ascend/test_ascend_utils.py467Start the service and obtain the inference results. Parameters: model: Model name num_prompts: Tota
HIGHpython/sglang/srt/server_args_config_parser.py52 Merge configuration file arguments with command-line arguments. Configuration arguments are inserted a
HIGHpython/sglang/srt/server_args_config_parser.py101 Parse YAML configuration file and convert to argument list. Args: file_path: Path to the Y
HIGHpython/sglang/srt/layers/model_parallel.py29 Locally shards a full tensor based on indicated sharding arrangement, and returns a DTensor containing the loca
HIGH…thon/sglang/srt/layers/attention/flashinfer_backend.py335Process multi-item scoring tensors for FlashInfer attention. This method handles sequences containing multiple
HIGHpython/sglang/srt/layers/attention/fla/chunk.py146 Args: q (torch.Tensor): queries of shape `[B, T, H, K]` if `head_first=False` else `[B, H, T, K
HIGH…hon/sglang/srt/layers/attention/fla/fused_recurrent.py459 Args: q (torch.Tensor): queries of shape `[B, T, H, K]`. k (torch.Tensor):
HIGHpython/sglang/srt/layers/quantization/fp8_kernel.py1638 Quantize input tensor to FP8 (8-bit floating point) format. Args: input (torch.Tensor): Input tensor to be quantiz
HIGHpython/sglang/srt/layers/moe/cutlass_moe.py55Performs Fused MoE computation using CUTLASS-like kernels with FP8 weights and activations. This function implement
HIGHpython/sglang/srt/function_call/utils.py223 Get consolidated $defs from all tools, validating for conflicts. Args: tools: List of tools to process
HIGHpython/sglang/srt/utils/network.py119Bind a TCP socket on the first available address family (IPv4/IPv6). Iterates over address families returned by _ge
HIGHpython/sglang/srt/utils/network.py309 Automatically detect the local IP address using multiple fallback strategies. This function attempts to obtain
HIGHpython/sglang/srt/utils/nvtx_pytorch_hooks.py46Descends iterators that contains Tensors and prints the Tensor Recursive function that descends iterator type a
HIGHpython/sglang/srt/utils/nvtx_pytorch_hooks.py74Extract the static parameters from LLM and VLM relevant layer types Args: module_obj(class): Module
HIGHpython/sglang/srt/utils/nvtx_pytorch_hooks.py198Callback function that ends the NVTX marker Records the module name and tensor information Called after
HIGHpython/sglang/srt/utils/nvtx_pytorch_hooks.py218Creates an NVTX marker with the module name in it. This function is called before the module executes
HIGHpython/sglang/srt/utils/nvtx_pytorch_hooks.py256User level function that activates all the hooks The user needs to call this method from the network source cod
HIGHpython/sglang/srt/models/minicpmo.py134 In streaming audio generation, determine which `text` positions the TTS model can attend to when generating each ch
HIGHpython/sglang/srt/models/minicpmo.py613Merge `input_ids` and `lm_spk_emb_last_hidden_states` to `inputs_embeds`. Args: input_ids (torch.Te
HIGHpython/sglang/srt/multimodal/processors/qwen_vl.py133calculate the number of frames for video used for model inputs. Args: ele (dict): a dict contains the confi
HIGHpython/sglang/srt/multimodal/processors/ernie45_vl.py133calculate the number of frames for video used for model inputs. Args: ele (dict): a dict contains the confi
HIGHpython/sglang/srt/mem_cache/storage/backend_factory.py73Create a storage backend instance. Args: backend_name: Name of the backend to create sto
HIGHpython/sglang/srt/model_loader/ci_weight_validation.py1840 CI-specific download with validation and automatic retry on corruption. This function handles the download of
HIGHpython/sglang/srt/speculative/dflash_utils.py164Select target layer indices used to build DFlash context features. Args: num_target_layers: Number of trans
HIGHpython/sglang/srt/speculative/dflash_utils.py427Compute DFlash accept lengths and bonus tokens (greedy verify rule). Args: candidates: Token ids proposed b
HIGH…/runtime/managers/memory_managers/layerwise_offload.py424Update consolidated CPU buffers with new weights. When layerwise offload (--dit-layerwise-offload) is enabled,
HIGH…ang/multimodal_gen/runtime/utils/nvtx_pytorch_hooks.py97Walk ``model`` and attach forward pre/post hooks to every module. Args: model: Root module to instr
HIGH…ng/multimodal_gen/runtime/models/encoders/qwen2_5vl.py554 Calculate the 3D rope index based on image and video's temporal, height and width in LLM. Explanation:
HIGH…imodal_gen/runtime/pipelines_core/stages/validators.py345 Add a validation check for a field. Args: field_name: Name of the field being checked
HIGHsgl-model-gateway/e2e_test/infra/run_eval.py64Run an evaluation and return metrics. Args: args: Configuration object with attributes: - base_
HIGHsgl-model-gateway/e2e_test/infra/model_pool.py727Get a model instance by model_id, mode, and worker_type. If the model is not running, it will be launched on-de
HIGHsgl-model-gateway/e2e_test/responses/test_basic_crud.py24Wait for background task to complete. Args: client: OpenAI client response_id: Response ID to poll
AI Slop Vocabulary40 hits · 98 pts
SeverityFileLineSnippet
MEDIUMdocker/k8s-sglang-distributed-sts.yaml58 privileged: true # to leverage RDMA/InfiniBand device, co-work with HostNetwork=true
MEDIUM…t/manual/layers/attention/dsa/test_act_quant_triton.py269 # Run comprehensive benchmark
MEDIUMtest/manual/layers/moe/bench_mxfp4_sm90_kernels.py277# Timing harness
MEDIUMtest/manual/4-gpu-models/test_qwen35_models_archived.py17# This eval harness applies the chat_template, which is critical for qwen3.5
MEDIUMtest/manual/4-gpu-models/test_qwen35_fp4_triton.py5# This eval harness applies the chat_template, which is critical for qwen3.5
MEDIUMtest/manual/eval/validate_longbench_v2_standalone.py212 """Generate comprehensive validation report."""
MEDIUM…tered/attention/unittests/dense/test_flex_attention.py65 # from logical positions, so it's robust to all non-tidy layouts.
MEDIUMtest/registered/attention/unittests/dense/test_fa3.py51 # FlashAttention kernels are most stable in this harness with FA-friendly dims.
MEDIUMtest/registered/attention/unittests/dense/test_fa4.py46 # FlashAttention kernels are most stable in this harness with FA-friendly dims.
MEDIUM…egistered/attention/unittests/swa/test_torch_native.py104 # arithmetic, so it's robust to all non-tidy layouts.
MEDIUMtest/registered/hicache/test_qwen35_hicache.py15# This eval harness applies the chat_template, which is critical for qwen3.5
LOWtest/registered/debug_utils/test_dumper.py3052 # code can simply call `from sglang.srt.debug_utils.dumper import dumper`
MEDIUM…attention_unittest/attention_methods/dsv4_attention.py1255 # In runner-harness flows the reference is called BEFORE
LOWpython/sglang/srt/disaggregation/nixl/conn.py101 ), # hacky just add it into the message that will be sent
MEDIUMpython/sglang/srt/disaggregation/common/conn.py929 # Enable higher PP ranks to be bootstrapped earlier to make PP PD requests bootstrap more robust
LOW…/sglang/srt/layers/attention/flashattention_backend.py3196 # then we can simply use a cdiv for the rest.
LOW…ang/srt/layers/attention/mamba/causal_conv1d_triton.py152 # first chunk and does not have prior-token, so just set to 0
MEDIUM…hon/sglang/srt/layers/attention/dsa/tilelang_kernel.py1102 # to better utilize FP8 dynamic range, then apply the inverse scale after GEMM.
MEDIUMpython/sglang/srt/layers/quantization/fpgemm_fp8.py47 # For GPUs that lack FP8 hardware suspport, we can leverage the Marlin
MEDIUMpython/sglang/srt/layers/quantization/fp8.py230 # Keep both "model." and non-"model." variants for robust prefix matching.
MEDIUMpython/sglang/srt/layers/quantization/fp8.py336 # For GPUs that lack FP8 hardware support, we can leverage the Marlin
MEDIUMpython/sglang/srt/layers/quantization/fp8_utils.py622 # TODO: add more robust shape check here
MEDIUM…hon/sglang/srt/layers/quantization/marlin_utils_fp8.py69 # For GPUs that lack FP8 hardware support, we can leverage the
LOW…srt/layers/quantization/mxfp4_flashinfer_trtllm_moe.py451 # in-place. Otherwise `routed` is already scale-final and we just add
LOWpython/sglang/srt/managers/io_struct.py760 lora_id: Optional[str] = None # None means just use the base model
LOWpython/sglang/srt/managers/io_struct.py1057 lora_id: Optional[str] = None # None means just use the base model
LOWpython/sglang/srt/managers/scheduler_pp_mixin.py438 # otherwise, just pass along previous consensus
LOWpython/sglang/srt/managers/scheduler.py3661 # In-place pause: just set the flag and return immediately.
LOWpython/sglang/srt/function_call/qwen3_coder_detector.py41 # Base class already initializes _buffer, we just use it directly
LOWpython/sglang/srt/parser/harmony_parser.py492 # If no emit, just return the held content
LOWpython/sglang/srt/mem_cache/hiradix_cache.py1143 # unknown prefetch stop policy, just return True
MEDIUM…n/sglang/multimodal_gen/test/server/accuracy_config.py57# of real divergence or unsupported reference loading in the harness.
LOWpython/sglang/multimodal_gen/test/server/gpu_cases.py60# To test different models, simply add more DiffusionCase entries
MEDIUM…on/sglang/multimodal_gen/test/server/accuracy_hooks.py26# These are harness defaults for synthetic accuracy inputs.
MEDIUM…lang/multimodal_gen/runtime/layers/quantization/fp8.py191 # For GPUs that lack FP8 hardware support, we can leverage the Marlin
MEDIUM…imodal_gen/runtime/pipelines_core/stages/validators.py503 """Get a comprehensive summary of all validation failures."""
MEDIUMsgl-kernel/csrc/cpu/decode.cpp1429 // use smaller BLOCK_H when batches is small to utilize all cores
MEDIUM…del-gateway/e2e_test/k8s_integration/test_lifecycle.py374 # "total count == 0" keeps the test robust if a parallel/earlier
MEDIUM…way/e2e_test/chat_completions/test_function_calling.py455 # Make the test more robust by checking type and accepting valid responses
MEDIUM…l-gateway/bindings/python/src/sglang_router/mini_lb.py440# We may utilize `GenerateReqInput`'s logic later
Cross-Language Confusion14 hits · 60 pts
SeverityFileLineSnippet
HIGHbenchmark/hicache/bench_mix.py465 self.user_generator.push(
HIGHexperimental/sgl-router/tests/e2e/infra/gateway.py353 "Build it first: `cd experimental/sgl-router && cargo build --release` "
HIGHtest/manual/models/test_qwen3_asr.py630 f"first commit's previous_item_id must be JSON null, got {committed_1!r}",
HIGHtest/manual/debug_utils/test_log_parser.py16 """[{"line":"(SGLangEngine pid=35555) [2025-10-31 03:45:20 TP0] Decode batch [51341], #running-req: 317, #to
HIGH…tion_unittest/runner_modes/cuda_graph_decode_runner.py417 # capture, which makes the capture-time output undefined; only
HIGH…t/layers/quantization/nvfp4_gemm_swiglu_nvfp4_quant.py2552 :param c_sf_ptr: Pointer to scale factor tensor for C (can be null)
HIGH…t/layers/quantization/nvfp4_gemm_swiglu_nvfp4_quant.py2556 :param norm_const_ptr: Pointer to normalization constant for SFC generation (can be null)
HIGHpython/sglang/srt/function_call/utils.py297 return "string" # If only null, default to string
HIGHpython/sglang/srt/function_call/mimo_detector.py144 <parameter=command>pwd && ls</parameter>
HIGHpython/sglang/srt/utils/field_validators.py66 raise ValueError(f"must be list or null; got {type(v).__name__}")
HIGHpython/sglang/srt/utils/hf_transformers/tokenizer.py366 # When the config omits the key or has null, use the v4 default so that
HIGHpython/sglang/srt/entrypoints/http_server.py429 {"error": {"message": "...", "type": "...", "param": null, "code": <status>}}
HIGH…al-refactor-verify/mechanical_refactor_verify_utils.py30 exec_command(f"git add -A && git commit -m {shlex.quote(message)}", cwd=cwd)
HIGHscripts/ci/utils/runner_utilization_report.py188 equal to `created_at` (not null). The previous code required both a
Hallucination Indicators6 hits · 60 pts
SeverityFileLineSnippet
CRITICAL…tention_unittest/attention_methods/mamba2_attention.py537 self.mixer.norm.weight.data = self.mixer.norm.weight.data.float()
CRITICALpython/sglang/srt/layers/attention/fla/utils.py231 return triton.runtime.driver.active.utils.get_device_properties(tensor_idx)[
CRITICALpython/sglang/srt/layers/attention/fla/utils.py286 triton.runtime.driver.active.utils.get_device_properties(i)[
CRITICALpython/sglang/srt/models/glm4_moe.py490 ) and self.shared_experts.gate_up_proj.quant_method.quant_config.get_name() in {
CRITICALpython/sglang/srt/models/deepseek_v2.py734 ) and self.shared_experts.gate_up_proj.quant_method.quant_config.get_name() in {
CRITICAL…l_gen/runtime/pipelines_core/composed_pipeline_base.py124 task_name = self.server_args.pipeline_config.task_type.name.lower()
Slop Phrases18 hits · 42 pts
SeverityFileLineSnippet
MEDIUM…manual/openai_server/function_call/test_tool_choice.py845# # Start the local OpenAI Server. If necessary, you can add other parameters such as --enable-tools.
MEDIUM…d/ascend/interface/test_npu_openai_function_calling.py53 # Start the local OpenAI Server. If necessary, you can add other parameters such as --enable-tools.
MEDIUM…i_server/function_call/test_openai_function_calling.py43 # Start the local OpenAI Server. If necessary, you can add other parameters such as --enable-tools.
MEDIUM…i_server/function_call/test_openai_function_calling.py926# # Start the local OpenAI Server. If necessary, you can add other parameters such as --enable-tools.
LOWpython/sglang/test/runners.py488 # make sure to disable compile
MEDIUMpython/sglang/jit_kernel/flash_attention_v3.py92 # That means if you use A100/A*0/L20/L40/L40s/4090 you can use fa3.
LOWpython/sglang/srt/models/hunyuan.py784 # make sure to leave KV cache scale factors in a known good (dummy) state
LOWpython/sglang/srt/models/apertus.py384 # make sure to leave KV cache scale factors in a known good (dummy) state
LOWpython/sglang/srt/models/mimo_v2.py968 # make sure to leave KV cache scale factors in a known good (dummy) state
LOWpython/sglang/srt/models/llama.py436 # make sure to leave KV cache scale factors in a known good (dummy) state
LOWpython/sglang/srt/models/glm4.py398 # make sure to leave KV cache scale factors in a known good (dummy) state
LOWpython/sglang/srt/models/qwen2.py403 # make sure to leave KV cache scale factors in a known good (dummy) state
LOW…glang/multimodal_gen/runtime/models/vaes/hunyuanvae.py191 # If you are encountering an error here, make sure to try running encoding/decoding with
MEDIUMsgl-kernel/python/sgl_kernel/flash_attn.py24 # That means if you use A100/A*0/L20/L40/L40s/4090 you can use fa3.
MEDIUMsgl-kernel/tests/test_flash_attention.py28 # That means if you use A100/A*0/L20/L40/L40s/4090 you can use fa3.
MEDIUM…ntend_language/quick_start/local_example_llava_next.py55 # Or you can use the 72B model
MEDIUM…ntend_language/quick_start/local_example_llava_next.py62 # Or you can use API models
MEDIUM…-gateway/examples/wasm/wasm-guest-ratelimit/src/lib.rs37// This is a simplified example for demonstration purposes
Dead Code20 hits · 40 pts
SeverityFileLineSnippet
MEDIUMpython/sglang/srt/entrypoints/openai/serving_base.py146
MEDIUMpython/sglang/srt/entrypoints/openai/serving_base.py149
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py706
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py707
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py708
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py711
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py712
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py714
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py716
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py719
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py720
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py722
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py732
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py747
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py757
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py758
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py759
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py787
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py788
MEDIUM…ang/multimodal_gen/runtime/models/dits/hunyuanvideo.py790
Synthetic Comment Markers2 hits · 15 pts
SeverityFileLineSnippet
HIGHtest/manual/attention/test_trtllm_mla_backend.py1238 # Reshape as requested
HIGH…ang/multimodal_gen/runtime/utils/hf_diffusers_utils.py413 # Special handling for stop token <|eom_id|> generated by llama 3 tool use.
Fake / Example Data12 hits · 12 pts
SeverityFileLineSnippet
LOW…l/sgl-router/tests/scripts/generate_parity_fixtures.py41 "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod "
LOW…l/sgl-router/tests/scripts/generate_parity_fixtures.py41 "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod "
LOWpython/sglang/test/kits/ebnf_constrained_kit.py62 self.__class__.ebnf_grammar = 'root ::= "user@example.com"'
LOW…-model-gateway/tests/security/auth_integration_test.rs174 let claims = create_claims("admin@example.com", vec!["admin"]);
LOW…-model-gateway/tests/security/auth_integration_test.rs187 assert_eq!(validated.subject, "admin@example.com");
LOW…-model-gateway/tests/security/auth_integration_test.rs213 let claims = create_claims("user@example.com", vec!["user", "viewer"]);
LOW…-model-gateway/tests/security/auth_integration_test.rs221 assert_eq!(validated.subject, "user@example.com");
LOW…-model-gateway/tests/security/auth_integration_test.rs252 sub: "user@example.com".to_string(),
LOW…-model-gateway/tests/security/auth_integration_test.rs301 sub: "user@example.com".to_string(),
LOW…-model-gateway/tests/security/auth_integration_test.rs338 let claims = create_claims("user@example.com", vec!["admin"]);
LOW…-model-gateway/tests/security/auth_integration_test.rs590 let claims = create_claims("user@example.com", vec!["admin"]);
LOW…-model-gateway/tests/security/auth_integration_test.rs713 let claims = create_claims("user@example.com", vec!["admin"]);
Example Usage Blocks5 hits · 6 pts
SeverityFileLineSnippet
LOW…rimental/sgl-router/tests/e2e/k8s_integration/setup.sh6# Usage:
LOWscripts/killall_sglang.sh8# Usage:
LOWscripts/ci/musa/rename_wheels_musa.sh10# Usage:
LOWsgl-model-gateway/e2e_test/k8s_integration/setup.sh8# Usage:
LOW…ngs/golang/examples/oai_server/scripts/analyze_tpot.sh6# Usage:
Overly Generic Function Names4 hits · 4 pts
SeverityFileLineSnippet
LOWbenchmark/hicache/bench_mix.py417 async def handle_request(self, user_data):
LOWbenchmark/hicache/bench_multiturn.py388 async def handle_request(self, item):
LOWpython/sglang/srt/debug_utils/dumper.py1329 def handle_request(self, *, method: str, body: dict[str, Any]) -> list[dict]:
LOWpython/sglang/srt/entrypoints/openai/serving_base.py73 async def handle_request(