Universal LLM Deployment Engine with ML Compilation

677 matches across 14 categories. Click a row to expand file-level details.

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | python/mlc_llm/__init__.py | 14 | def _create_socket_session_local_workers(num_workers): |
| LOW | python/mlc_llm/base.py | 26 | def _debug_cuda_profiler_start() -> None: |
| LOW | python/mlc_llm/base.py | 35 | def _debug_cuda_profiler_stop() -> None: |
| LOW | python/mlc_llm/json_ffi/engine.py | 26 | def get_request_stream_callback(self) -> Callable[[str], None]: |
| LOW | python/mlc_llm/json_ffi/engine.py | 34 | def _sync_request_stream_callback(self, chat_completion_stream_responses_json_str: str) -> None: |
| LOW | python/mlc_llm/bench/request_processor.py | 71 | def _sample_from_plain_request_records( |
| LOW | python/mlc_llm/bench/request_processor.py | 94 | def _sample_from_grouped_request_records( |
| LOW | python/mlc_llm/bench/request_processor.py | 274 | def generate_fake_warmup_requests( |
| LOW | python/mlc_llm/bench/__main__.py | 34 | def _parse_num_concurrent_requests(num_str: Optional[str]) -> Optional[List[int]]: # noqa: UP006 |
| LOW | python/mlc_llm/bench/request_record.py | 116 | def _compute_metrics_statistics( |
| LOW | python/mlc_llm/interface/gen_config.py | 28 | def apply_system_defaults_for_missing_fields(mlc_chat_config: MLCChatConfig) -> None: |
| LOW | python/mlc_llm/interface/compile.py | 62 | def _apply_preproc_to_params_and_check_pipeline( |
| LOW | python/mlc_llm/interface/calibrate.py | 106 | async def send_calibration_requests( |
| LOW | python/mlc_llm/interface/convert_weight.py | 68 | def _merge_lora_adapter_with_base_model(base_source: Path, lora_adapter: Path) -> Iterator[Path]: |
| LOW | python/mlc_llm/interface/router.py | 66 | async def completion_stream_generator() -> AsyncGenerator[str, None]: |
| LOW | python/mlc_llm/op/triton.py | 495 | def _compute_expert_id_per_block( |
| LOW | python/mlc_llm/op/triton.py | 527 | def tir_compute_expert_id_per_block( |
| LOW | python/mlc_llm/op/moe_misc.py | 35 | def _gating_topk_init_local_top_k(k_val, dtype, local_top_k, local_top_k_index): |
| LOW | python/mlc_llm/op/moe_misc.py | 42 | def _gating_topk_process_value(k_val, x, local_top_k, local_top_k_index, vi, vk): |
| LOW | python/mlc_llm/op/moe_misc.py | 162 | def _get_topk_softmax_norm_func(k_val: int): |
| LOW | python/mlc_llm/op/moe_misc.py | 238 | def group_limited_greedy_topk( |
| LOW | python/mlc_llm/op/moe_misc.py | 306 | def group_limited_mask_scores( |
| LOW | python/mlc_llm/op/mrope.py | 30 | def _split_indices_from_sizes(sizes: Sequence[int]) -> List[int]: # noqa: UP006 |
| LOW | python/mlc_llm/op/mrope.py | 123 | def apply_multimodal_rotary_pos_emb( |
| LOW | python/mlc_llm/op/mrope.py | 263 | def _build_sequence_position_ids( |
| LOW | python/mlc_llm/op/ft_gemm.py | 11 | def faster_transformer_dequantize_gemm( |
| LOW | python/mlc_llm/op/ft_gemm.py | 95 | def faster_transformer_moe_gemm( |
| LOW | python/mlc_llm/compiler_pass/fuse_add_norm.py | 83 | def _get_add_rms_norm_prefill(hidden_size: int, eps: float, TX: int, in_dtype: str): |
| LOW | python/mlc_llm/compiler_pass/attach_logit_processor.py | 41 | def _get_apply_logit_bias_inplace_cpu(): |
| LOW | python/mlc_llm/compiler_pass/attach_logit_processor.py | 43 | def _apply_logit_bias_inplace( |
| LOW | python/mlc_llm/compiler_pass/attach_logit_processor.py | 72 | def _get_apply_logit_bias_inplace(target: tvm.target.Target): |
| LOW | python/mlc_llm/compiler_pass/attach_logit_processor.py | 79 | def _apply_logit_bias_inplace( |
| LOW | python/mlc_llm/compiler_pass/attach_logit_processor.py | 112 | def _get_apply_penalty_inplace_cpu(): |
| LOW | python/mlc_llm/compiler_pass/attach_logit_processor.py | 156 | def _get_apply_penalty_inplace(target: tvm.target.Target): |
| LOW | python/mlc_llm/compiler_pass/attach_logit_processor.py | 210 | def _get_apply_bitmask_inplace_cpu(): |
| LOW | python/mlc_llm/compiler_pass/attach_logit_processor.py | 246 | def _get_apply_bitmask_inplace(target: tvm.target.Target): |
| LOW | python/mlc_llm/compiler_pass/attach_sampler.py | 69 | def _attach_multinomial_sampling_func(bb: relax.BlockBuilder): |
| LOW | python/mlc_llm/compiler_pass/attach_sampler.py | 153 | def _attach_sample_with_top_p(bb: relax.BlockBuilder): |
| LOW | python/mlc_llm/compiler_pass/attach_sampler.py | 235 | def _attach_renormalize_by_top_p(bb: relax.BlockBuilder, target: tvm.target.Target): |
| LOW | …/mlc_llm/compiler_pass/attach_spec_decode_aux_funcs.py | 76 | def _add_scatter_hidden_states(bb: BlockBuilder, tensor_parallel_shards: int, dtype: str): |
| LOW | …/mlc_llm/compiler_pass/attach_spec_decode_aux_funcs.py | 102 | def _add_gather_hidden_states(bb: BlockBuilder, tensor_parallel_shards: int, dtype: str): |
| LOW | …c_llm/compiler_pass/attach_softmax_with_temperature.py | 101 | def _get_lse_and_softmax_func(target: tvm.target.Target, chunk_size: int, active_vocab_size: int): |
| LOW | …hon/mlc_llm/compiler_pass/pipeline_parallel_rewrite.py | 249 | def _prepare_stage_func_params_and_args( |
| LOW | …hon/mlc_llm/compiler_pass/pipeline_parallel_rewrite.py | 368 | def _analyze_required_func_params( |
| LOW | …on/mlc_llm/compiler_pass/dispatch_kv_cache_creation.py | 147 | def create_tir_paged_kv_cache( |
| LOW | …on/mlc_llm/compiler_pass/dispatch_kv_cache_creation.py | 187 | def create_flashinfer_paged_kv_cache( |
| LOW | python/mlc_llm/protocol/conversation_protocol.py | 229 | def _strip_reasoning_in_history( |
| LOW | python/mlc_llm/protocol/conversation_protocol.py | 255 | def _combine_consecutive_messages(messages: List[Any]) -> List[Any]: # noqa: UP006 |
| LOW | python/mlc_llm/protocol/mlc_chat_config.py | 65 | def get_system_defaults_for_missing_fields(self) -> Dict[str, Any]: # noqa: UP006 |
| LOW | python/mlc_llm/protocol/openai_api_protocol.py | 366 | def check_function_call_usage(self, conv_template: Conversation) -> None: |
| LOW | python/mlc_llm/protocol/openai_api_protocol.py | 450 | def openai_api_get_unsupported_fields( |
| LOW | python/mlc_llm/protocol/error_protocol.py | 33 | async def bad_request_error_handler(_request: fastapi.Request, e: BadRequestError): |
| LOW | python/mlc_llm/cli/package.py | 26 | def _parse_mlc_llm_source_dir(path: str) -> Path: |
| LOW | python/mlc_llm/cli/delivery.py | 219 | def _generate_model_delivery_diff( |
| LOW | python/mlc_llm/contrib/embeddings/openai.py | 146 | async def _aget_len_safe_embeddings( |
| LOW | python/mlc_llm/quantization/per_tensor_quantization.py | 246 | def _compute_quantized_tensor(weight: te.Tensor, scale: Optional[te.Tensor]) -> te.Tensor: |
| LOW | python/mlc_llm/quantization/model_quantization.py | 20 | def make_quantization_functions( |
| LOW | python/mlc_llm/quantization/block_scale_quantization.py | 735 | def broadcast_activation_scale( |
| LOW | python/mlc_llm/testing/debug_chat.py | 406 | def _softmax_with_temperature(self, logits: np.ndarray, temperature: float): |
| LOW | python/mlc_llm/testing/debug_chat.py | 415 | def _apply_presence_and_freq_penalty( |
| 155 more matches not shown… | |||

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | python/mlc_llm/nn/__init__.py | 0 | common `nn.modules` used to define llms in this project. |
| HIGH | python/mlc_llm/model/phi3/__init__.py | 0 | common `nn.modules` used to define llms in this project. |
| HIGH | python/mlc_llm/model/vision/__init__.py | 0 | common `nn.modules` used to define llms in this project. |
| HIGH | python/mlc_llm/quantization/group_quantization.py | 0 | override to() such that we do not convert bias if there is an out_dtype. otherwise, we might run into dtype mismatch whe |
| HIGH | python/mlc_llm/quantization/per_tensor_quantization.py | 0 | override to() such that we do not convert bias if there is an out_dtype. otherwise, we might run into dtype mismatch whe |
| HIGH | python/mlc_llm/quantization/ft_quantization.py | 0 | override to() such that we do not convert bias if there is an out_dtype. otherwise, we might run into dtype mismatch whe |
| HIGH | python/mlc_llm/quantization/block_scale_quantization.py | 0 | override to() such that we do not convert bias if there is an out_dtype. otherwise, we might run into dtype mismatch whe |
| HIGH | python/mlc_llm/quantization/block_scale_quantization.py | 0 | override to() such that we do not convert bias if there is an out_dtype. otherwise, we might run into dtype mismatch whe |
| HIGH | python/mlc_llm/quantization/awq_quantization.py | 0 | override to() such that we do not convert bias if there is an out_dtype. otherwise, we might run into dtype mismatch whe |
| HIGH | python/mlc_llm/model/llama4/llama4_model.py | 0 | the embedding module that can be shared with the final lm_head. from qwen2embedding. |
| HIGH | python/mlc_llm/model/olmo/olmo_model.py | 0 | the embedding module that can be shared with the final lm_head. from qwen2embedding. |
| HIGH | python/mlc_llm/model/llama/llama_model.py | 0 | the embedding module that can be shared with the final lm_head. from qwen2embedding. |
| HIGH | python/mlc_llm/model/llama4/llama4_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/ministral3/ministral3_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/cohere/cohere_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/gemma/gemma_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/olmo2/olmo2_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/olmo/olmo_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/phi3/phi3_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/qwen2/qwen2_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/minicpm/minicpm_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/qwen3/qwen3_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/nemotron/nemotron_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/llama/llama_model.py | 0 | the lm_head forwarding, which transposes the weight and multiplies with the input tensor. |
| HIGH | python/mlc_llm/model/phi3/phi3_loader.py | 0 | this file specifies how mlc's phi parameter maps from other formats, for example huggingface pytorch, huggingface safete |
| HIGH | python/mlc_llm/model/phi3v/phi3v_loader.py | 0 | this file specifies how mlc's phi parameter maps from other formats, for example huggingface pytorch, huggingface safete |
| HIGH | python/mlc_llm/model/phi/phi_loader.py | 0 | this file specifies how mlc's phi parameter maps from other formats, for example huggingface pytorch, huggingface safete |
| HIGH | python/mlc_llm/model/phi3/phi3_loader.py | 0 | returns a parameter mapping that maps from the names of mlc llm parameters to the names of phi-1/phi-1.5 huggingface pyt |
| HIGH | python/mlc_llm/model/phi3v/phi3v_loader.py | 0 | returns a parameter mapping that maps from the names of mlc llm parameters to the names of phi-1/phi-1.5 huggingface pyt |
| HIGH | python/mlc_llm/model/phi/phi_loader.py | 0 | returns a parameter mapping that maps from the names of mlc llm parameters to the names of phi-1/phi-1.5 huggingface pyt |
| HIGH | python/mlc_llm/model/qwen2/qwen2_loader.py | 0 | this file specifies how mlc's qwen2 parameter maps from other formats, for example huggingface pytorch, huggingface safe |
| HIGH | python/mlc_llm/model/qwen3/qwen3_loader.py | 0 | this file specifies how mlc's qwen2 parameter maps from other formats, for example huggingface pytorch, huggingface safe |
| HIGH | python/mlc_llm/model/qwen2_moe/qwen2_moe_loader.py | 0 | this file specifies how mlc's qwen2 parameter maps from other formats, for example huggingface pytorch, huggingface safe |
| HIGH | python/mlc_llm/model/qwen3_moe/qwen3_moe_loader.py | 0 | this file specifies how mlc's qwen2 parameter maps from other formats, for example huggingface pytorch, huggingface safe |
| HIGH | python/mlc_llm/model/rwkv5/rwkv5_model.py | 0 | same as llamaforcausallm, except for the use of sliding window attention. |
| HIGH | python/mlc_llm/model/mistral/mistral_model.py | 0 | same as llamaforcausallm, except for the use of sliding window attention. |
| HIGH | python/mlc_llm/model/rwkv6/rwkv6_model.py | 0 | same as llamaforcausallm, except for the use of sliding window attention. |
| HIGH | python/mlc_llm/serve/sync_engine.py | 0 | reset the engine, clean up all running data and metrics. |
| HIGH | python/mlc_llm/serve/sync_engine.py | 0 | reset the engine, clean up all running data and metrics. |
| HIGH | python/mlc_llm/serve/entrypoints/debug_entrypoints.py | 0 | reset the engine, clean up all running data and metrics. |
| HIGH | tests/python/serve/test_serve_engine_spec.py | 0 | test engine **with continuous batching**. - add all requests to the engine altogether in the beginning. - all requests h |
| HIGH | tests/python/serve/test_serve_engine_spec.py | 0 | test engine **with continuous batching**. - add all requests to the engine altogether in the beginning. - all requests h |
| HIGH | tests/python/serve/test_serve_sync_engine.py | 0 | test engine **with continuous batching**. - add all requests to the engine altogether in the beginning. - all requests h |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | version.py | 16 | # --------------------------------------------------- |
| MEDIUM | version.py | 27 | # --------------------------------------------------- |
| MEDIUM | python/mlc_llm/op/triton.py | 173 | # ----------------------------------------------------------- |
| MEDIUM | python/mlc_llm/op/triton.py | 186 | # ---------------------------------------------------------- |
| MEDIUM | python/mlc_llm/op/triton.py | 218 | # ----------------------------------------------------------- |
| MEDIUM | python/mlc_llm/op/triton.py | 261 | # ----------------------------------------------------------- |
| MEDIUM | python/mlc_llm/model/qwen35/qwen35_model.py | 214 | # ============================================================================ |
| MEDIUM | python/mlc_llm/model/qwen35/qwen35_model.py | 216 | # ============================================================================ |
| MEDIUM | python/mlc_llm/model/qwen35/qwen35_model.py | 372 | # ============================================================================ |
| MEDIUM | python/mlc_llm/model/qwen35/qwen35_model.py | 374 | # ============================================================================ |
| MEDIUM | python/mlc_llm/model/qwen35/qwen35_model.py | 600 | # ============================================================================ |
| MEDIUM | python/mlc_llm/model/qwen35/qwen35_model.py | 602 | # ============================================================================ |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 65 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 67 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 76 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 78 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 94 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 96 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 114 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 116 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 147 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 149 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 169 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 171 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 183 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 185 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 222 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 224 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 26 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 28 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 204 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 206 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 334 | # =================================================================== |
| MEDIUM | tests/python/serve/test_embedding_engine.py | 336 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 217 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 219 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 229 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 231 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 254 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 256 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 277 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 279 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 322 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 324 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 39 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 41 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 66 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 68 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 121 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 123 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 203 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 205 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 297 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 299 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 344 | # =================================================================== |
| MEDIUM | tests/python/serve/server/test_embedding_server.py | 346 | # =================================================================== |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | examples/rest/nodejs/sample_client.js | 28 | print('\n'); |
| HIGH | examples/rest/nodejs/sample_client.js | 32 | print(color.BOLD + "Without streaming:" + color.END); |
| HIGH | examples/rest/nodejs/sample_client.js | 37 | print(color.GREEN + r.body.choices[0].message.content + color.END); |
| HIGH | examples/rest/nodejs/sample_client.js | 38 | print('\n'); |
| HIGH | examples/rest/nodejs/sample_client.js | 41 | print(color.BOLD + "Reset chat" + color.END); |
| HIGH | examples/rest/nodejs/sample_client.js | 52 | print( color.BOLD + "With streaming:" + color.END); |
| HIGH | examples/rest/nodejs/sample_client.js | 62 | print(color.GREEN + jsData.choices[0].delta.content + color.END); |
| HIGH | examples/rest/nodejs/sample_client.js | 68 | print(color.BOLD + "Runtime stats:" + color.END + txtresp.body); |
| HIGH | examples/rest/nodejs/sample_langchain.ts | 26 | print('\n'); |
| HIGH | examples/rest/nodejs/sample_langchain.ts | 51 | print(color.BOLD + input + "..." + color.END); |
| HIGH | examples/rest/nodejs/sample_langchain.ts | 55 | print(color.GREEN + res.text + color.END); |
| HIGH | examples/rest/nodejs/sample_langchain.ts | 58 | print(color.BOLD + input + "..." + color.END); |
| HIGH | examples/rest/nodejs/sample_langchain.ts | 62 | print(color.GREEN + res.text + color.END); |
| HIGH | examples/rest/nodejs/sample_langchain.ts | 71 | print(color.BOLD + "Query: " + color.END + color.BLUE + query + color.END); |
| HIGH | examples/rest/nodejs/sample_langchain.ts | 74 | print(color.BOLD + "Response: " + color.END + color.GREEN + result.text + color.END); |
| HIGH | examples/rest/nodejs/sample_openai.js | 32 | print('\n'); |
| HIGH | examples/rest/nodejs/sample_openai.js | 37 | print(color.BOLD + "OpenAI chat completion example without streaming:" + color.END); |
| HIGH | examples/rest/nodejs/sample_openai.js | 46 | print(color.GREEN + completion.data.choices[0].message.content + color.END) |
| HIGH | examples/rest/nodejs/sample_openai.js | 53 | print(color.BOLD + "OpenAI chat completion example with streaming:" + color.END); |
| HIGH | examples/rest/nodejs/sample_openai.js | 63 | print(color.GREEN + parsed.choices[0].delta.content + color.END); |
| HIGH | examples/rest/nodejs/sample_openai.js | 70 | print(color.BOLD + "OpenAI completion example:" + color.END) |
| HIGH | examples/rest/nodejs/sample_openai.js | 73 | print(color.GREEN + res.data.choices[0].text + color.END); |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | python/mlc_llm/__init__.py | 8 | |
| LOW | python/mlc_llm/__init__.py | 8 | |
| LOW | python/mlc_llm/__init__.py | 9 | |
| LOW | python/mlc_llm/__init__.py | 10 | |
| LOW | python/mlc_llm/__init__.py | 10 | |
| LOW | python/mlc_llm/tokenizers/__init__.py | 3 | |
| LOW | python/mlc_llm/tokenizers/__init__.py | 3 | |
| LOW | python/mlc_llm/tokenizers/__init__.py | 4 | |
| LOW | python/mlc_llm/json_ffi/__init__.py | 8 | |
| LOW | python/mlc_llm/nn/__init__.py | 3 | |
| LOW | python/mlc_llm/nn/__init__.py | 4 | |
| LOW | python/mlc_llm/nn/__init__.py | 4 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 8 | |
| LOW | python/mlc_llm/conversation_template/__init__.py | 38 | |
| LOW | python/mlc_llm/op/mrope.py | 3 | |
| LOW | python/mlc_llm/op/__init__.py | 3 | |
| LOW | python/mlc_llm/op/__init__.py | 3 | |
| LOW | python/mlc_llm/op/__init__.py | 4 | |
| LOW | python/mlc_llm/op/__init__.py | 5 | |
| LOW | python/mlc_llm/op/__init__.py | 6 | |
| LOW | python/mlc_llm/op/__init__.py | 6 | |
| LOW | python/mlc_llm/op/__init__.py | 6 | |
| LOW | python/mlc_llm/op/__init__.py | 7 | |
| LOW | python/mlc_llm/op/__init__.py | 8 | |
| LOW | python/mlc_llm/op/__init__.py | 8 | |
| LOW | python/mlc_llm/op/__init__.py | 8 | |
| LOW | python/mlc_llm/op/__init__.py | 8 | |
| LOW | python/mlc_llm/op/__init__.py | 14 | |
| LOW | python/mlc_llm/op/__init__.py | 15 | |
| LOW | python/mlc_llm/op/__init__.py | 15 | |
| LOW | python/mlc_llm/compiler_pass/blas_dispatch.py | 8 | |
| LOW | python/mlc_llm/compiler_pass/blas_dispatch.py | 9 | |
| LOW | python/mlc_llm/compiler_pass/__init__.py | 3 | |
| 60 more matches not shown… | |||

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | version.py | 123 | |
| LOW | python/setup.py | 87 | |
| LOW | python/mlc_llm/__main__.py | 11 | |
| LOW | python/mlc_llm/json_ffi/engine.py | 38 | |
| LOW | python/mlc_llm/bench/dataset.py | 283 | |
| LOW | python/mlc_llm/bench/dataset.py | 735 | |
| LOW | python/mlc_llm/bench/evaluation/mmlu.py | 128 | |
| LOW | python/mlc_llm/interface/gen_config.py | 89 | |
| LOW | python/mlc_llm/interface/chat.py | 249 | |
| LOW | python/mlc_llm/op/moe_misc.py | 63 | |
| LOW | python/mlc_llm/op/moe_misc.py | 135 | |
| LOW | python/mlc_llm/op/moe_misc.py | 453 | |
| LOW | python/mlc_llm/op/moe_misc.py | 87 | |
| LOW | python/mlc_llm/op/moe_misc.py | 162 | |
| LOW | python/mlc_llm/op/moe_misc.py | 493 | |
| LOW | python/mlc_llm/op/moe_misc.py | 89 | |
| LOW | python/mlc_llm/op/moe_misc.py | 176 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 12 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 77 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 179 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 298 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 385 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 562 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 50 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 148 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 237 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 261 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 351 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 423 | |
| LOW | python/mlc_llm/op/moe_matmul.py | 633 | |
| LOW | python/mlc_llm/op/batch_spec_verify.py | 8 | |
| LOW | python/mlc_llm/op/batch_spec_verify.py | 59 | |
| LOW | python/mlc_llm/op/top_p_pivot.py | 11 | |
| LOW | python/mlc_llm/op/top_p_pivot.py | 268 | |
| LOW | python/mlc_llm/op/top_p_pivot.py | 53 | |
| LOW | python/mlc_llm/op/top_p_pivot.py | 301 | |
| LOW | python/mlc_llm/op/cutlass.py | 9 | |
| LOW | python/mlc_llm/compiler_pass/fuse_add_norm.py | 14 | |
| LOW | python/mlc_llm/compiler_pass/fuse_add_norm.py | 83 | |
| LOW | python/mlc_llm/compiler_pass/fuse_add_norm.py | 22 | |
| LOW | python/mlc_llm/compiler_pass/fuse_add_norm.py | 91 | |
| LOW | …thon/mlc_llm/compiler_pass/lift_global_buffer_alloc.py | 156 | |
| LOW | …hon/mlc_llm/compiler_pass/pipeline_parallel_rewrite.py | 35 | |
| LOW | …hon/mlc_llm/compiler_pass/pipeline_parallel_rewrite.py | 105 | |
| LOW | python/mlc_llm/protocol/conversation_protocol.py | 120 | |
| LOW | python/mlc_llm/cli/delivery.py | 219 | |
| LOW | python/mlc_llm/cli/lib_delivery.py | 122 | |
| LOW | python/mlc_llm/cli/model_metadata.py | 88 | |
| LOW | python/mlc_llm/quantization/ft_quantization.py | 170 | |
| LOW | python/mlc_llm/testing/debug_chat.py | 98 | |
| LOW | python/mlc_llm/support/auto_device.py | 24 | |
| LOW | python/mlc_llm/support/auto_device.py | 52 | |
| LOW | python/mlc_llm/support/download_cache.py | 127 | |
| LOW | python/mlc_llm/loader/huggingface_loader.py | 208 | |
| LOW | python/mlc_llm/loader/huggingface_loader.py | 102 | |
| LOW | python/mlc_llm/loader/utils.py | 55 | |
| LOW | python/mlc_llm/loader/standard_loader.py | 23 | |
| LOW | python/mlc_llm/loader/standard_loader.py | 69 | |
| LOW | python/mlc_llm/model/qwen35/qwen35_loader.py | 36 | |
| LOW | python/mlc_llm/model/qwen35/qwen35_model.py | 219 | |
| 46 more matches not shown… | |||

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | python/setup.py | 76 | """This class is needed in order to create OS specific wheels.""" |
| MEDIUM | python/mlc_llm/json_ffi/engine.py | 84 | # Create the background engine-driving thread and start the loop. |
| MEDIUM | python/mlc_llm/bench/request_processor.py | 85 | # Create a new list so that the in-place shuffle does not mutate the input list. |
| MEDIUM | python/mlc_llm/bench/request_processor.py | 109 | # Create a new list so that the in-place shuffle does not mutate the input list. |
| MEDIUM | python/mlc_llm/bench/dataset.py | 254 | # Create a dummy ChatCompletionRequest. |
| MEDIUM | python/mlc_llm/bench/dataset.py | 576 | # Create a dummy ChatCompletionRequest. |
| MEDIUM | …hon/mlc_llm/compiler_pass/pipeline_parallel_rewrite.py | 164 | # Create the param for the shape variables. |
| MEDIUM | python/mlc_llm/cli/router.py | 11 | # Define a custom argument type for a list of strings |
| MEDIUM | python/mlc_llm/serve/sync_engine.py | 236 | # Define the callback function for request generation results |
| MEDIUM | python/mlc_llm/serve/engine.py | 1343 | # Create the request with the given id, input data, generation |
| MEDIUM | python/mlc_llm/serve/engine.py | 1350 | # Create the unique async request stream of the request. |
| MEDIUM | python/mlc_llm/serve/engine.py | 1867 | # Create the request with the given id, input data, generation |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 77 | # Define the callback function for request generation results |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 141 | # Define the callback function for request generation results |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 213 | # Define the callback class for request generation results |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 295 | # Define the callback class for request generation results |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 477 | # Define the callback function for request generation results |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 536 | # Define the callback function for request generation results |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 602 | # Define the callback function for request generation results |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 75 | # Define the callback function for request generation results |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 137 | # Define the callback class for request generation results |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 217 | # Define the callback class for request generation results |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 297 | # Define the callback class for request generation results |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 409 | # Define the callback class for request generation results |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 67 | # Hyperparameters for tests (you can try different combinations). |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 131 | # Hyperparameters for tests (you can try different combinations). |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 201 | # Hyperparameters for tests (you can try different combinations) |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 283 | # Hyperparameters for tests (you can try different combinations) |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 467 | # Hyperparameters for tests (you can try different combinations). |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 526 | # Hyperparameters for tests (you can try different combinations). |
| MEDIUM | tests/python/serve/test_serve_engine_spec.py | 592 | # Hyperparameters for tests (you can try different combinations). |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 65 | # Hyperparameters for tests (you can try different combinations). |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 125 | # Hyperparameters for tests (you can try different combinations) |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 205 | # Hyperparameters for tests (you can try different combinations) |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 284 | # Hyperparameters for tests (you can try different combinations) |
| MEDIUM | tests/python/serve/test_serve_sync_engine.py | 398 | # Hyperparameters for tests (you can try different combinations) |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …on/mlc_llm/compiler_pass/dispatch_kv_cache_creation.py | 30 | # Check if attn_kind is a single value or a list with length of hidden layers |
| LOW | python/mlc_llm/serve/embedding_engine.py | 128 | # Check if the post-processor actually appends a special token at the end |
| LOW | python/mlc_llm/serve/server/popen_server.py | 147 | # Check if the subprocess terminates unexpectedly or |
| LOW | tests/python/json_ffi/test_json_ffi_engine_image.py | 62 | # Print output. |
| LOW | tests/python/json_ffi/test_json_ffi_engine.py | 81 | # Print output. |
| LOW | tests/python/json_ffi/test_json_ffi_engine.py | 140 | # Print output. |
| LOW | tests/python/serve/test_serve_engine_rnn.py | 45 | # Print output. |
| LOW | tests/python/serve/test_serve_engine.py | 46 | # Print output. |
| LOW | tests/python/serve/test_serve_engine.py | 91 | # Print output. |
| LOW | tests/python/serve/test_serve_engine.py | 135 | # Print output. |
| LOW | tests/python/serve/test_serve_engine.py | 179 | # Print output. |
| LOW | tests/python/serve/test_serve_engine.py | 222 | # Print output. |
| LOW | tests/python/serve/test_serve_async_engine_spec.py | 69 | # Print output. |
| LOW | tests/python/serve/test_serve_async_engine.py | 66 | # Print output. |
| LOW | tests/python/serve/test_serve_async_engine.py | 117 | # Print output. |
| LOW | tests/python/serve/test_serve_async_engine.py | 167 | # Print output. |
| LOW | tests/python/serve/test_serve_async_engine.py | 217 | # Print output. |
| LOW | tests/python/serve/test_serve_async_engine.py | 266 | # Print output. |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | .pre-commit-config.yaml | 1 | # To use: |
| LOW | pyproject.toml | 1 | # Licensed to the Apache Software Foundation (ASF) under one |
| LOW | python/mlc_llm/interface/router.py | 81 | output_texts = [""] * request.n |
| LOW | …c_llm/compiler_pass/attach_softmax_with_temperature.py | 101 | def _get_lse_and_softmax_func(target: tvm.target.Target, chunk_size: int, active_vocab_size: int): |
| LOW | python/mlc_llm/protocol/conversation_protocol.py | 41 | # the system message below. |
| LOW | python/mlc_llm/cli/worker.py | 1 | # Licensed to the Apache Software Foundation (ASF) under one |
| LOW | python/mlc_llm/cli/disco_remote_socket_session.py | 1 | # Licensed to the Apache Software Foundation (ASF) under one |
| LOW | python/mlc_llm/model/model_preset.py | 1581 | # "architectures": ["BertModel"], |
| LOW | python/mlc_llm/model/model_preset.py | 1981 | "vocab_size": 49152, |
| LOW | python/mlc_llm/model/model_preset.py | 2001 | # "rms_norm_eps": 1e-05, |
| LOW | tests/python/conftest.py | 1 | # Licensed to the Apache Software Foundation (ASF) under one |
| LOW | ios/MLCSwift/Sources/ObjC/LLMEngine.mm | 1 | // |
| LOW | …gineExample/MLCEngineExample/MLCEngineExampleApp.swift | 1 | // This is a minimum example App to interact with MLC Engine |
| LOW | cpp/base.h | 1 | /*! |
| LOW | cpp/tokenizers/tokenizers.h | 1 | /*! |
| LOW | cpp/json_ffi/openai_api_protocol.h | 1 | /*! |
| LOW | cpp/json_ffi/conv_template.h | 1 | #ifndef MLC_LLM_JSON_FFI_CONV_TEMPLATE_H |
| LOW | cpp/serve/config.h | 1 | /*! |
| LOW | cpp/serve/data.h | 1 | /*! |
| LOW | cpp/serve/model.h | 1 | /*! |
| LOW | cpp/serve/prefix_cache.h | 1 | /*! |
| LOW | cpp/serve/request_state.h | 1 | /*! |
| LOW | cpp/serve/function_table.h | 1 | /*! |
| LOW | examples/rest/python/sample_langchain.py | 161 | |
| LOW | android/mlc4j/src/cpp/tvm_runtime.h | 1 | #define TVM_USE_LIBBACKTRACE 0 |
| LOW | android/mlc4j/src/cpp/tvm_runtime.h | 21 | #include <runtime/cpu_device_api.cc> |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | python/mlc_llm/json_ffi/engine.py | 70 | except Exception as exception: |
| LOW | python/mlc_llm/bench/api_endpoint.py | 139 | except Exception: |
| LOW | python/mlc_llm/bench/api_endpoint.py | 269 | except Exception: |
| LOW | python/mlc_llm/bench/api_endpoint.py | 383 | except Exception: |
| LOW | python/mlc_llm/bench/__main__.py | 125 | except Exception: |
| LOW | python/mlc_llm/interface/gen_config.py | 212 | except Exception: |
| LOW | python/mlc_llm/interface/gen_config.py | 234 | except Exception: |
| LOW | python/mlc_llm/interface/gen_config.py | 281 | except Exception: |
| LOW | …on/mlc_llm/compiler_pass/dispatch_kv_cache_creation.py | 236 | except Exception as e: |
| LOW | python/mlc_llm/cli/delivery.py | 198 | except Exception as exc: |
| LOW | python/mlc_llm/cli/check_device.py | 13 | except Exception: |
| LOW | python/mlc_llm/cli/model_metadata.py | 175 | except Exception: |
| LOW | python/mlc_llm/support/auto_device.py | 40 | except Exception as err: |
| LOW | python/mlc_llm/support/auto_target.py | 119 | except Exception as err: |
| LOW | python/mlc_llm/serve/engine.py | 1034 | except Exception as err: |
| LOW | python/mlc_llm/serve/engine.py | 1233 | except Exception as err: |
| LOW | python/mlc_llm/serve/engine.py | 1305 | except Exception as err: |
| LOW | python/mlc_llm/serve/engine.py | 1381 | except Exception as exception: |
| LOW | python/mlc_llm/serve/server/popen_server.py | 143 | except Exception: |
| LOW | python/mlc_llm/serve/entrypoints/openai_entrypoints.py | 78 | except Exception as exc: |
| LOW | python/mlc_llm/router/router.py | 304 | except Exception as e: |
| LOW | tests/python/integration/test_model_compile.py | 147 | except Exception: |
| LOW | scripts/check_url_validity.py | 36 | except Exception as e: |
| MEDIUM | scripts/check_url_validity.py | 37 | print(f"Error connecting {url}, error: {e}") |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | python/mlc_llm/serve/engine.py | 1354 | stream.push( |
| HIGH | python/mlc_llm/serve/engine_base.py | 518 | stream.push([output]) |
| HIGH | python/mlc_llm/serve/engine_base.py | 545 | stream.push(outputs) |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | version.py | 90 | # Remove the v prefix, mainly to be robust |
| MEDIUM | python/setup.py | 76 | """This class is needed in order to create OS specific wheels.""" |
| MEDIUM | python/mlc_llm/interface/chat.py | 173 | # TODO(mlc-team): possibly leverage debug option |
| LOW | python/mlc_llm/serve/engine.py | 1032 | # for cancelled error, we can simply pass it through |
| LOW | python/mlc_llm/serve/engine.py | 1231 | # for cancelled error, we can simply pass it through |
| LOW | python/mlc_llm/serve/engine.py | 1303 | # for cancelled error, we can simply pass it through |
| LOW | python/mlc_llm/serve/engine.py | 1379 | # for cancelled error, we can simply pass it through |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …gineExample/MLCEngineExample/MLCEngineExampleApp.swift | 35 | // Step 0: load the engine |