Transforms complex documents like PDFs and Office docs into LLM-ready Markdown/JSON for your agentic workflows.
1125 matches across 12 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | demo/demo.py | 81 | def prepare_local_api_temp_dir() -> None: |
| LOW | tests/unittest/test_e2e.py | 23 | def test_pipeline_with_two_config(): |
| LOW | mineru/utils/model_utils.py | 116 | def remove_nested_ocr_text_blocks( |
| LOW | mineru/utils/model_utils.py | 148 | def get_res_list_from_layout_res(layout_res, overlap_threshold=0.8): |
| LOW | mineru/utils/pdfium_guard.py | 30 | def get_pdfium_document_page_count(pdf_doc) -> int: |
| LOW | mineru/utils/pdfium_guard.py | 42 | def rewrite_pdf_bytes_with_pdfium( |
| LOW | mineru/utils/models_download_utils.py | 9 | def auto_download_and_get_model_root_path(relative_path: str, repo_mode='pipeline') -> str: |
| LOW | mineru/utils/check_sys_env.py | 26 | def is_mac_os_version_supported(min_version: str = "13.5") -> bool: |
| LOW | mineru/utils/llm_aided.py | 20 | def _get_title_line_avg_height(block): |
| LOW | mineru/utils/llm_aided.py | 44 | def _collect_title_block_refs(page_info_list): |
| LOW | mineru/utils/llm_aided.py | 71 | def _build_title_optimize_prompt(title_dict): |
| LOW | mineru/utils/llm_aided.py | 114 | def _build_relative_title_optimize_prompt(title_dict): |
| LOW | mineru/utils/llm_aided.py | 235 | def _get_title_block_identity(block): |
| LOW | mineru/utils/llm_aided.py | 247 | def _sync_para_titles_to_preproc(page_info_list): |
| LOW | mineru/utils/llm_aided.py | 270 | def _run_single_pass_title_leveling(title_block_refs, title_aided_config): |
| LOW | mineru/utils/llm_aided.py | 276 | def _split_paragraph_title_groups(title_block_refs): |
| LOW | mineru/utils/llm_aided.py | 295 | def _offset_paragraph_title_levels(levels_by_index): |
| LOW | mineru/utils/llm_aided.py | 305 | def _request_paragraph_group_levels(title_block_refs, title_aided_config): |
| LOW | mineru/utils/llm_aided.py | 315 | def _run_grouped_title_leveling(title_block_refs, title_aided_config): |
| LOW | mineru/utils/config_reader.py | 122 | def get_ocr_det_mask_inline_formula_enable(enable): |
| LOW | mineru/utils/config_reader.py | 128 | def get_processing_window_size(default: int = 64) -> int: |
| LOW | mineru/utils/config_reader.py | 142 | def get_max_concurrent_requests(default: int = 3) -> int: |
| LOW | mineru/utils/config_reader.py | 165 | def get_latex_delimiter_config(): |
| LOW | mineru/utils/magic_model_utils.py | 79 | def calc_effective_index_diff(obj_index: int, sub_index: int) -> int: |
| LOW | mineru/utils/visual_magic_model_utils.py | 101 | def fallback_inline_caption_fragments(blocks, visual_main_types): |
| LOW | mineru/utils/visual_magic_model_utils.py | 132 | def fallback_leading_table_continuation_captions(blocks, visual_main_types): |
| LOW | mineru/utils/visual_magic_model_utils.py | 175 | def _is_leading_continuation_text_block(block): |
| LOW | mineru/utils/visual_magic_model_utils.py | 193 | def is_transparent_visual_relation_block(block): |
| LOW | mineru/utils/visual_magic_model_utils.py | 204 | def _is_leading_continuation_cluster_near_table(leading_blocks, table_block): |
| LOW | mineru/utils/visual_magic_model_utils.py | 226 | def fallback_stacked_table_caption_fragments(blocks, visual_main_types): |
| LOW | mineru/utils/visual_magic_model_utils.py | 267 | def find_stacked_table_caption_cluster(table_block, blocks): |
| LOW | mineru/utils/visual_magic_model_utils.py | 303 | def find_last_caption_position(caption_cluster): |
| LOW | mineru/utils/visual_magic_model_utils.py | 311 | def is_horizontally_near_table(block, table_block): |
| LOW | mineru/utils/visual_magic_model_utils.py | 323 | def is_single_line_caption_fragment(block): |
| LOW | mineru/utils/visual_magic_model_utils.py | 333 | def find_previous_effective_block(ordered_blocks, pos): |
| LOW | mineru/utils/visual_magic_model_utils.py | 342 | def find_next_effective_block(ordered_blocks, pos): |
| LOW | mineru/utils/visual_magic_model_utils.py | 351 | def is_inline_caption_fragment(previous_caption, text_block, next_visual): |
| LOW | mineru/utils/visual_magic_model_utils.py | 477 | def absorb_image_block_members(blocks): |
| LOW | mineru/utils/visual_magic_model_utils.py | 622 | def effective_visual_index_diff( |
| LOW | mineru/utils/visual_magic_model_utils.py | 678 | def is_block_outside_visual_gap(between_block, child_block, main_block): |
| LOW | mineru/utils/visual_magic_model_utils.py | 697 | def vertical_gap_between_blocks(first_block, second_block): |
| LOW | mineru/utils/visual_magic_model_utils.py | 708 | def is_bbox_intersecting_vertical_gap(bbox, vertical_gap): |
| LOW | mineru/utils/visual_magic_model_utils.py | 714 | def is_bbox_overlapping_visual_relation_block(bbox, child_bbox, main_bbox): |
| LOW | mineru/utils/guess_suffix_or_lang.py | 41 | def _normalize_text_for_language_guess(code: str) -> str: |
| LOW | mineru/utils/guess_suffix_or_lang.py | 94 | def _ooxml_relationship_targets(root: ElementTree.Element) -> list[str]: |
| LOW | mineru/utils/guess_suffix_or_lang.py | 113 | def _ooxml_content_type_overrides(root: ElementTree.Element) -> dict[str, str]: |
| LOW | mineru/utils/guess_suffix_or_lang.py | 129 | def _guess_ooxml_suffix_from_zip(package: ZipFile) -> str | None: |
| LOW | mineru/utils/guess_suffix_or_lang.py | 142 | def _guess_ooxml_suffix_by_bytes(file_bytes: bytes) -> str | None: |
| LOW | mineru/utils/guess_suffix_or_lang.py | 158 | def _guess_ooxml_suffix_by_path(file_path: Path) -> str | None: |
| LOW | mineru/utils/span_pre_proc.py | 192 | def _candidate_indices_for_block(self, block_bbox): |
| LOW | mineru/utils/office_rich_text.py | 76 | def has_non_visible_text_style(format_obj: Any) -> bool: |
| LOW | mineru/utils/office_rich_text.py | 86 | def normalize_format_for_text( |
| LOW | mineru/utils/office_rich_text.py | 172 | def is_valid_hyperlink_target(hyperlink: Any) -> bool: |
| LOW | mineru/utils/office_rich_text.py | 180 | def format_text_with_hyperlink( |
| LOW | mineru/utils/office_rich_text.py | 195 | def _format_hyperlink_segments(group: list[OfficeRichTextSegment]) -> str: |
| LOW | mineru/utils/office_rich_text.py | 300 | def build_rich_text_from_segments( |
| LOW | mineru/utils/office_rich_text.py | 350 | def build_text_mappings_from_elements( |
| LOW | mineru/utils/table_continuation.py | 23 | def is_table_continuation_text(text: str) -> bool: |
| LOW | mineru/utils/table_continuation.py | 38 | def _matches_continuation_end_marker(text: str, marker: str) -> bool: |
| LOW | mineru/utils/table_merge.py | 354 | def _serialize_table_state_html(state: TableMergeState) -> None: |
| 543 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | mineru/utils/model_utils.py | 148 | |
| LOW | mineru/utils/model_utils.py | 183 | |
| LOW | mineru/utils/model_utils.py | 217 | |
| LOW | mineru/utils/pdfium_guard.py | 42 | |
| LOW | mineru/utils/llm_aided.py | 160 | |
| LOW | mineru/utils/config_reader.py | 75 | |
| LOW | mineru/utils/magic_model_utils.py | 32 | |
| LOW | mineru/utils/guess_suffix_or_lang.py | 41 | |
| LOW | mineru/utils/guess_suffix_or_lang.py | 185 | |
| LOW | mineru/utils/span_pre_proc.py | 34 | |
| LOW | mineru/utils/span_pre_proc.py | 280 | |
| LOW | mineru/utils/office_rich_text.py | 300 | |
| LOW | mineru/utils/office_rich_text.py | 350 | |
| LOW | mineru/utils/engine_utils.py | 10 | |
| LOW | mineru/utils/table_merge.py | 78 | |
| LOW | mineru/utils/table_merge.py | 402 | |
| LOW | mineru/utils/table_merge.py | 483 | |
| LOW | mineru/utils/table_merge.py | 769 | |
| LOW | mineru/utils/table_merge.py | 868 | |
| LOW | mineru/utils/table_merge.py | 938 | |
| LOW | mineru/utils/pdf_classify.py | 249 | |
| LOW | mineru/utils/draw_bbox.py | 102 | |
| LOW | mineru/utils/draw_bbox.py | 146 | |
| LOW | mineru/utils/draw_bbox.py | 317 | |
| LOW | mineru/utils/draw_bbox.py | 325 | |
| LOW | mineru/utils/boxbase.py | 40 | |
| LOW | mineru/backend/pipeline/model_json_to_middle_json.py | 204 | |
| LOW | mineru/backend/pipeline/model_json_to_middle_json.py | 247 | |
| LOW | mineru/backend/pipeline/batch_analyze.py | 347 | |
| LOW | …eru/backend/pipeline/pipeline_middle_json_mkcontent.py | 18 | |
| LOW | …eru/backend/pipeline/pipeline_middle_json_mkcontent.py | 124 | |
| LOW | …eru/backend/pipeline/pipeline_middle_json_mkcontent.py | 365 | |
| LOW | …eru/backend/pipeline/pipeline_middle_json_mkcontent.py | 518 | |
| LOW | …eru/backend/pipeline/pipeline_middle_json_mkcontent.py | 550 | |
| LOW | …eru/backend/pipeline/pipeline_middle_json_mkcontent.py | 609 | |
| LOW | …eru/backend/pipeline/pipeline_middle_json_mkcontent.py | 745 | |
| LOW | …eru/backend/pipeline/pipeline_middle_json_mkcontent.py | 968 | |
| LOW | mineru/backend/pipeline/para_split.py | 60 | |
| LOW | mineru/backend/pipeline/para_split.py | 265 | |
| LOW | mineru/backend/pipeline/para_split.py | 309 | |
| LOW | mineru/backend/pipeline/para_split.py | 369 | |
| LOW | mineru/backend/pipeline/para_split.py | 415 | |
| LOW | mineru/backend/pipeline/pipeline_magic_model.py | 202 | |
| LOW | mineru/backend/pipeline/pipeline_magic_model.py | 225 | |
| LOW | mineru/backend/pipeline/model_init.py | 164 | |
| LOW | mineru/backend/pipeline/pipeline_analyze.py | 157 | |
| LOW | mineru/backend/pipeline/pipeline_analyze.py | 325 | |
| LOW | mineru/backend/office/model_output_to_middle_json.py | 11 | |
| LOW | mineru/backend/office/model_output_to_middle_json.py | 94 | |
| LOW | mineru/backend/office/model_output_to_middle_json.py | 126 | |
| LOW | mineru/backend/office/office_magic_model.py | 227 | |
| LOW | mineru/backend/office/office_magic_model.py | 393 | |
| LOW | mineru/backend/office/office_magic_model.py | 441 | |
| LOW | mineru/backend/office/office_magic_model.py | 668 | |
| LOW | mineru/backend/office/office_magic_model.py | 737 | |
| LOW | mineru/backend/office/office_magic_model.py | 14 | |
| LOW | mineru/backend/office/mkcontent/inline_renderer.py | 515 | |
| LOW | mineru/backend/office/mkcontent/inline_renderer.py | 889 | |
| LOW | mineru/backend/office/mkcontent/output_builders.py | 79 | |
| LOW | mineru/backend/office/mkcontent/output_builders.py | 104 | |
| 130 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | update_version.py | 15 | except Exception as e: |
| MEDIUM | tests/clean_coverage.py | 16 | print(f"Error deleting file '{path}': {e}") |
| MEDIUM | tests/clean_coverage.py | 22 | print(f"Error deleting directory '{path}': {e}") |
| LOW | tests/unittest/test_e2e.py | 148 | except Exception as e: |
| MEDIUM | tests/unittest/test_e2e.py | 144 | def validate_html(html_content): |
| LOW | mineru/utils/pdf_reader.py | 28 | except Exception as e: |
| LOW | mineru/utils/pdf_text_tool.py | 52 | except Exception: |
| LOW | mineru/utils/llm_aided.py | 212 | except Exception as e: |
| LOW | mineru/utils/config_reader.py | 88 | except Exception as e: |
| LOW | mineru/utils/config_reader.py | 92 | except Exception as e: |
| LOW | mineru/utils/config_reader.py | 96 | except Exception as e: |
| LOW | mineru/utils/config_reader.py | 100 | except Exception as e: |
| LOW | mineru/utils/config_reader.py | 104 | except Exception as e: |
| LOW | mineru/utils/guess_suffix_or_lang.py | 81 | except Exception: |
| LOW | mineru/utils/guess_suffix_or_lang.py | 199 | except Exception as e: |
| LOW | mineru/utils/span_pre_proc.py | 41 | except Exception as exc: |
| LOW | mineru/utils/pdf_image_tools.py | 311 | except Exception: |
| LOW | mineru/utils/pdf_image_tools.py | 320 | except Exception: |
| LOW | mineru/utils/pdf_image_tools.py | 332 | except Exception: |
| LOW | mineru/utils/pdf_image_tools.py | 340 | except Exception: |
| LOW | mineru/utils/pdf_classify.py | 153 | except Exception as e: |
| LOW | mineru/backend/pipeline/batch_analyze.py | 481 | except Exception as e: |
| LOW | mineru/backend/pipeline/batch_analyze.py | 493 | except Exception as e: |
| LOW | mineru/backend/pipeline/model_init.py | 328 | except Exception as e: |
| LOW | mineru/backend/pipeline/pipeline_analyze.py | 102 | except Exception: |
| LOW | mineru/backend/pipeline/pipeline_analyze.py | 341 | except Exception as e: |
| LOW | mineru/backend/utils/office_chart.py | 78 | except Exception: |
| LOW | mineru/backend/utils/office_chart.py | 299 | except Exception: |
| LOW | mineru/backend/utils/ocr_det_utils.py | 15 | except Exception as e: |
| MEDIUM | mineru/backend/utils/ocr_det_utils.py | 12 | def get_ch_lite_ocr_det_model(): |
| LOW | mineru/backend/utils/html_image_utils.py | 46 | except Exception as exc: |
| LOW | mineru/backend/utils/html_image_utils.py | 62 | except Exception: |
| LOW | mineru/backend/utils/html_image_utils.py | 87 | except Exception as exc: |
| LOW | mineru/backend/vlm/vlm_magic_model.py | 60 | except Exception as e: |
| LOW | mineru/backend/vlm/vlm_analyze.py | 320 | except Exception as exc: |
| LOW | mineru/backend/vlm/vlm_analyze.py | 419 | except Exception: |
| LOW | mineru/backend/vlm/utils.py | 108 | except Exception as e: |
| MEDIUM | mineru/backend/vlm/utils.py | 95 | def set_default_batch_size() -> int: |
| LOW | mineru/backend/hybrid/hybrid_analyze.py | 709 | except Exception: |
| LOW | mineru/backend/hybrid/hybrid_magic_model.py | 121 | except Exception as e: |
| LOW | mineru/cli/client.py | 350 | except Exception as exc: |
| LOW | mineru/cli/client.py | 386 | except Exception as exc: |
| LOW | mineru/cli/client.py | 409 | except Exception as exc: |
| LOW | mineru/cli/client.py | 750 | except Exception as exc: |
| LOW | mineru/cli/client.py | 847 | except Exception as exc: |
| MEDIUM | mineru/cli/client.py | 344 | def create_visualization_context() -> Optional[VisualizationContext]: |
| LOW | mineru/cli/models_download.py | 182 | except Exception as e: |
| LOW | mineru/cli/visualization.py | 52 | except Exception as exc: |
| LOW | mineru/cli/visualization.py | 76 | except Exception as exc: |
| MEDIUM | mineru/cli/fast_api.py | 106 | def is_main_multiprocessing_process() -> bool: |
| MEDIUM | mineru/cli/fast_api.py | 289 | def shutdown_runtime_resources() -> None: |
| MEDIUM | mineru/cli/fast_api.py | 1092 | def _dispatcher_loop(self) -> None: |
| MEDIUM | mineru/cli/fast_api.py | 1111 | def _cleanup_loop(self) -> None: |
| LOW | mineru/cli/fast_api.py | 109 | except Exception: |
| LOW | mineru/cli/fast_api.py | 121 | except Exception: |
| LOW | mineru/cli/fast_api.py | 292 | except Exception as exc: |
| LOW | mineru/cli/fast_api.py | 297 | except Exception as exc: |
| LOW | mineru/cli/fast_api.py | 272 | except Exception: |
| LOW | mineru/cli/fast_api.py | 357 | except Exception as e: |
| LOW | mineru/cli/fast_api.py | 611 | except Exception: |
| 80 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/get_coverage.py | 6 | |
| LOW | mineru/utils/engine_utils.py | 2 | |
| LOW | mineru/utils/engine_utils.py | 41 | |
| LOW | mineru/utils/engine_utils.py | 50 | |
| LOW | mineru/utils/engine_utils.py | 63 | |
| LOW | mineru/utils/engine_utils.py | 54 | |
| LOW | mineru/utils/cli_parser.py | 4 | |
| LOW | mineru/utils/title_level_postprocess.py | 2 | |
| LOW | mineru/backend/pipeline/para_split.py | 3 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 2 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 2 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 2 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 2 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 2 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 2 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 10 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 10 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 10 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 10 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 10 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 10 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 10 | |
| LOW | mineru/backend/office/office_middle_json_mkcontent.py | 10 | |
| LOW | mineru/backend/office/office_magic_model.py | 7 | |
| LOW | mineru/backend/vlm/vlm_middle_json_mkcontent.py | 4 | |
| LOW | mineru/cli/vlm_server.py | 32 | |
| LOW | mineru/cli/vlm_server.py | 48 | |
| LOW | mineru/cli/vlm_server.py | 55 | |
| LOW | mineru/cli/vlm_server.py | 38 | |
| LOW | mineru/cli/client_side_output.py | 2 | |
| LOW | mineru/model/mfr/unimernet/unimernet_hf/__init__.py | 2 | |
| LOW | mineru/model/mfr/unimernet/unimernet_hf/__init__.py | 2 | |
| LOW | mineru/model/mfr/unimernet/unimernet_hf/__init__.py | 2 | |
| LOW | mineru/model/mfr/unimernet/unimernet_hf/__init__.py | 3 | |
| LOW | mineru/model/mfr/unimernet/unimernet_hf/__init__.py | 3 | |
| LOW | mineru/model/mfr/unimernet/unimernet_hf/__init__.py | 3 | |
| LOW | mineru/model/mfr/unimernet/unimernet_hf/__init__.py | 4 | |
| LOW | …model/mfr/unimernet/unimernet_hf/modeling_unimernet.py | 8 | |
| LOW | …el/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py | 2 | |
| LOW | …el/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py | 3 | |
| LOW | …el/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py | 3 | |
| LOW | …del/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py | 2 | |
| LOW | …del/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py | 3 | |
| LOW | …del/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py | 4 | |
| LOW | mineru/model/mfr/pp_formulanet_plus_m/processors.py | 11 | |
| LOW | …neru/model/utils/pytorchocr/modeling/necks/__init__.py | 19 | |
| LOW | …neru/model/utils/pytorchocr/modeling/necks/__init__.py | 19 | |
| LOW | …neru/model/utils/pytorchocr/modeling/necks/__init__.py | 19 | |
| LOW | …neru/model/utils/pytorchocr/modeling/necks/__init__.py | 20 | |
| LOW | …neru/model/utils/pytorchocr/modeling/heads/__init__.py | 20 | |
| LOW | …neru/model/utils/pytorchocr/modeling/heads/__init__.py | 20 | |
| LOW | …neru/model/utils/pytorchocr/modeling/heads/__init__.py | 23 | |
| LOW | …neru/model/utils/pytorchocr/modeling/heads/__init__.py | 24 | |
| LOW | …neru/model/utils/pytorchocr/modeling/heads/__init__.py | 25 | |
| LOW | …neru/model/utils/pytorchocr/modeling/heads/__init__.py | 28 | |
| LOW | …/utils/pytorchocr/modeling/heads/rec_unimernet_head.py | 4 | |
| LOW | …/utils/pytorchocr/modeling/heads/rec_unimernet_head.py | 5 | |
| LOW | …ils/pytorchocr/modeling/heads/rec_ppformulanet_head.py | 17 | |
| LOW | …ils/pytorchocr/modeling/heads/rec_ppformulanet_head.py | 18 | |
| LOW | …ils/pytorchocr/modeling/heads/rec_ppformulanet_head.py | 19 | |
| 72 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | …ernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py | 0 | drop paths (stochastic depth) per sample (when applied in main path of residual blocks). |
| HIGH | …del/utils/pytorchocr/modeling/backbones/rec_svtrnet.py | 0 | drop paths (stochastic depth) per sample (when applied in main path of residual blocks). |
| HIGH | …/utils/pytorchocr/modeling/backbones/rec_donut_swin.py | 0 | drop paths (stochastic depth) per sample (when applied in main path of residual blocks). |
| HIGH | mineru/data/data_reader_writer/filebase.py | 0 | read at offset and limit. args: path (str): the path of file, if the path is relative path, it will be joined with paren |
| HIGH | mineru/data/io/s3.py | 0 | read at offset and limit. args: path (str): the path of file, if the path is relative path, it will be joined with paren |
| HIGH | mineru/data/io/base.py | 0 | read at offset and limit. args: path (str): the path of file, if the path is relative path, it will be joined with paren |
| HIGH | mineru/data/data_reader_writer/filebase.py | 0 | write file with data. args: path (str): the path of file, if the path is relative path, it will be joined with parent_di |
| HIGH | mineru/data/io/http.py | 0 | write file with data. args: path (str): the path of file, if the path is relative path, it will be joined with parent_di |
| HIGH | mineru/data/io/s3.py | 0 | write file with data. args: path (str): the path of file, if the path is relative path, it will be joined with parent_di |
| HIGH | mineru/data/io/base.py | 0 | write file with data. args: path (str): the path of file, if the path is relative path, it will be joined with parent_di |
| HIGH | mineru/data/data_reader_writer/base.py | 0 | read the file. args: path (str): file path to read returns: bytes: the content of the file |
| HIGH | mineru/data/io/http.py | 0 | read the file. args: path (str): file path to read returns: bytes: the content of the file |
| HIGH | mineru/data/io/s3.py | 0 | read the file. args: path (str): file path to read returns: bytes: the content of the file |
| HIGH | mineru/data/io/base.py | 0 | read the file. args: path (str): file path to read returns: bytes: the content of the file |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | demo/demo.py | 201 | |
| LOW | demo/demo.py | 221 | # Available options: |
| LOW | mineru/model/ocr/seal_det_warp.py | 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. |
| LOW | mineru/model/ocr/seal_crop.py | 1 | # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. |
| LOW | …net/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py | 1 | # coding=utf-8 |
| LOW | …net/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py | 421 | if self.is_decoder: |
| LOW | …nimernet_hf/unimer_mbart/configuration_unimer_mbart.py | 1 | # coding=utf-8 |
| LOW | …ernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py | 1 | # coding=utf-8 |
| LOW | …/unimernet_hf/unimer_swin/configuration_unimer_swin.py | 1 | # coding=utf-8 |
| LOW | mineru/model/utils/tools/__init__.py | 1 | # Copyright (c) Opendatalab. All rights reserved. |
| LOW | …neru/model/utils/pytorchocr/modeling/necks/__init__.py | 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
| LOW | mineru/model/utils/pytorchocr/modeling/necks/rnn.py | 21 | # def forward(self, x): |
| LOW | …el/utils/pytorchocr/modeling/architectures/__init__.py | 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
| LOW | …neru/model/utils/pytorchocr/modeling/heads/__init__.py | 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
| LOW | …ils/pytorchocr/modeling/heads/rec_ppformulanet_head.py | 1 | # Copyright (c) Opendatalab. All rights reserved. |
| LOW | …/model/utils/pytorchocr/modeling/backbones/__init__.py | 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
| LOW | …del/utils/pytorchocr/modeling/backbones/rec_lcnetv3.py | 1 | # Copyright (c) Opendatalab. All rights reserved. |
| LOW | …/model/utils/pytorchocr/postprocess/rec_postprocess.py | 1 | # Copyright (c) Opendatalab. All rights reserved. |
| LOW | mineru/model/utils/pytorchocr/data/imaug/__init__.py | 1 | # Copyright (c) Opendatalab. All rights reserved. |
| LOW | mineru/model/utils/pytorchocr/data/imaug/operators.py | 1 | """ |
| LOW | mineru/model/table/rec/slanet_plus/matcher.py | 1 | # Copyright (c) Opendatalab. All rights reserved. |
| LOW | mineru/model/table/rec/slanet_plus/table_structure.py | 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
| LOW | …u/model/table/rec/slanet_plus/table_structure_utils.py | 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. |
| LOW | mineru/model/table/rec/slanet_plus/matcher_utils.py | 1 | # Copyright (c) Opendatalab. All rights reserved. |
| LOW | mineru/model/table/rec/slanet_plus/matcher_utils.py | 121 | if not has_span_in_head: |
| LOW | mineru/model/table/rec/unet_table/main.py | 161 | # sorted_polygons: np.ndarray, |
| LOW | .github/workflows/cla.yml | 41 | #custom-allsigned-prcomment: 'pull request comment when all contributors has signed, defaults to **CLA Assista |
| LOW | .github/workflows/cli.yml | 41 | # notify_to_feishu: |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | mineru.template.json | 18 | "api_key": "your_api_key", |
| HIGH | docs/zh/usage/quick_usage.md | 138 | "api_key": "your_api_key", |
| HIGH | docs/zh/usage/quick_usage.md | 148 | "api_key": "your_api_key", |
| HIGH | docs/en/usage/quick_usage.md | 138 | "api_key": "your_api_key", |
| HIGH | docs/en/usage/quick_usage.md | 148 | "api_key": "your_api_key", |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | mineru/utils/model_utils.py | 59 | # Create a white background array |
| MEDIUM | mineru/utils/model_utils.py | 69 | # Create a white background array |
| MEDIUM | mineru/utils/draw_bbox.py | 91 | rect = cal_canvas_rect(page, bbox) # Define the rectangle |
| MEDIUM | mineru/utils/draw_bbox.py | 110 | rect = cal_canvas_rect(page, bbox) # Define the rectangle |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | …ils/pytorchocr/modeling/heads/rec_ppformulanet_head.py | 971 | # 1. Check whether the user has defined `decoder_input_ids` manually. To facilitate in terms of input naming, |
| MEDIUM | …/model/utils/pytorchocr/postprocess/rec_postprocess.py | 112 | ): # grouping word with '-', such as 'state-of-the-art' |
| MEDIUM | mineru/model/docx/main.py | 19 | # provide a more robust command-line interface and resolve the demo |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | demo/demo.py | 209 | # Set this to an existing MinerU FastAPI base URL, for example: |
| LOW | mineru/utils/model_utils.py | 95 | # Check if intersection is valid |
| LOW | mineru/utils/model_utils.py | 112 | # Check if overlap exceeds threshold |
| LOW | mineru/model/xlsx/xlsx_converter.py | 389 | # Check if file exists in zip |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | mineru/model/mfr/utils.py | 312 | r'\\(?:lefteqn|boldmath|ensuremath|centering|textsubscript|sides|textsl|textcent|emph|protect|null)') |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | …l/utils/pytorchocr/modeling/backbones/rec_pphgnetv2.py | 546 | use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. Args: layer_name_pa |