Get your documents ready for gen AI
1931 matches across 16 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_kserve_v2_binary.py | 14 | def test_bytes_tensor_binary_encoding_round_trip() -> None: |
| LOW | tests/test_kserve_v2_binary.py | 49 | def test_http_binary_request_serialization() -> None: |
| LOW | tests/test_kserve_v2_binary.py | 103 | def test_http_binary_response_decoding() -> None: |
| LOW | tests/test_backend_mets_gbs.py | 81 | def test_max_file_bytes_limit(test_doc_path): |
| LOW | tests/test_backend_mets_gbs.py | 95 | def test_max_total_bytes_limit(test_doc_path): |
| LOW | tests/test_backend_mets_gbs.py | 112 | def test_max_member_count_limit(test_doc_path): |
| LOW | tests/test_backend_mets_gbs.py | 126 | def test_limits_with_valid_values(test_doc_path): |
| LOW | tests/test_backend_mets_gbs.py | 153 | def test_total_bytes_tracking_across_pages(test_doc_path): |
| LOW | tests/test_run_pr_fast_checks.py | 46 | def test_collect_targets_limits_scope_to_supported_paths(tmp_path: Path) -> None: |
| LOW | tests/test_run_pr_fast_checks.py | 95 | def test_collect_targets_uses_smoke_target_for_tooling_only_changes( |
| LOW | tests/test_run_pr_fast_checks.py | 115 | def test_collect_targets_skips_unrelated_changes(tmp_path: Path) -> None: |
| LOW | tests/test_run_pr_fast_checks.py | 131 | def test_build_check_units_uses_ty_check(monkeypatch) -> None: |
| LOW | tests/test_run_pr_fast_checks.py | 153 | def test_git_helpers_accept_synthetic_merge_tree(tmp_path: Path) -> None: |
| LOW | tests/test_run_pr_fast_checks.py | 196 | def test_log_result_suppresses_success_output(capsys) -> None: |
| LOW | tests/test_run_pr_fast_checks.py | 214 | def test_log_result_prints_failure_output(capsys) -> None: |
| LOW | tests/test_run_pr_fast_checks.py | 232 | def test_significant_regression_requires_same_successful_target_set() -> None: |
| LOW | tests/test_rapid_ocr_lang.py | 34 | def test_rapidocr_uses_english_mobile_assets(monkeypatch, tmp_path: Path) -> None: |
| LOW | tests/test_rapid_ocr_lang.py | 58 | def test_rapidocr_defaults_to_chinese_mobile_assets( |
| LOW | tests/test_rapid_ocr_lang.py | 86 | def test_download_models_uses_language_specific_mobile_paths( |
| LOW | tests/test_rapid_ocr_lang.py | 91 | def fake_download_url_with_progress(url: str, *, progress: bool) -> BytesIO: |
| LOW | tests/test_rapid_ocr_lang.py | 116 | def test_model_downloader_fetches_both_rapidocr_language_sets( |
| LOW | tests/test_input_doc.py | 20 | def test_in_doc_from_valid_path(): |
| LOW | tests/test_input_doc.py | 27 | def test_in_doc_from_invalid_path(): |
| LOW | tests/test_input_doc.py | 35 | def test_in_doc_from_valid_buf(): |
| LOW | tests/test_input_doc.py | 43 | def test_in_doc_from_invalid_buf(): |
| LOW | tests/test_input_doc.py | 51 | def test_in_doc_with_page_range(): |
| LOW | tests/test_input_doc.py | 85 | def test_in_doc_with_backend_options(): |
| LOW | tests/test_input_doc.py | 305 | def _make_input_doc_from_stream(doc_stream): |
| LOW | tests/test_failed_pages.py | 35 | def test_normal_pages_all_present(normal_4pages_path): |
| LOW | tests/test_failed_pages.py | 169 | def test_failed_pages_have_size_info(skipped_1page_path): |
| LOW | tests/test_failed_pages.py | 197 | def test_errors_recorded_for_failed_pages(skipped_1page_path): |
| LOW | tests/test_layout_postprocessor.py | 25 | def test_sort_cells_uses_native_cell_index_order() -> None: |
| LOW | tests/test_picture_description_vlm_model.py | 86 | def test_legacy_picture_description_vlm_batches_generation() -> None: |
| LOW | tests/test_picture_description_vlm_model.py | 120 | def test_legacy_picture_description_vlm_skips_empty_batch() -> None: |
| LOW | tests/test_picture_description_vlm_model.py | 134 | def test_legacy_picture_description_vlm_init_uses_configured_padding_side( |
| LOW | tests/test_backend_patent_uspto.py | 438 | def test_patent_uspto_grant_aps(patents): |
| LOW | tests/test_table_structure_granite_vision.py | 83 | def test_parse_multiple_rowspan(): |
| LOW | tests/test_table_structure_granite_vision.py | 171 | def test_model_disabled_skips_pages(): |
| LOW | tests/test_table_structure_granite_vision.py | 200 | def test_model_invalid_backend_returns_empty_prediction(): |
| LOW | tests/test_table_structure_granite_vision.py | 247 | def test_parse_ecel_self_closing(): |
| LOW | tests/test_table_structure_granite_vision.py | 257 | def test_factory_registration(): |
| LOW | tests/test_options.py | 31 | def get_converters_with_table_options(): |
| LOW | tests/test_options.py | 176 | def test_ocr_coverage_threshold(test_doc_path): |
| LOW | tests/test_options.py | 221 | def test_pipeline_cache_after_initialize(test_doc_path): |
| LOW | tests/test_options.py | 257 | def test_pipeline_cache_with_chart_extraction(): |
| LOW | tests/test_page_assemble_model.py | 30 | def test_fi_ligature_no_space(self, model): |
| LOW | tests/test_page_assemble_model.py | 34 | def test_fl_ligature_no_space(self, model): |
| LOW | tests/test_page_assemble_model.py | 38 | def test_fi_ligature_with_spurious_space(self, model): |
| LOW | tests/test_page_assemble_model.py | 42 | def test_fl_ligature_with_spurious_space(self, model): |
| LOW | tests/test_page_assemble_model.py | 74 | def test_ligature_space_at_word_boundary_preserved(self, model): |
| LOW | tests/test_page_assemble_model.py | 78 | def test_multiple_ligatures_in_text(self, model): |
| LOW | tests/test_page_assemble_model.py | 83 | def test_ligature_with_spurious_space_in_multiline(self, model): |
| LOW | tests/test_page_assemble_model.py | 95 | def test_private_use_glyph_stripped(self, model): |
| LOW | tests/test_page_assemble_model.py | 99 | def test_private_use_glyph_with_spurious_space_stripped(self, model): |
| LOW | tests/test_page_assemble_model.py | 108 | def test_pua_glyph_at_string_start(self, model): |
| LOW | tests/test_page_assemble_model.py | 112 | def test_pua_glyph_at_string_end(self, model): |
| LOW | tests/test_page_assemble_model.py | 120 | def test_pua_glyph_preserves_word_boundary_space(self, model): |
| LOW | tests/test_page_assemble_model.py | 124 | def test_pua_glyph_no_space_merges(self, model): |
| LOW | tests/test_page_assemble_model.py | 128 | def test_ij_capital_standalone(self, model): |
| LOW | tests/test_page_assemble_model.py | 133 | def test_regex_matches_new_codepoints(self, model): |
| 562 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_run_pr_fast_checks.py | 1 | |
| LOW | tests/test_deepseekocr_vlm.py | 3 | |
| LOW | tests/test_deepseekocr_vlm.py | 5 | |
| LOW | tests/test_deepseekocr_vlm.py | 19 | |
| LOW | tests/test_deepseekocr_vlm.py | 19 | |
| LOW | tests/test_backend_webp.py | 6 | |
| LOW | tests/test_backend_webp.py | 9 | |
| LOW | tests/test_page_assemble_model.py | 13 | |
| LOW | tests/test_page_assemble_model.py | 135 | |
| LOW | tests/test_extraction.py | 12 | |
| LOW | tests/test_pytest_marker_selection.py | 1 | |
| LOW | tests/test_asr_mlx_whisper.py | 12 | |
| LOW | tests/test_asr_mlx_whisper.py | 12 | |
| LOW | tests/test_asr_mlx_whisper.py | 12 | |
| LOW | tests/test_api_kserve_v2_engine_scaffolding.py | 3 | |
| LOW | tests/test_conversion_result_json.py | 1 | |
| LOW | tests/test_conversion_result_json.py | 4 | |
| LOW | tests/test_conversion_result_json.py | 6 | |
| LOW | tests/test_conversion_result_json.py | 10 | |
| LOW | tests/test_kserve_v2_ocr_integration.py | 4 | |
| LOW | tests/test_check_needs_results.py | 1 | |
| LOW | tests/test_backend_asciidoc.py | 5 | |
| LOW | tests/test_backend_asciidoc.py | 5 | |
| LOW | tests/test_backend_image_native.py | 4 | |
| LOW | tests/test_threaded_pipeline.py | 1 | |
| LOW | tests/test_threaded_pipeline.py | 2 | |
| LOW | tests/test_e2e_ocr_conversion.py | 8 | |
| LOW | tests/test_vlm_pipeline_status.py | 22 | |
| LOW | tests/test_asr_pipeline.py | 2 | |
| LOW | tests/test_backend_docling_parse.py | 9 | |
| LOW | tests/test_granite_vision_extraction.py | 7 | |
| LOW | tests/test_latex/test_basic.py | 5 | |
| LOW | tests/test_latex/conftest.py | 1 | |
| LOW | tests/test_latex/conftest.py | 5 | |
| LOW | tests/test_latex/conftest.py | 5 | |
| LOW | tests/test_latex/conftest.py | 7 | |
| LOW | tests/test_latex/conftest.py | 8 | |
| LOW | tests/test_latex/conftest.py | 9 | |
| LOW | tests/test_latex/conftest.py | 10 | |
| LOW | tests/test_latex/conftest.py | 10 | |
| LOW | tests/test_latex/conftest.py | 10 | |
| LOW | tests/test_latex/conftest.py | 11 | |
| LOW | tests/test_latex/conftest.py | 14 | |
| LOW | tests/test_latex/conftest.py | 14 | |
| LOW | tests/test_latex/test_macros.py | 5 | |
| LOW | tests/test_latex/test_macros.py | 6 | |
| LOW | tests/test_latex/test_macros.py | 9 | |
| LOW | tests/test_latex/test_macros.py | 11 | |
| LOW | tests/test_latex/test_macros.py | 11 | |
| LOW | tests/test_latex/test_macros.py | 12 | |
| LOW | tests/test_latex/test_macros.py | 15 | |
| LOW | tests/test_latex/test_macros.py | 15 | |
| LOW | tests/test_latex/test_tables.py | 4 | |
| LOW | tests/test_latex/test_tables.py | 5 | |
| LOW | tests/test_latex/test_tables.py | 5 | |
| LOW | tests/test_latex/test_tables.py | 8 | |
| LOW | tests/test_latex/test_tables.py | 10 | |
| LOW | tests/test_latex/test_tables.py | 10 | |
| LOW | tests/test_latex/test_tables.py | 11 | |
| LOW | tests/test_latex/test_tables.py | 14 | |
| 351 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | pyproject.toml | 75 | # ============================================================================ |
| MEDIUM | pyproject.toml | 77 | # ============================================================================ |
| MEDIUM | pyproject.toml | 90 | # ============================================================================ |
| MEDIUM | pyproject.toml | 92 | # ============================================================================ |
| MEDIUM | pyproject.toml | 158 | # ============================================================================ |
| MEDIUM | pyproject.toml | 160 | # ============================================================================ |
| MEDIUM | pyproject.toml | 187 | # ============================================================================ |
| MEDIUM | pyproject.toml | 189 | # ============================================================================ |
| MEDIUM | pyproject.toml | 217 | # ============================================================================ |
| MEDIUM | pyproject.toml | 219 | # ============================================================================ |
| MEDIUM | pyproject.toml | 235 | # ============================================================================ |
| MEDIUM | pyproject.toml | 237 | # ============================================================================ |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 44 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 46 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 305 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 307 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 400 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 402 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 512 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 514 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 607 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 609 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 167 | # ============================================================================= |
| MEDIUM | tests/test_vlm_presets_and_runtime_options.py | 169 | # ============================================================================= |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 76 | # ────────────────────────────────────────────────────────────────────────────── |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 78 | # ────────────────────────────────────────────────────────────────────────────── |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 465 | # ────────────────────────────────────────────────────────────────────────────── |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 467 | # ────────────────────────────────────────────────────────────────────────────── |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 482 | # ──────────────────────────────────────────────────────────────────────── |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 484 | # ──────────────────────────────────────────────────────────────────────── |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 577 | # ──────────────────────────────────────────────────────────────────────── |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 579 | # ──────────────────────────────────────────────────────────────────────── |
| MEDIUM | docling/datamodel/pipeline_options.py | 950 | # ============================================================================= |
| MEDIUM | docling/datamodel/pipeline_options.py | 952 | # ============================================================================= |
| MEDIUM | docling/datamodel/pipeline_options.py | 988 | # ============================================================================= |
| MEDIUM | docling/datamodel/pipeline_options.py | 990 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 865 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 867 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 869 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 871 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 41 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 43 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 114 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 116 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 311 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 313 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 380 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 382 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 423 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 425 | # ============================================================================= |
| MEDIUM | docling/datamodel/stage_model_specs.py | 936 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 938 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 959 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 961 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 976 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 978 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 1365 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 1367 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 1461 | # ----------------------------------------------------------------------------- |
| MEDIUM | docling/datamodel/stage_model_specs.py | 1463 | # ----------------------------------------------------------------------------- |
| 28 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_backend_jats.py | 54 | "Jane Doe", |
| LOW | tests/test_backend_jats.py | 62 | "Jane Doe", |
| LOW | tests/test_backend_jats.py | 107 | assert "Jane Doe" in md |
| LOW | tests/test_latex/test_basic.py | 348 | assert "Jane Doe" in md |
| LOW | …/data/groundtruth/docling_v2/picture_classification.md | 3 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | …/data/groundtruth/docling_v2/picture_classification.md | 3 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | …/data/groundtruth/docling_v2/picture_classification.md | 9 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | …/data/groundtruth/docling_v2/picture_classification.md | 9 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | …/data/groundtruth/docling_v2/picture_classification.md | 11 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | …/data/groundtruth/docling_v2/picture_classification.md | 11 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | …/data/groundtruth/docling_v2/picture_classification.md | 17 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | …/data/groundtruth/docling_v2/picture_classification.md | 17 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 2 | <text><loc_109><loc_95><loc_390><loc_183>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 2 | <text><loc_109><loc_95><loc_390><loc_183>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 3 | <text><loc_109><loc_185><loc_390><loc_213>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 3 | <text><loc_109><loc_185><loc_390><loc_213>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 5 | <text><loc_109><loc_265><loc_390><loc_353>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmo |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 5 | <text><loc_109><loc_265><loc_390><loc_353>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmo |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 6 | <text><loc_109><loc_355><loc_390><loc_383>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 6 | <text><loc_109><loc_355><loc_390><loc_383>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 10 | <text><loc_112><loc_89><loc_401><loc_172>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 10 | <text><loc_112><loc_89><loc_401><loc_172>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 11 | <text><loc_112><loc_174><loc_401><loc_208>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 11 | <text><loc_112><loc_174><loc_401><loc_208>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 13 | <text><loc_112><loc_227><loc_401><loc_311>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmo |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 13 | <text><loc_112><loc_227><loc_401><loc_311>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmo |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 14 | <text><loc_112><loc_313><loc_401><loc_353>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 14 | <text><loc_112><loc_313><loc_401><loc_353>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 15 | <text><loc_112><loc_355><loc_401><loc_396>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …ta/groundtruth/docling_v2/code_and_formula.doctags.txt | 15 | <text><loc_112><loc_355><loc_401><loc_396>Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie |
| LOW | …a/groundtruth/docling_v2/inline_and_formatting.md.yaml | 733 | orig: ': Lorem ipsum.' |
| LOW | …a/groundtruth/docling_v2/inline_and_formatting.md.yaml | 738 | text: ': Lorem ipsum.' |
| LOW | tests/data/groundtruth/docling_v2/lorem_ipsum.docx.md | 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor molli |
| LOW | tests/data/groundtruth/docling_v2/lorem_ipsum.docx.md | 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor molli |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 3 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 3 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 5 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 5 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 13 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 13 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 15 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 15 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 19 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 19 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 21 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 21 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 25 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 25 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magn |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 27 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 27 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 29 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | tests/data/groundtruth/docling_v2/code_and_formula.md | 29 | Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nul |
| LOW | …ata/groundtruth/docling_v2/picture_classification.json | 106 | "orig": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore |
| LOW | …ata/groundtruth/docling_v2/picture_classification.json | 106 | "orig": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore |
| LOW | …ata/groundtruth/docling_v2/picture_classification.json | 107 | "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore |
| LOW | …ata/groundtruth/docling_v2/picture_classification.json | 107 | "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore |
| LOW | …ata/groundtruth/docling_v2/picture_classification.json | 160 | "orig": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore |
| LOW | …ata/groundtruth/docling_v2/picture_classification.json | 160 | "orig": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore |
| LOW | …ata/groundtruth/docling_v2/picture_classification.json | 161 | "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore |
| LOW | …ata/groundtruth/docling_v2/picture_classification.json | 161 | "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore |
| 134 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | tests/test_glmocr_vlm.py | 0 | verify preset is registered with correct metadata and model spec. |
| HIGH | tests/test_falcon_ocr_vlm.py | 0 | verify preset is registered with correct metadata and model spec. |
| HIGH | tests/test_lightonocr_vlm.py | 0 | verify preset is registered with correct metadata and model spec. |
| HIGH | tests/test_nanonets_ocr_vlm.py | 0 | verify preset is registered with correct metadata and model spec. |
| HIGH | tests/test_glmocr_vlm.py | 0 | verify engine overrides propagate correctly through get_engine_config. |
| HIGH | tests/test_falcon_ocr_vlm.py | 0 | verify engine overrides propagate correctly through get_engine_config. |
| HIGH | tests/test_lightonocr_vlm.py | 0 | verify engine overrides propagate correctly through get_engine_config. |
| HIGH | tests/test_nanonets_ocr_vlm.py | 0 | verify engine overrides propagate correctly through get_engine_config. |
| HIGH | tests/test_glmocr_vlm.py | 0 | verify from_preset produces a usable vlmconvertoptions with engine options. |
| HIGH | tests/test_falcon_ocr_vlm.py | 0 | verify from_preset produces a usable vlmconvertoptions with engine options. |
| HIGH | tests/test_lightonocr_vlm.py | 0 | verify from_preset produces a usable vlmconvertoptions with engine options. |
| HIGH | tests/test_nanonets_ocr_vlm.py | 0 | verify from_preset produces a usable vlmconvertoptions with engine options. |
| HIGH | tests/test_glmocr_vlm.py | 0 | verify legacy inlinevlmoptions/apivlmoptions specs are consistent. |
| HIGH | tests/test_lightonocr_vlm.py | 0 | verify legacy inlinevlmoptions/apivlmoptions specs are consistent. |
| HIGH | tests/test_nanonets_ocr_vlm.py | 0 | verify legacy inlinevlmoptions/apivlmoptions specs are consistent. |
| HIGH | tests/test_glmocr_vlm.py | 0 | e2e test with vllm server (skipped in ci and when server is unavailable). |
| HIGH | tests/test_lightonocr_vlm.py | 0 | e2e test with vllm server (skipped in ci and when server is unavailable). |
| HIGH | tests/test_nanonets_ocr_vlm.py | 0 | e2e test with vllm server (skipped in ci and when server is unavailable). |
| HIGH | tests/test_latex/test_figures.py | 0 | \documentclass{article} \begin{document} \begin{tikzpicture} \draw (0,0) -- (1,1); \end{tikzpicture} \end{document} |
| HIGH | tests/test_latex/test_figures.py | 0 | \documentclass{article} \begin{document} \begin{tikzpicture} \draw (0,0) -- (1,1); \end{tikzpicture} \end{document} |
| HIGH | tests/test_latex/test_figures.py | 0 | \documentclass{article} \begin{document} \begin{tikzpicture} \draw (0,0) -- (1,1); \end{tikzpicture} \end{document} |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | represents an element of interest in the patent application document. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | represents an element of interest in the patent application document. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | represents an element of interest in the patent application document. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | signal the start of an element. args: tag: the element tag. attributes: the element attributes. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | signal the start of an element. args: tag: the element tag. attributes: the element attributes. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | signal the start of an element. args: tag: the element tag. attributes: the element attributes. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | receive notification of a skipped entity. html entities will be skipped by the parser. this method will unescape them an |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | receive notification of a skipped entity. html entities will be skipped by the parser. this method will unescape them an |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | receive notification of a skipped entity. html entities will be skipped by the parser. this method will unescape them an |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | signal the end of an element. args: tag: the element tag. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | signal the end of an element. args: tag: the element tag. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | signal the end of an element. args: tag: the element tag. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | receive notification of character data. args: content: data reported by the handler. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | receive notification of character data. args: content: data reported by the handler. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | receive notification of character data. args: content: data reported by the handler. |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | apply an html style to text. args: text: a string containing plain text. style_tag: an html tag name for styling text. i |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | apply an html style to text. args: text: a string containing plain text. style_tag: an html tag name for styling text. i |
| HIGH | docling/backend/xml/uspto_backend.py | 0 | apply an html style to text. args: text: a string containing plain text. style_tag: an html tag name for styling text. i |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_backend_html.py | 138 | |
| LOW | tests/test_backend_html.py | 509 | |
| LOW | tests/test_backend_msword.py | 149 | |
| LOW | tests/test_backend_msword.py | 165 | |
| LOW | tests/test_latex/conftest.py | 21 | |
| LOW | docling/document_extractor.py | 190 | |
| LOW | docling/document_converter.py | 544 | |
| LOW | docling/pipeline/legacy_standard_pdf_pipeline.py | 156 | |
| LOW | docling/pipeline/base_pipeline.py | 106 | |
| LOW | docling/pipeline/base_pipeline.py | 239 | |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 132 | |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 153 | |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 257 | |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 360 | |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 663 | |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 854 | |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 704 | |
| LOW | docling/pipeline/extraction_vlm_pipeline.py | 59 | |
| LOW | docling/pipeline/extraction_vlm_pipeline.py | 160 | |
| LOW | docling/pipeline/extraction_vlm_pipeline.py | 198 | |
| LOW | docling/pipeline/vlm_pipeline.py | 132 | |
| LOW | docling/pipeline/vlm_pipeline.py | 270 | |
| LOW | docling/pipeline/vlm_pipeline.py | 435 | |
| LOW | …/experimental/pipeline/threaded_layout_vlm_pipeline.py | 81 | |
| LOW | …/experimental/pipeline/threaded_layout_vlm_pipeline.py | 238 | |
| LOW | …/experimental/pipeline/threaded_layout_vlm_pipeline.py | 363 | |
| LOW | docling/utils/orientation.py | 9 | |
| LOW | docling/utils/accelerator_utils.py | 9 | |
| LOW | docling/utils/glm_utils.py | 21 | |
| LOW | docling/utils/glm_utils.py | 70 | |
| LOW | docling/utils/glm_utils.py | 332 | |
| LOW | docling/utils/api_image_request.py | 146 | |
| LOW | docling/utils/layout_postprocessor.py | 319 | |
| LOW | docling/utils/layout_postprocessor.py | 389 | |
| LOW | docling/utils/layout_postprocessor.py | 460 | |
| LOW | docling/utils/deepseekocr_utils.py | 27 | |
| LOW | docling/utils/deepseekocr_utils.py | 122 | |
| LOW | docling/utils/deepseekocr_utils.py | 177 | |
| LOW | docling/utils/deepseekocr_utils.py | 231 | |
| LOW | docling/backend/webvtt_backend.py | 100 | |
| LOW | docling/backend/webvtt_backend.py | 116 | |
| LOW | docling/backend/md_backend.py | 174 | |
| LOW | docling/backend/md_backend.py | 329 | |
| LOW | docling/backend/msword_backend.py | 256 | |
| LOW | docling/backend/msword_backend.py | 925 | |
| LOW | docling/backend/msword_backend.py | 995 | |
| LOW | docling/backend/msword_backend.py | 1124 | |
| LOW | docling/backend/msword_backend.py | 1225 | |
| LOW | docling/backend/msword_backend.py | 1407 | |
| LOW | docling/backend/msword_backend.py | 1606 | |
| LOW | docling/backend/msword_backend.py | 1837 | |
| LOW | docling/backend/msword_backend.py | 1972 | |
| LOW | docling/backend/msword_backend.py | 2081 | |
| LOW | docling/backend/msword_backend.py | 2153 | |
| LOW | docling/backend/msword_backend.py | 2196 | |
| LOW | docling/backend/msword_backend.py | 2344 | |
| LOW | docling/backend/msword_backend.py | 2435 | |
| LOW | docling/backend/csv_backend.py | 52 | |
| LOW | docling/backend/msexcel_backend.py | 302 | |
| LOW | docling/backend/msexcel_backend.py | 473 | |
| 149 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | .actor/actor.sh | 268 | except Exception as e: |
| LOW | tests/test_deepseekocr_vlm.py | 107 | except Exception: |
| LOW | tests/test_options.py | 93 | except Exception as ex: |
| LOW | tests/test_backend_msword.py | 89 | except Exception: |
| LOW | tests/test_glmocr_vlm.py | 128 | except Exception: |
| LOW | tests/test_falcon_ocr_vlm.py | 93 | except Exception: |
| LOW | tests/test_lightonocr_vlm.py | 135 | except Exception: |
| LOW | tests/test_nanonets_ocr_vlm.py | 144 | except Exception: |
| LOW | …ified-python/references/advanced/exception-handling.md | 49 | except Exception: |
| LOW | …ified-python/references/advanced/exception-handling.md | 158 | except Exception: |
| LOW | …ified-python/references/advanced/exception-handling.md | 167 | except Exception as e: |
| LOW | …ified-python/references/advanced/exception-handling.md | 178 | except Exception: |
| LOW | docling/pipeline/asr_pipeline.py | 104 | except Exception as e: |
| LOW | docling/pipeline/asr_pipeline.py | 259 | except Exception as exc: |
| LOW | docling/pipeline/asr_pipeline.py | 269 | except Exception as e: |
| LOW | docling/pipeline/asr_pipeline.py | 365 | except Exception as exc: |
| LOW | docling/pipeline/base_extraction_pipeline.py | 42 | except Exception as e: |
| LOW | docling/pipeline/base_pipeline.py | 83 | except Exception as e: |
| LOW | docling/pipeline/base_pipeline.py | 303 | except Exception as e: |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 242 | def _run(self) -> None: |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 704 | def _produce_pages() -> None: |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 250 | except Exception: # pragma: no cover - top-level guard |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 320 | except Exception as exc: |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 440 | except Exception as exc: |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 716 | except Exception: |
| LOW | docling/pipeline/standard_pdf_pipeline.py | 728 | except Exception as exc: |
| LOW | docling/pipeline/extraction_vlm_pipeline.py | 130 | except Exception as e: |
| LOW | docling/pipeline/extraction_vlm_pipeline.py | 137 | except Exception as e: |
| LOW | docling/pipeline/extraction_vlm_pipeline.py | 190 | except Exception as e: |
| LOW | docling/pipeline/extraction_vlm_pipeline.py | 193 | except Exception as e: |
| LOW | docling/pipeline/vlm_pipeline.py | 407 | except Exception as exc: |
| LOW | docling/utils/glm_utils.py | 32 | except Exception: |
| LOW | docling/utils/api_image_request.py | 89 | except Exception as e: |
| LOW | docling/utils/api_image_request.py | 139 | except Exception as e: |
| LOW | docling/utils/api_image_request.py | 239 | except Exception as e: |
| LOW | docling/utils/deepseekocr_utils.py | 117 | except Exception as e: |
| LOW | docling/backend/webvtt_backend.py | 73 | except Exception as e: |
| LOW | docling/backend/md_backend.py | 168 | except Exception as e: |
| LOW | docling/backend/msword_backend.py | 195 | except Exception as e: |
| LOW | docling/backend/msword_backend.py | 340 | except Exception: |
| LOW | docling/backend/msword_backend.py | 504 | except Exception as e: |
| LOW | docling/backend/msword_backend.py | 605 | except Exception as e: |
| LOW | docling/backend/msword_backend.py | 2149 | except Exception as e: |
| LOW | docling/backend/msword_backend.py | 2405 | except Exception as e: |
| LOW | docling/backend/msword_backend.py | 2483 | except Exception as e: |
| LOW | docling/backend/csv_backend.py | 30 | except Exception as e: |
| LOW | docling/backend/msexcel_backend.py | 162 | except Exception as e: |
| LOW | docling/backend/msexcel_backend.py | 686 | except Exception: |
| LOW | docling/backend/noop_backend.py | 38 | except Exception as e: |
| LOW | docling/backend/asciidoc_backend.py | 44 | except Exception as e: |
| LOW | docling/backend/mspowerpoint_backend.py | 62 | except Exception as e: |
| LOW | docling/backend/mspowerpoint_backend.py | 431 | except Exception: |
| LOW | docling/backend/mets_gbs_backend.py | 185 | except Exception: |
| LOW | docling/backend/mets_gbs_backend.py | 197 | except Exception: |
| LOW | docling/backend/html_backend.py | 455 | except Exception as e: |
| LOW | docling/backend/image_backend.py | 169 | except Exception as e: |
| LOW | docling/backend/docling_parse_backend.py | 318 | except Exception: |
| LOW | docling/backend/latex/backend.py | 102 | except Exception as e: |
| LOW | docling/backend/latex/backend.py | 125 | except Exception as e: |
| LOW | docling/backend/latex/backend.py | 147 | except Exception as e: |
| 94 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | .actor/actor.sh | 19 | # Create a temporary home directory with write permissions |
| MEDIUM | .actor/actor.sh | 183 | # Create a dedicated working directory in /tmp (writable) |
| MEDIUM | .actor/actor.sh | 288 | # Create the request JSON |
| MEDIUM | tests/test_deepseekocr_vlm.py | 42 | # Create a page with the DeepSeek OCR markdown as VLM response |
| MEDIUM | tests/test_backend_webp.py | 28 | # Define the directory you want to search |
| MEDIUM | tests/test_backend_msexcel.py | 23 | # Define the directory you want to search |
| MEDIUM | tests/test_backend_msexcel.py | 286 | # Create an InputDocument with the BytesIO stream |
| MEDIUM | tests/test_backend_html.py | 259 | # Define the directory you want to search |
| MEDIUM | tests/test_backend_msword.py | 34 | # Define the directory you want to search |
| MEDIUM | tests/test_backend_msword.py | 364 | # Create a backend instance using any existing docx file |
| MEDIUM | tests/test_e2e_conversion.py | 24 | # Define the directory you want to search |
| MEDIUM | tests/test_backend_csv.py | 18 | # Define the directory you want to search |
| MEDIUM | tests/test_e2e_ocr_conversion.py | 31 | # Define the directory you want to search |
| MEDIUM | tests/test_interfaces.py | 122 | # Create an InlineVlmOptions with an invalid enum by patching attribute directly |
| MEDIUM | tests/test_backend_markdown.py | 77 | # Define the directory you want to search |
| MEDIUM | tests/test_asr_pipeline.py | 115 | # Create an empty ConversionResult with proper InputDocument |
| MEDIUM | tests/test_asr_pipeline.py | 152 | # Create a proper NoOpBackend instance |
| MEDIUM | tests/test_asr_pipeline.py | 285 | # Create a real file so backend initializes |
| MEDIUM | tests/test_asr_pipeline.py | 459 | # Create a real file so backend initializes |
| MEDIUM | tests/test_asr_pipeline.py | 511 | # Create a real file so backend initializes |
| MEDIUM | tests/test_cli.py | 192 | # Create a dummy audio file for testing |
| MEDIUM | tests/test_cli.py | 212 | # Create a dummy audio file for testing |
| MEDIUM | tests/test_backend_pptx.py | 16 | # Define the directory you want to search |
| MEDIUM | tests/test_latex/test_figures.py | 78 | # Create a temporary directory and test image |
| MEDIUM | tests/test_latex/test_figures.py | 84 | # Create a simple test image with known DPI |
| MEDIUM | docling/pipeline/standard_pdf_pipeline.py | 522 | # Create a copy to avoid mutating pipeline_options in-place, |
| MEDIUM | docling/utils/visualization.py | 15 | # Create a smaller font for the labels |
| MEDIUM | docling/utils/deepseekocr_utils.py | 280 | # Create a new document |
| MEDIUM | docling/backend/msword_backend.py | 306 | # Create a paragraph-like element to process with standard handler |
| MEDIUM | docling/backend/msword_backend.py | 1063 | # Create a textbox group to contain all text from the textbox |
| MEDIUM | docling/backend/msword_backend.py | 1105 | # Create a unique identifier based on content and position |
| MEDIUM | docling/backend/msword_backend.py | 2110 | # Create a temporary document with just these elements |
| MEDIUM | docling/backend/msword_backend.py | 2408 | # Create a group for this comment in NOTES and add the comment there |
| MEDIUM | docling/backend/html_backend.py | 2195 | # Create the list container |
| MEDIUM | docling/models/base_ocr_model.py | 63 | ) # Create a 20x20 structure element (10 pixels in all directions) |
| MEDIUM | docling/models/stages/layout/layout_model.py | 130 | # Create a deep copy of the original image for both sides |
| MEDIUM | docling/models/stages/vlm_convert/vlm_convert_model.py | 73 | # Create the engine - pass model_spec, let factory handle config generation |
| MEDIUM | …cling/models/stages/chart_extraction/granite_vision.py | 230 | # Create a batch of conversations |
| MEDIUM | …ing/models/inference_engines/vlm/auto_inline_engine.py | 177 | # Create the actual engine |
| MEDIUM | docling/models/inference_engines/vlm/vllm_engine.py | 164 | # Create a temporary mixin instance for downloading |
| MEDIUM | docling/datamodel/pipeline_options.py | 1014 | # Define an enum for the backend options |
| MEDIUM | docling/datamodel/pipeline_options.py | 1076 | # Define an enum for the ocr engines |
| MEDIUM | docling/datamodel/base_models.py | 404 | # Create a type alias for score values |
| MEDIUM | docling/datamodel/asr_model_specs.py | 121 | # Create the model instance |
| MEDIUM | docling/datamodel/asr_model_specs.py | 69 | # Create the model instance |
| MEDIUM | docling/datamodel/asr_model_specs.py | 173 | # Create the model instance |
| MEDIUM | docling/datamodel/asr_model_specs.py | 225 | # Create the model instance |
| MEDIUM | docling/datamodel/asr_model_specs.py | 277 | # Create the model instance |
| MEDIUM | docling/datamodel/asr_model_specs.py | 329 | # Create the model instance |
| MEDIUM | docling/datamodel/service/options.py | 1 | # Define the input options for the API |
| MEDIUM | docs/examples/enrich_doclingdocument.py | 50 | # The following function is responsible for taking an item and applying the required pre-processing for the model. |
| MEDIUM | docs/examples/legacy/vlm_pipeline_api_model_legacy.py | 266 | # Create the DocumentConverter and launch the conversion. |
| MEDIUM | docs/examples/legacy/minimal_vlm_pipeline_legacy.py | 6 | # This file is kept to validate backward compatibility with the old API. |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | .actor/actor.sh | 208 | # Check if process is still running |
| LOW | tests/test_deepseekocr_vlm.py | 100 | # Check if ollama is available |
| LOW | tests/test_backend_msword.py | 141 | # Verify if a particular textbox content is extracted |
| LOW | tests/test_asr_pipeline.py | 57 | # Check if the test audio file exists |
| LOW | tests/test_latex/test_macros.py | 372 | # Check if macros were registered |
| LOW | docling/pipeline/vlm_pipeline.py | 87 | # Check if using new VlmConvertOptions |
| LOW | docling/utils/layout_postprocessor.py | 435 | # Check if areas are similar (within 20% of each other) |
| LOW | docling/utils/deepseekocr_utils.py | 354 | # Check if NEXT annotation is a caption for this table/figure/image |
| LOW | docling/backend/msword_backend.py | 676 | # Check if this is a heading style |
| LOW | docling/backend/msword_backend.py | 1246 | # Check if this paragraph contains a checkbox |
| LOW | docling/backend/msword_backend.py | 1264 | # Check if this is actually a numbered list by examining the numFmt |
| LOW | docling/backend/msword_backend.py | 2362 | # Check if document has any comments |
| LOW | docling/backend/noop_backend.py | 27 | # Check if stream has content |
| LOW | docling/backend/noop_backend.py | 33 | # Check if file exists |
| LOW | docling/backend/mspowerpoint_backend.py | 340 | # Check if it's definitely a list item |
| LOW | docling/backend/mspowerpoint_backend.py | 353 | # Check if it's definitely not a list item |
| LOW | docling/backend/mspowerpoint_backend.py | 458 | # Check if master has marker information |
| LOW | docling/backend/html_backend.py | 1487 | # Check if cell is in a column header or row header |
| LOW | docling/backend/xml/xbrl_backend.py | 91 | # Check if arelle is available before proceeding |
| LOW | docling/backend/xml/jats_backend.py | 676 | # Check if cell is in a column header or row header |
| LOW | docling/backend/docx/latex/omml.py | 648 | # Check if base is a known limit function |
| LOW | docling/backend/docx/latex/omml.py | 653 | # Check if base is a grouping function (underbrace, overbrace, etc.) |
| LOW | …/stages/page_preprocessing/page_preprocessing_model.py | 129 | ) # Check if text is mostly slash-number pattern |
| LOW | …ling/models/stages/reading_order/readingorder_model.py | 228 | # Check if table has no structure prediction |
| LOW | docling/models/stages/ocr/tesseract_ocr_model.py | 183 | # Check if the detected language is present in the system |
| LOW | docling/models/stages/ocr/tesseract_ocr_cli_model.py | 175 | # Check if the detected language has been installed |
| LOW | …models/stages/table_structure/table_structure_model.py | 228 | # Check if word-level cells are available from backend: |
| LOW | …cling/models/stages/chart_extraction/granite_vision.py | 168 | # Check if the value is numeric - non-numeric cells are row headers |
| LOW | …ing/models/inference_engines/vlm/auto_inline_engine.py | 96 | # Check if model has explicit MLX export |
| LOW | …/inference_engines/vlm/api_openai_compatible_engine.py | 168 | # Check if stopped by custom criteria |
| LOW | …ng/models/vlm_pipeline_models/hf_transformers_model.py | 291 | # Check if it's a GenerationStopper class |
| LOW | docling/datamodel/accelerator_options.py | 98 | # Check if to set the num_threads from the alternative envvar |
| LOW | docling/datamodel/asr_model_specs.py | 132 | # Check if MPS is available (Apple Silicon) |
| LOW | docling/datamodel/asr_model_specs.py | 140 | # Check if mlx-whisper is available |
| LOW | docling/datamodel/asr_model_specs.py | 28 | # Check if MPS is available (Apple Silicon) |
| LOW | docling/datamodel/asr_model_specs.py | 36 | # Check if mlx-whisper is available |
| LOW | docling/datamodel/asr_model_specs.py | 80 | # Check if MPS is available (Apple Silicon) |
| LOW | docling/datamodel/asr_model_specs.py | 88 | # Check if mlx-whisper is available |
| LOW | docling/datamodel/asr_model_specs.py | 184 | # Check if MPS is available (Apple Silicon) |
| LOW | docling/datamodel/asr_model_specs.py | 192 | # Check if mlx-whisper is available |
| LOW | docling/datamodel/asr_model_specs.py | 236 | # Check if MPS is available (Apple Silicon) |
| LOW | docling/datamodel/asr_model_specs.py | 244 | # Check if mlx-whisper is available |
| LOW | docling/datamodel/asr_model_specs.py | 288 | # Check if MPS is available (Apple Silicon) |
| LOW | docling/datamodel/asr_model_specs.py | 296 | # Check if mlx-whisper is available |
| LOW | docling/datamodel/service/options.py | 921 | # Check if using legacy fields with new fields |
| LOW | docling/datamodel/service/options.py | 952 | # Check if using legacy fields with new fields |
| LOW | docs/examples/minimal_asr_pipeline.py | 66 | # Check if the test audio file exists |
| LOW | docs/examples/chart_extraction.py | 80 | # Check if the picture was classified as a chart. |
| LOW | docs/examples/chart_extraction.py | 86 | # Check if chart data was extracted. |
| LOW | docs/examples/pictures_description_api.py | 101 | # Check if running in CI environment |
| LOW | docs/examples/pictures_description_api.py | 111 | # Check if credentials are available |
| LOW | docs/examples/picture_description_inline.py | 45 | # Check if running in CI |
| LOW | docs/examples/asr_pipeline_performance_comparison.py | 156 | # Check if we're on Apple Silicon |
| LOW | docs/examples/post_process_ocr_with_vlm.py | 674 | # Read file names (strip whitespace, ignore empty lines) |
| LOW | docs/examples/vlm_pipeline_api_model.py | 54 | # Check if model is already loaded |
| LOW | docs/examples/vlm_pipeline_api_model.py | 102 | # Check if model exists |
| LOW | docs/examples/vlm_pipeline_api_model.py | 168 | # Check if LM Studio is running |
| LOW | docs/examples/vlm_pipeline_api_model.py | 238 | # Check if Ollama is running |
| LOW | docs/examples/vlm_pipeline_api_model.py | 300 | # Check if VLLM is running |
| LOW | docs/examples/vlm_pipeline_api_model.py | 358 | # Check if running in CI environment |
| 2 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | docling/document_converter.py | 343 | Convert one document fetched from a file path, URL, or DocumentStream. Note: If the document content is given a |
| HIGH | docling/document_converter.py | 407 | Convert multiple documents from file paths, URLs, or DocumentStreams. Args: source: Source of input |
| HIGH | docling/document_converter.py | 484 | Convert a document given as a string using the specified format. Only Markdown (`InputFormat.MD`) and HTML (`In |
| HIGH | …g/models/stages/code_formula/code_formula_vlm_model.py | 136 | Construct the prompt for the model based on the element type. Args: label: The type of input, eithe |
| HIGH | docling/models/stages/vlm_convert/vlm_convert_model.py | 194 | Process raw images without page metadata. This method provides a simpler interface for processing images direct |
| HIGH | docling/models/inference_engines/vlm/factory.py | 35 | Create a VLM inference engine from options. Args: options: Engine configuration options model_spec: |
| HIGH | docling/models/inference_engines/vlm/_utils.py | 21 | Convert any image format to RGB PIL Image. Args: image: Input image as PIL Image or numpy array Return |
| HIGH | docling/models/inference_engines/vlm/_utils.py | 99 | Resolve the path to model artifacts, downloading if needed. This standardizes the logic for finding or downloading |
| HIGH | …ling/models/inference_engines/common/kserve_v2_http.py | 225 | Execute HTTP request with consistent error handling. Args: url: Target URL method: HTTP |
| HIGH | …ling/models/inference_engines/common/kserve_v2_http.py | 322 | Execute inference request against KServe v2 endpoint. Args: inputs: Mapping of input tensor names t |
| HIGH | …ling/models/extraction/nuextract_transformers_model.py | 28 | Process vision information from both messages and in-context examples, supporting batch processing. Args: |
| HIGH | docling/datamodel/pipeline_options.py | 1046 | Normalize deprecated backend enum values to current ones. Args: backend: The PDF backend enum value to norm |
| HIGH | docling/datamodel/stage_model_specs.py | 252 | Check if this model has an explicit export for the given engine. An explicit export means either: 1. Th |
| HIGH | docling/datamodel/stage_model_specs.py | 518 | Get a specific preset. Args: preset_id: The preset identifier Returns: The req |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/verify_utils.py | 141 | # for l, true_item in enumerate(doc_true.main_text): |
| LOW | …ata/groundtruth/docling_v2/powerpoint_bad_text.pptx.md | 1 | # X-Library The fully customisable and copyright-free standard content template collection exclusively for our customers |
| LOW | tests/data/groundtruth/docling_v2/unit_test_01.html.md | 1 | # Title |
| LOW | tests/data/groundtruth/docling_v2/hyperlink_02.html.md | 1 | ## [Home](/home.html) |
| LOW | tests/data/uspto/pftaps057006474.txt | 861 | ##STR83## |
| LOW | docling/service_client/watchers.py | 241 | # |
| LOW | docs/examples/granitedocling_repetition_stopping.py | 1 | # %% [markdown] |
| LOW | docs/examples/granitedocling_repetition_stopping.py | 81 | # model=vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.repo_id, |
| LOW | docs/examples/granitedocling_repetition_stopping.py | 101 | # converter = DocumentConverter( |
| LOW | docs/examples/export_multimodal.py | 1 | # %% [markdown] |
| LOW | docs/examples/export_multimodal.py | 121 | f"Document converted and multimodal pages generated in {end_time:.2f} seconds." |
| LOW | docs/examples/inspect_picture_content.py | 1 | # %% [markdown] |
| LOW | docs/examples/compare_vlm_models.py | 1 | # %% [markdown] |
| LOW | docs/examples/translate.py | 1 | # %% [markdown] |
| LOW | docs/examples/export_figures.py | 1 | # %% [markdown] |
| LOW | docs/examples/enrich_doclingdocument.py | 1 | # %% [markdown] |
| LOW | docs/examples/gpu_vlm_pipeline.py | 1 | # %% [markdown] |
| LOW | docs/examples/model_family_engines_example.py | 1 | # %% [markdown] |
| LOW | docs/examples/run_with_formats.py | 1 | # %% [markdown] |
| LOW | docs/examples/minimal_asr_pipeline.py | 1 | # %% [markdown] |
| LOW | docs/examples/gpu_standard_pipeline.py | 1 | # %% [markdown] |
| LOW | docs/examples/chart_extraction.py | 1 | # %% [markdown] |
| LOW | docs/examples/pii_obfuscate.py | 1 | # %% [markdown] |
| LOW | docs/examples/pictures_description_api.py | 1 | # %% [markdown] |
| LOW | docs/examples/pictures_description_api.py | 181 | # Run watsonx.ai example (skips if in CI or credentials not found) |
| LOW | docs/examples/pictures_description_api.py | 201 | # ### Custom API Configuration |
| LOW | docs/examples/tesseract_lang_detection.py | 1 | # %% [markdown] |
| LOW | docs/examples/picture_description_inline.py | 1 | # %% [markdown] |
| LOW | docs/examples/picture_description_inline.py | 161 | # |
| LOW | docs/examples/minimal_vlm_pipeline.py | 1 | # %% [markdown] |
| LOW | docs/examples/custom_convert.py | 1 | # %% [markdown] |
| LOW | docs/examples/custom_convert.py | 21 | # - If you uncomment a backend or OCR option that is not imported above, also |
| LOW | docs/examples/custom_convert.py | 61 | # The sections below demo combinations of PdfPipelineOptions and backends. |
| LOW | docs/examples/custom_convert.py | 81 | # pipeline_options = PdfPipelineOptions() |
| LOW | docs/examples/custom_convert.py | 121 | |
| LOW | docs/examples/custom_convert.py | 141 | |
| LOW | docs/examples/custom_convert.py | 161 | # pipeline_options.table_structure_options = TableStructureOptions(do_cell_matching=True) |
| LOW | docs/examples/minimal.py | 1 | # %% [markdown] |
| LOW | docs/examples/suryaocr_with_custom_models.py | 1 | # Example: Integrating SuryaOCR with Docling for PDF OCR and Markdown Export |
| LOW | docs/examples/rapidocr_with_custom_models.py | 1 | # %% [markdown] |
| LOW | docs/examples/granite_vision_table_structure.py | 1 | # %% [markdown] |
| LOW | docs/examples/develop_formula_understanding.py | 1 | # %% [markdown] |
| LOW | docs/examples/export_tables.py | 1 | # %% [markdown] |
| LOW | docs/examples/parquet_images.py | 1 | # %% [markdown] |
| LOW | docs/examples/develop_picture_enrichment.py | 1 | # %% [markdown] |
| LOW | docs/examples/vlm_pipeline_api_model.py | 1 | # %% [markdown] |
| LOW | docs/examples/vlm_pipeline_api_model.py | 481 | # |
| LOW | docs/examples/vlm_pipeline_api_model.py | 501 | # |
| LOW | docs/examples/batch_convert.py | 1 | # %% [markdown] |
| LOW | docs/examples/run_with_accelerator.py | 1 | # %% [markdown] |
| LOW | docs/examples/run_with_accelerator.py | 41 | num_threads=8, device=AcceleratorDevice.CPU |
| LOW | docs/examples/full_page_ocr.py | 1 | # %% [markdown] |
| LOW | docs/examples/legacy/vlm_pipeline_api_model_legacy.py | 1 | # %% [markdown] |
| LOW | docs/examples/legacy/vlm_pipeline_api_model_legacy.py | 221 | enable_remote_services=True # required when calling remote VLM endpoints |
| LOW | docs/examples/legacy/vlm_pipeline_api_model_legacy.py | 241 | format=ResponseFormat.DOCTAGS, |
| LOW | docs/examples/legacy/pictures_description_api_legacy.py | 1 | # %% [markdown] |
| LOW | docs/examples/legacy/pictures_description_api_legacy.py | 141 | |
| LOW | …s/examples/legacy/picture_description_inline_legacy.py | 1 | # %% [markdown] |
| LOW | docs/examples/legacy/minimal_vlm_pipeline_legacy.py | 1 | # %% [markdown] |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | docling/backend/html_backend.py | 709 | const width = rect.width || 0; |
| HIGH | docling/backend/html_backend.py | 710 | const height = rect.height || 0; |
| HIGH | docling/backend/html_backend.py | 711 | if (width <= 0 && height <= 0) { |
| HIGH | docling/backend/html_backend.py | 718 | let textLeft = null; |
| HIGH | docling/backend/html_backend.py | 719 | let textTop = null; |
| HIGH | docling/backend/html_backend.py | 720 | let textRight = null; |
| HIGH | docling/backend/html_backend.py | 721 | let textBottom = null; |
| HIGH | docling/backend/html_backend.py | 734 | const tWidth = tRect.width || 0; |
| HIGH | docling/backend/html_backend.py | 735 | const tHeight = tRect.height || 0; |
| HIGH | docling/backend/html_backend.py | 736 | if (tWidth <= 0 && tHeight <= 0) { |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/data/uspto/ipg07997973.xml | 3813 | <p id="p-0056" num="0055">An enumerated list of items (which may or may not be numbered) does not imply that any or all |
| MEDIUM | tests/data/uspto/ipg07997973.xml | 3988 | <li id="ul0012-0008" num="0196"> The various methods of disguising a game described herein may pr |
| MEDIUM | tests/data/uspto/ipg07997973.xml | 4017 | <li id="ul0002-0035" num="0215"> In various embodiments, the data about the games of a primary player may |
| MEDIUM | tests/data/uspto/ipg07997973.xml | 4247 | <li id="ul0028-0018" num="0367"> Any physical game described herein may be implemented electronically in |
| MEDIUM | tests/data/jats/elife-56337.xml | 3 | <article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" article-type="researc |
| MEDIUM | tests/data/xbrl/grve_10q_htm.xml | 2459 | <us-gaap:SignificantAccountingPoliciesTextBlock contextRef="From2025-04-01to2025-12-31" id="fid_281"><p style="FO |
| MEDIUM | tests/data/xbrl/grve_10q_htm.xml | 2459 | <us-gaap:SignificantAccountingPoliciesTextBlock contextRef="From2025-04-01to2025-12-31" id="fid_281"><p style="FO |
| MEDIUM | tests/data/xbrl/grve_10q_htm.xml | 2460 | <us-gaap:BasisOfAccountingPolicyPolicyTextBlock contextRef="From2025-04-01to2025-12-31" id="fid_289"><p style="FO |
| MEDIUM | tests/data/xbrl/grve_10q_htm.xml | 2460 | <us-gaap:BasisOfAccountingPolicyPolicyTextBlock contextRef="From2025-04-01to2025-12-31" id="fid_289"><p style="FO |
| LOW | docling/backend/html_backend.py | 4234 | # Do not fetch the image, just add a placeholder |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | docling/datamodel/backend_options.py | 95 | examples=[{"Authorization": "Bearer TOKEN"}, {"X-API-Key": "your-api-key"}], |
| HIGH | docs/examples/granitedocling_repetition_stopping.py | 87 | # # "Authorization": "Bearer YOUR_API_KEY", # if needed |
| HIGH | docs/examples/service_client/README.md | 13 | export DOCLING_SERVICE_API_KEY="your-api-key" # optional |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | docs/examples/picture_description_inline.py | 7 | # - Enriches documents with AI-generated image captions |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | …roundtruth/docling_v2/arXiv-2501.01300v2_main.tex.json | 972 | "orig": "[fig:enhan] are obtained by multiplying continuum extrapolated $\\chi^C_4$ values to ratios $P^C_B/P_C$ a |
| MEDIUM | …roundtruth/docling_v2/arXiv-2501.01300v2_main.tex.json | 973 | "text": "[fig:enhan] are obtained by multiplying continuum extrapolated $\\chi^C_4$ values to ratios $P^C_B/P_C$ a |