OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched.

650 matches across 13 categories. Click a row to expand file-level details.

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_multi_font_manager.py | 65 | def test_missing_font_directory(): |
| LOW | tests/test_multi_font_manager.py | 75 | def test_select_font_for_arabic_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 83 | def test_select_font_for_persian_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 91 | def test_select_font_for_urdu_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 111 | def test_select_font_for_hindi_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 119 | def test_select_font_for_sanskrit_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 127 | def test_select_font_for_marathi_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 135 | def test_select_font_for_nepali_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 147 | def test_select_font_for_chinese_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 155 | def test_select_font_for_chinese_generic(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 163 | def test_select_font_for_chinese_simplified(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 171 | def test_select_font_for_chinese_traditional(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 179 | def test_select_font_for_japanese_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 187 | def test_select_font_for_korean_language(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 198 | def test_select_font_for_english_text(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 204 | def test_select_font_without_language_hint(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 213 | def test_select_font_arabic_text_without_language_hint(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 222 | def test_devanagari_text_without_language_hint(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 230 | def test_cjk_text_without_language_hint(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 238 | def test_fallback_to_occulta_font(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 246 | def test_fallback_fonts_constant(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 261 | def test_has_all_glyphs_for_english(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 267 | def test_has_all_glyphs_for_arabic(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 274 | def test_has_all_glyphs_for_devanagari(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 281 | def test_has_all_glyphs_for_cjk(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 288 | def test_empty_text_has_all_glyphs(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 293 | def test_has_all_glyphs_missing_font(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 301 | def test_font_selection_caching(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 53 | def test_init_loads_builtin_fonts(multi_font_manager): |
| LOW | tests/test_multi_font_manager.py | 315 | def test_language_font_map_coverage(): |
| LOW | tests/test_multi_font_manager.py | 362 | def test_custom_font_provider(font_dir): |
| LOW | tests/test_multi_font_manager.py | 378 | def test_missing_font_uses_fallback(font_dir): |
| LOW | tests/test_multi_font_manager.py | 393 | def test_builtin_font_provider_loads_expected_fonts(font_dir): |
| LOW | tests/test_multi_font_manager.py | 406 | def test_builtin_font_provider_get_font(font_dir): |
| LOW | tests/test_multi_font_manager.py | 418 | def test_builtin_font_provider_get_fallback(font_dir): |
| LOW | tests/test_multi_font_manager.py | 427 | def test_builtin_font_provider_missing_font_logs_warning(tmp_path, font_dir, caplog): |
| LOW | tests/test_multi_font_manager.py | 443 | def test_builtin_font_provider_missing_occulta_raises(tmp_path): |
| LOW | tests/test_ocr_engine_selection.py | 18 | def test_ocr_engine_option_exists(self): |
| LOW | tests/test_ocr_engine_selection.py | 30 | def test_ocr_engine_accepts_tesseract(self): |
| LOW | tests/test_ocr_engine_selection.py | 39 | def test_ocr_engine_accepts_auto(self): |
| LOW | tests/test_ocr_engine_selection.py | 48 | def test_ocr_engine_accepts_none(self): |
| LOW | tests/test_ocr_engine_selection.py | 57 | def test_ocr_engine_default_is_auto(self): |
| LOW | tests/test_ocr_engine_selection.py | 66 | def test_ocr_engine_rejects_invalid(self): |
| LOW | tests/test_ocr_engine_selection.py | 79 | def test_ocr_options_has_ocr_engine_field(self): |
| LOW | tests/test_ocr_engine_selection.py | 90 | def test_tesseract_selected_when_auto(self): |
| LOW | tests/test_ocr_engine_selection.py | 103 | def test_tesseract_selected_when_tesseract(self): |
| LOW | tests/test_ocr_engine_selection.py | 116 | def test_null_selected_when_none(self): |
| LOW | tests/test_ocr_engine_selection.py | 129 | def test_null_returns_none_when_auto(self): |
| LOW | tests/test_pipeline_generate_ocr.py | 22 | def test_ocr_engine_direct_function_exists(self): |
| LOW | tests/test_pipeline_generate_ocr.py | 28 | def test_ocr_engine_direct_returns_tuple(self, tmp_path): |
| LOW | tests/test_pipeline_generate_ocr.py | 54 | def test_page_result_has_ocr_tree_field(self): |
| LOW | tests/test_pipeline_generate_ocr.py | 61 | def test_page_result_ocr_tree_default_none(self): |
| LOW | tests/test_pipeline_generate_ocr.py | 89 | def test_hocr_result_has_ocr_tree_field(self): |
| LOW | tests/test_pipeline_generate_ocr.py | 96 | def test_hocr_result_ocr_tree_default_none(self): |
| LOW | tests/test_system_font_provider.py | 38 | def test_get_platform_windows(self): |
| LOW | tests/test_system_font_provider.py | 44 | def test_get_platform_freebsd(self): |
| LOW | tests/test_system_font_provider.py | 72 | def test_windows_font_dirs_with_windir(self): |
| LOW | tests/test_system_font_provider.py | 86 | def test_windows_font_dirs_default(self): |
| LOW | tests/test_system_font_provider.py | 99 | def test_windows_font_dirs_with_localappdata(self): |
| LOW | tests/test_system_font_provider.py | 138 | def test_get_font_unknown_name_returns_none(self): |
| 243 more matches not shown… | |||

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | misc/synology.py | 7 | |
| LOW | misc/pdf_text_diff.py | 6 | |
| LOW | misc/batch.py | 15 | |
| LOW | misc/example_plugin.py | 22 | |
| LOW | misc/webservice.py | 7 | |
| LOW | misc/webservice.py | 13 | |
| LOW | misc/pdf_compare.py | 6 | |
| LOW | misc/watcher.py | 8 | |
| LOW | misc/bisect_pdf.py | 6 | |
| LOW | misc/_webservice.py | 13 | |
| LOW | misc/ocrmypdf_compare.py | 6 | |
| LOW | bin/bump_version.py | 7 | |
| LOW | tests/test_multi_font_manager.py | 6 | |
| LOW | tests/test_ocr_engine_selection.py | 10 | |
| LOW | tests/test_pipeline_generate_ocr.py | 10 | |
| LOW | tests/test_system_font_provider.py | 6 | |
| LOW | tests/test_concurrency.py | 4 | |
| LOW | tests/conftest.py | 4 | |
| LOW | tests/test_rasterizer.py | 6 | |
| LOW | tests/test_rasterizer.py | 23 | |
| LOW | tests/test_validation.py | 4 | |
| LOW | tests/test_logging.py | 4 | |
| LOW | tests/test_hocr_parser.py | 6 | |
| LOW | tests/test_check_pdf.py | 4 | |
| LOW | tests/test_null_ocr_engine.py | 10 | |
| LOW | tests/test_tagged.py | 4 | |
| LOW | tests/test_optimize.py | 4 | |
| LOW | tests/test_stdio.py | 4 | |
| LOW | tests/test_image_input.py | 4 | |
| LOW | tests/test_metadata.py | 4 | |
| LOW | tests/__init__.py | 6 | |
| LOW | tests/test_pdf_renderer.py | 6 | |
| LOW | tests/test_semfree.py | 4 | |
| LOW | tests/test_preprocessing.py | 4 | |
| LOW | tests/test_rotation.py | 4 | |
| LOW | tests/test_ocr_element.py | 6 | |
| LOW | tests/test_page_boxes.py | 4 | |
| LOW | tests/test_ocr_engine_interface.py | 10 | |
| LOW | tests/test_pdfinfo.py | 4 | |
| LOW | tests/test_json_serialization.py | 2 | |
| LOW | tests/test_acroform.py | 4 | |
| LOW | tests/test_hocrtransform.py | 4 | |
| LOW | tests/test_userunit.py | 4 | |
| LOW | tests/test_ghostscript.py | 4 | |
| LOW | tests/test_multilingual_direct.py | 13 | |
| LOW | tests/test_unpaper.py | 4 | |
| LOW | tests/test_annots.py | 4 | |
| LOW | tests/test_tesseract.py | 4 | |
| LOW | tests/test_imageops.py | 4 | |
| LOW | tests/test_page_numbers.py | 4 | |
| LOW | tests/test_helpers.py | 4 | |
| LOW | tests/test_verapdf.py | 6 | |
| LOW | tests/test_api.py | 4 | |
| LOW | tests/test_graft.py | 4 | |
| LOW | tests/test_pdfa.py | 4 | |
| LOW | tests/test_watcher.py | 1 | |
| LOW | tests/test_main.py | 4 | |
| LOW | tests/test_fpdf_renderer.py | 6 | |
| LOW | tests/test_soft_error.py | 4 | |
| LOW | tests/test_completion.py | 4 | |
| 153 more matches not shown… | |||

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/test_rasterizer.py | 252 | # Create an image with gradients to detect rasterization errors |
| MEDIUM | tests/test_rasterizer.py | 286 | # Create an image with gradients to detect rasterization errors |
| MEDIUM | tests/test_ocr_engine_interface.py | 35 | # Create a minimal concrete implementation |
| MEDIUM | tests/test_pdfinfo.py | 142 | # Create an RGB image and save as JPEG |
| MEDIUM | tests/test_pdfinfo.py | 151 | # Create a PDF with the flate+jpeg image |
| MEDIUM | tests/test_ghostscript.py | 418 | # Create an invalid image object that has both ColorSpace and ImageMask set |
| MEDIUM | tests/test_annots.py | 19 | # Create a broken named destination |
| MEDIUM | tests/test_annots.py | 21 | # Create a valid named destination |
| MEDIUM | tests/test_graft.py | 53 | # Create a PDF with a non-zero mediabox origin |
| MEDIUM | tests/test_fpdf_renderer.py | 107 | # Create a non-page element |
| MEDIUM | tests/test_fpdf_renderer.py | 138 | # Create a simple page with one word |
| MEDIUM | tests/test_fpdf_renderer.py | 412 | # Create a page with multiple words on one line |
| MEDIUM | tests/test_fpdf_renderer.py | 480 | # Create a page with CJK words (Chinese characters) |
| MEDIUM | docs/conf.py | 10 | # This file is execfile()d with the current directory set to its |
| MEDIUM | src/ocrmypdf/_options.py | 485 | # Create a copy of the model data for serialization |
| MEDIUM | src/ocrmypdf/_pipeline.py | 797 | # Create a new single page PDF to hold |
| MEDIUM | src/ocrmypdf/_annots.py | 41 | # Create a set of all named destinations |
| MEDIUM | src/ocrmypdf/fpdf_renderer/renderer.py | 948 | # Create a renderer for this page |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/test_multilingual_direct.py | 69 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 71 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 537 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 539 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 141 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 143 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 216 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 218 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 314 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 316 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 381 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 383 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 511 | # ============================================================================= |
| MEDIUM | tests/test_multilingual_direct.py | 513 | # ============================================================================= |
| MEDIUM | src/ocrmypdf/_plugin_manager.py | 113 | # ========================================================================= |
| MEDIUM | src/ocrmypdf/_plugin_manager.py | 115 | # ========================================================================= |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | misc/pdf_compare.py | 34 | |
| LOW | misc/ocrmypdf_compare.py | 50 | |
| LOW | bin/bump_version.py | 101 | |
| LOW | tests/test_pdf_renderer.py | 690 | |
| LOW | tests/test_ghostscript.py | 483 | |
| LOW | tests/test_ghostscript.py | 517 | |
| LOW | tests/test_ghostscript.py | 486 | |
| LOW | tests/plugins/tesseract_cache.py | 66 | |
| LOW | tests/plugins/tesseract_cache.py | 67 | |
| LOW | src/ocrmypdf/optimize.py | 142 | |
| LOW | src/ocrmypdf/optimize.py | 202 | |
| LOW | src/ocrmypdf/_options.py | 89 | |
| LOW | src/ocrmypdf/_options.py | 483 | |
| LOW | src/ocrmypdf/_options.py | 585 | |
| LOW | src/ocrmypdf/_options.py | 489 | |
| LOW | src/ocrmypdf/_graft.py | 177 | |
| LOW | src/ocrmypdf/_graft.py | 512 | |
| LOW | src/ocrmypdf/api.py | 286 | |
| LOW | src/ocrmypdf/imageops.py | 29 | |
| LOW | src/ocrmypdf/_pipeline.py | 67 | |
| LOW | src/ocrmypdf/_pipeline.py | 165 | |
| LOW | src/ocrmypdf/_pipeline.py | 323 | |
| LOW | src/ocrmypdf/_pipeline.py | 511 | |
| LOW | src/ocrmypdf/_pipeline.py | 1234 | |
| LOW | src/ocrmypdf/_validation.py | 159 | |
| LOW | src/ocrmypdf/helpers.py | 252 | |
| LOW | src/ocrmypdf/pdfinfo/layout.py | 294 | |
| LOW | src/ocrmypdf/pdfinfo/_contentstream.py | 81 | |
| LOW | src/ocrmypdf/pdfinfo/info.py | 133 | |
| LOW | src/ocrmypdf/builtin_plugins/pypdfium.py | 117 | |
| LOW | src/ocrmypdf/builtin_plugins/ghostscript.py | 245 | |
| LOW | src/ocrmypdf/builtin_plugins/ghostscript.py | 310 | |
| LOW | src/ocrmypdf/builtin_plugins/ghostscript.py | 266 | |
| LOW | src/ocrmypdf/subprocess/_windows.py | 90 | |
| LOW | src/ocrmypdf/subprocess/_run.py | 79 | |
| LOW | src/ocrmypdf/_pipelines/_common.py | 514 | |
| LOW | src/ocrmypdf/extra_plugins/semfree.py | 120 | |
| LOW | src/ocrmypdf/_exec/tesseract.py | 281 | |
| LOW | src/ocrmypdf/_exec/ghostscript.py | 107 | |
| LOW | src/ocrmypdf/font/system_font_provider.py | 202 | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | src/ocrmypdf/api.py | 76 | Set up plugin infrastructure with proper initialization. This function handles: 1. Creating or validating the p |
| HIGH | src/ocrmypdf/api.py | 317 | Construct an options object from the input/output files and keyword arguments. Args: input_file: Input file |
| HIGH | src/ocrmypdf/api.py | 513 | Run OCRmyPDF on one PDF or image. This function supports two calling conventions: **New style (recommended):** |
| HIGH | src/ocrmypdf/pdfa.py | 219 | Attempt to convert a PDF to PDF/A by adding required structures. This function creates a copy of the input PDF and |
| HIGH | src/ocrmypdf/builtin_plugins/tesseract_ocr.py | 30 | Convert string argument to ThresholdingMethod enum. Args: value: String name of thresholding method (auto, |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | misc/batch.py | 86 | except Exception: |
| LOW | tests/conftest.py | 51 | except Exception: # pylint: disable=broad-except |
| MEDIUM | tests/conftest.py | 48 | def have_unpaper(): |
| LOW | tests/test_metadata.py | 216 | except Exception: # pylint: disable=broad-except |
| MEDIUM | tests/test_metadata.py | 204 | def libxmp_file_to_dict(): |
| LOW | src/ocrmypdf/optimize.py | 356 | except Exception: # pylint: disable=broad-except |
| LOW | src/ocrmypdf/api.py | 373 | except Exception as e: |
| LOW | src/ocrmypdf/api.py | 837 | except Exception as e: |
| LOW | src/ocrmypdf/api.py | 953 | except Exception as e: |
| LOW | src/ocrmypdf/_pipeline.py | 1036 | except Exception as e: |
| LOW | src/ocrmypdf/builtin_plugins/concurrency.py | 56 | except Exception: # pylint: disable=broad-except |
| LOW | src/ocrmypdf/builtin_plugins/concurrency.py | 171 | except Exception: |
| LOW | src/ocrmypdf/_pipelines/_common.py | 317 | except Exception: # pylint: disable=broad-except |
| LOW | src/ocrmypdf/extra_plugins/semfree.py | 106 | except Exception as e: # pylint: disable=broad-except |
| LOW | src/ocrmypdf/font/font_provider.py | 99 | except Exception as e: |
| LOW | src/ocrmypdf/font/system_font_provider.py | 263 | except Exception as e: |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_rasterizer.py | 21 | # Check if pypdfium2 is available |
| LOW | tests/plugins/tesseract_cache.py | 101 | # Check if cache has all required files |
| LOW | src/ocrmypdf/_options.py | 673 | # Check if this is a plugin namespace |
| LOW | src/ocrmypdf/_annots.py | 30 | # Check if there are any named destinations |
| LOW | src/ocrmypdf/pdfa.py | 205 | # Check if sRGB OutputIntent already exists |
| LOW | src/ocrmypdf/fpdf_renderer/renderer.py | 820 | # Check if character is in CJK ranges |
| LOW | src/ocrmypdf/builtin_plugins/pypdfium.py | 238 | # Check if user explicitly requested a different rasterizer |
| LOW | src/ocrmypdf/builtin_plugins/ghostscript.py | 225 | # Check if user explicitly requested a different rasterizer |
| LOW | src/ocrmypdf/builtin_plugins/ghostscript.py | 277 | # Check if it's an image with DCTDecode |
| LOW | src/ocrmypdf/builtin_plugins/ghostscript.py | 345 | # Check if output is 1-15 bytes shorter |
| LOW | src/ocrmypdf/builtin_plugins/ghostscript.py | 350 | # Check if the bytes are identical up to the truncation point |
| LOW | src/ocrmypdf/font/multi_font_manager.py | 252 | # Check if text contains non-ASCII characters |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_optimize.py | 341 | image.Filter = Name.CCITTFaxDecode |
| LOW | docs/conf.py | 1 | #!/usr/bin/env python3 |
| LOW | docs/conf.py | 121 | # non-false value, then it is used: |
| LOW | docs/conf.py | 141 | # add_function_parentheses = True |
| LOW | docs/conf.py | 181 | # Add any paths that contain custom themes here, relative to this directory. |
| LOW | docs/conf.py | 201 | # |
| LOW | docs/conf.py | 221 | # If true, SmartyPants will be used to convert quotes and dashes to |
| LOW | docs/conf.py | 241 | # html_use_index = True |
| LOW | docs/conf.py | 261 | # base URL from which the finished HTML is served. |
| LOW | docs/conf.py | 281 | # The name of a javascript file (relative to the configuration directory) that |
| LOW | docs/conf.py | 301 | # Latex figure (float) alignment |
| LOW | docs/conf.py | 321 | # latex_use_parts = False |
| LOW | docs/conf.py | 341 | # If false, no module index is generated. |
| LOW | docs/conf.py | 381 | |
| LOW | src/ocrmypdf/_jobcontext.py | 121 | # Otherwise, we have a fallback Namespace (shouldn't happen in normal operation) |
| LOW | src/ocrmypdf/_exec/ghostscript.py | 321 | stop_on_error = False |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | src/ocrmypdf/_pipelines/hocr_to_ocr_pdf.py | 0 | implements the concurrent and page synchronous parts of the pipeline. |
| HIGH | src/ocrmypdf/_pipelines/ocr.py | 0 | implements the concurrent and page synchronous parts of the pipeline. |
| HIGH | src/ocrmypdf/_pipelines/pdf_to_hocr.py | 0 | implements the concurrent and page synchronous parts of the pipeline. |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/test_fpdf_renderer.py | 326 | """Test rendering comprehensive multilingual 'Hello!' hOCR file. |
| MEDIUM | src/ocrmypdf/_validation_coordinator.py | 28 | """Run comprehensive validation on all options. |
| MEDIUM | src/ocrmypdf/_validation.py | 139 | # Finally, run comprehensive validation using the coordinator |
| LOW | src/ocrmypdf/_exec/unpaper.py | 62 | # No changes, PNG input, just use the file we already have |
| LOW | src/ocrmypdf/_exec/unpaper.py | 65 | # adds a few seconds to test suite - so just use pnm |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | tests/test_api.py | 150 | '"textpdf": {"Path": "c"}, "orientation_correction": 180, "ocr_tree": null}' |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | src/ocrmypdf/_validation_coordinator.py | 38 | # Step 1: Plugin context validation |
| LOW | src/ocrmypdf/_validation_coordinator.py | 41 | # Step 2: Cross-cutting validation |