Convert PDF to markdown + JSON quickly with high accuracy
233 matches across 9 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | examples/marker_modal_deployment.py | 92 | except Exception as e: |
| LOW | examples/marker_modal_deployment.py | 117 | except Exception as e: |
| LOW | examples/marker_modal_deployment.py | 286 | except Exception as e: |
| LOW | examples/marker_modal_deployment.py | 343 | except Exception as e: |
| MEDIUM | examples/marker_modal_deployment.py | 344 | print(f"Error getting web URL: {e}") |
| LOW | examples/marker_modal_deployment.py | 363 | except Exception as e: |
| MEDIUM | examples/marker_modal_deployment.py | 394 | print(f"Error: {response.text}") |
| LOW | examples/marker_modal_deployment.py | 396 | except Exception as e: |
| LOW | benchmarks/table/inference.py | 156 | except Exception as e: |
| LOW | benchmarks/overall/elo.py | 119 | except Exception as e: |
| MEDIUM | benchmarks/overall/elo.py | 120 | print(f"Error: {e}") |
| LOW | benchmarks/overall/elo.py | 159 | except Exception as e: |
| MEDIUM | benchmarks/overall/elo.py | 160 | print(f"Error: {e}") |
| LOW | benchmarks/overall/overall.py | 60 | except Exception as e: |
| LOW | benchmarks/overall/overall.py | 72 | except Exception as e: |
| LOW | benchmarks/overall/methods/olmocr.py | 66 | except Exception: |
| LOW | benchmarks/overall/methods/olmocr.py | 69 | except Exception: |
| LOW | benchmarks/overall/scorers/clean.py | 87 | except Exception as e: |
| MEDIUM | benchmarks/overall/scorers/clean.py | 78 | def standardize_math(self, match): |
| MEDIUM | benchmarks/overall/download/base.py | 48 | print(f"Error with sample {idx}: {e}") |
| LOW | benchmarks/overall/download/base.py | 50 | except Exception as e: |
| MEDIUM | benchmarks/overall/download/base.py | 51 | print(f"Error with sample {idx}: {e}") |
| LOW | benchmarks/overall/display/dataset.py | 34 | except Exception as e: |
| LOW | marker/builders/ocr.py | 369 | except Exception: |
| LOW | marker/config/parser.py | 148 | except Exception as e: |
| LOW | marker/config/parser.py | 159 | except Exception as e: |
| LOW | marker/providers/spreadsheet.py | 40 | except Exception as e: |
| LOW | marker/providers/registry.py | 79 | except Exception: |
| LOW | marker/providers/html.py | 16 | except Exception as e: |
| LOW | marker/providers/pdf.py | 449 | except Exception: |
| LOW | marker/providers/powerpoint.py | 53 | except Exception as e: |
| LOW | marker/providers/powerpoint.py | 223 | except Exception as e: |
| LOW | marker/providers/document.py | 61 | except Exception as e: |
| LOW | marker/providers/document.py | 100 | except Exception as e: |
| MEDIUM | marker/providers/document.py | 89 | def convert_image(match): |
| LOW | marker/providers/epub.py | 56 | except Exception as e: |
| LOW | marker/utils/gpu.py | 122 | except Exception as e: |
| MEDIUM | marker/utils/gpu.py | 98 | def stop_mps_server(self) -> None: |
| LOW | marker/processors/block_relabel.py | 55 | except Exception as e: |
| LOW | marker/processors/llm/llm_meta.py | 60 | except Exception as e: |
| LOW | marker/processors/llm/__init__.py | 127 | except Exception as e: |
| LOW | marker/processors/llm/__init__.py | 143 | except Exception as e: |
| LOW | marker/processors/llm/__init__.py | 189 | except Exception as e: |
| LOW | marker/processors/llm/llm_page_correction.py | 196 | except Exception as e: |
| LOW | marker/processors/llm/llm_page_correction.py | 217 | except Exception as e: |
| LOW | marker/processors/llm/llm_page_correction.py | 264 | except Exception as e: |
| LOW | marker/scripts/server.py | 106 | except Exception as e: |
| LOW | marker/scripts/convert.py | 56 | except Exception: |
| LOW | marker/scripts/convert.py | 96 | except Exception as e: |
| LOW | marker/scripts/extraction_app.py | 166 | except Exception as e: |
| LOW | marker/scripts/extraction_app.py | 228 | except Exception as e: |
| LOW | marker/scripts/file_to_s3.py | 38 | except Exception as e: |
| MEDIUM | marker/scripts/file_to_s3.py | 39 | print(f"Error uploading {filepath}: {str(e)}") |
| LOW | marker/scripts/common.py | 156 | except Exception as e: |
| MEDIUM | marker/scripts/common.py | 157 | print(f"Error parsing schema: {e}") |
| LOW | marker/services/claude.py | 52 | except Exception: |
| LOW | marker/services/claude.py | 58 | except Exception: |
| LOW | marker/services/claude.py | 130 | except Exception as e: |
| LOW | marker/services/gemini.py | 126 | except Exception as e: |
| LOW | marker/services/openai.py | 123 | except Exception as e: |
| 2 more matches not shown… | | | |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | benchmarks/table/inference.py | 14 | |
| LOW | benchmarks/table/inference.py | 15 | |
| LOW | benchmarks/table/table.py | 6 | |
| LOW | benchmarks/overall/elo.py | 3 | |
| LOW | benchmarks/overall/elo.py | 5 | |
| LOW | benchmarks/overall/elo.py | 6 | |
| LOW | benchmarks/overall/elo.py | 6 | |
| LOW | benchmarks/overall/elo.py | 6 | |
| LOW | benchmarks/overall/elo.py | 18 | |
| LOW | benchmarks/overall/methods/__init__.py | 2 | |
| LOW | benchmarks/overall/methods/__init__.py | 4 | |
| LOW | benchmarks/overall/methods/olmocr.py | 7 | |
| LOW | benchmarks/overall/scorers/llm.py | 13 | |
| LOW | benchmarks/overall/scorers/schema.py | 1 | |
| LOW | benchmarks/overall/display/table.py | 2 | |
| LOW | marker/builders/line.py | 6 | |
| LOW | marker/schema/polygon.py | 1 | |
| LOW | marker/schema/polygon.py | 5 | |
| LOW | marker/schema/document.py | 1 | |
| LOW | marker/schema/blocks/__init__.py | 1 | |
| LOW | marker/schema/blocks/__init__.py | 3 | |
| LOW | marker/schema/blocks/__init__.py | 3 | |
| LOW | marker/schema/blocks/__init__.py | 3 | |
| LOW | marker/schema/blocks/__init__.py | 4 | |
| LOW | marker/schema/blocks/__init__.py | 5 | |
| LOW | marker/schema/blocks/__init__.py | 6 | |
| LOW | marker/schema/blocks/__init__.py | 7 | |
| LOW | marker/schema/blocks/__init__.py | 8 | |
| LOW | marker/schema/blocks/__init__.py | 9 | |
| LOW | marker/schema/blocks/__init__.py | 10 | |
| LOW | marker/schema/blocks/__init__.py | 11 | |
| LOW | marker/schema/blocks/__init__.py | 12 | |
| LOW | marker/schema/blocks/__init__.py | 13 | |
| LOW | marker/schema/blocks/__init__.py | 14 | |
| LOW | marker/schema/blocks/__init__.py | 15 | |
| LOW | marker/schema/blocks/__init__.py | 16 | |
| LOW | marker/schema/blocks/__init__.py | 17 | |
| LOW | marker/schema/blocks/__init__.py | 18 | |
| LOW | marker/schema/blocks/__init__.py | 19 | |
| LOW | marker/schema/blocks/__init__.py | 20 | |
| LOW | marker/schema/blocks/__init__.py | 21 | |
| LOW | marker/schema/blocks/__init__.py | 22 | |
| LOW | marker/schema/blocks/form.py | 1 | |
| LOW | marker/schema/blocks/base.py | 1 | |
| LOW | marker/schema/groups/__init__.py | 1 | |
| LOW | marker/schema/groups/__init__.py | 2 | |
| LOW | marker/schema/groups/__init__.py | 3 | |
| LOW | marker/schema/groups/__init__.py | 4 | |
| LOW | marker/schema/groups/__init__.py | 5 | |
| LOW | marker/schema/groups/__init__.py | 6 | |
| LOW | marker/schema/text/__init__.py | 1 | |
| LOW | marker/schema/text/__init__.py | 2 | |
| LOW | marker/processors/util.py | 1 | |
| LOW | marker/processors/order.py | 1 | |
| LOW | marker/scripts/file_to_s3.py | 1 | |
| LOW | marker/scripts/file_to_s3.py | 2 | |
| LOW | marker/scripts/file_to_s3.py | 3 | |
| LOW | marker/scripts/file_to_s3.py | 7 | |
| LOW | marker/scripts/chunk_convert.py | 4 | |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/conftest.py | 130 | |
| LOW | benchmarks/table/inference.py | 45 | |
| LOW | benchmarks/overall/elo.py | 180 | |
| LOW | benchmarks/overall/overall.py | 24 | |
| LOW | benchmarks/overall/scorers/heuristic.py | 50 | |
| LOW | benchmarks/overall/display/dataset.py | 11 | |
| LOW | benchmarks/overall/display/table.py | 17 | |
| LOW | marker/output.py | 55 | |
| LOW | marker/renderers/ocr_json.py | 55 | |
| LOW | marker/renderers/html.py | 50 | |
| LOW | marker/renderers/html.py | 81 | |
| LOW | marker/renderers/markdown.py | 29 | |
| LOW | marker/renderers/markdown.py | 120 | |
| LOW | marker/builders/layout.py | 94 | |
| LOW | marker/builders/ocr.py | 165 | |
| LOW | marker/builders/ocr.py | 252 | |
| LOW | marker/config/printer.py | 9 | |
| LOW | marker/config/crawler.py | 34 | |
| LOW | marker/config/crawler.py | 99 | |
| LOW | marker/providers/spreadsheet.py | 81 | |
| LOW | marker/providers/pdf.py | 127 | |
| LOW | marker/providers/pdf.py | 202 | |
| LOW | marker/providers/powerpoint.py | 64 | |
| LOW | marker/providers/powerpoint.py | 140 | |
| LOW | marker/converters/__init__.py | 24 | |
| LOW | marker/converters/pdf.py | 154 | |
| LOW | marker/schema/polygon.py | 78 | |
| LOW | marker/schema/polygon.py | 117 | |
| LOW | marker/schema/polygon.py | 193 | |
| LOW | marker/schema/groups/page.py | 262 | |
| LOW | marker/schema/text/line.py | 60 | |
| LOW | marker/schema/text/span.py | 82 | |
| LOW | marker/processors/list.py | 57 | |
| LOW | marker/processors/ignoretext.py | 44 | |
| LOW | marker/processors/order.py | 15 | |
| LOW | marker/processors/debug.py | 62 | |
| LOW | marker/processors/blockquote.py | 35 | |
| LOW | marker/processors/table.py | 85 | |
| LOW | marker/processors/table.py | 234 | |
| LOW | marker/processors/table.py | 312 | |
| LOW | marker/processors/table.py | 457 | |
| LOW | marker/processors/table.py | 604 | |
| LOW | marker/processors/line_numbers.py | 40 | |
| LOW | marker/processors/line_numbers.py | 76 | |
| LOW | marker/processors/sectionheader.py | 38 | |
| LOW | marker/processors/llm/llm_table_merge.py | 156 | |
| LOW | marker/processors/llm/llm_table.py | 249 | |
| LOW | marker/scripts/common.py | 96 | |
| LOW | marker/services/gemini.py | 43 | |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/renderers/test_html_renderer.py | 14 | def test_html_renderer_block_ids(pdf_document, config): |
| LOW | tests/renderers/test_markdown_renderer.py | 18 | def test_markdown_renderer_auto_ocr(pdf_document): |
| LOW | tests/renderers/test_markdown_renderer.py | 27 | def test_markdown_renderer_pagination(pdf_document): |
| LOW | tests/renderers/test_markdown_renderer.py | 36 | def test_markdown_renderer_pagination_blank_last_page(pdf_document): |
| LOW | tests/renderers/test_markdown_renderer.py | 52 | def test_markdown_renderer_metadata(pdf_document): |
| LOW | tests/renderers/test_markdown_renderer.py | 59 | def test_markdown_renderer_images(pdf_document): |
| LOW | tests/renderers/test_markdown_renderer.py | 68 | def test_markdown_renderer_tables(pdf_document): |
| LOW | tests/renderers/test_json_renderer.py | 7 | def test_markdown_renderer_pagination(pdf_document): |
| LOW | tests/renderers/test_extract_images.py | 8 | def test_disable_extract_images(pdf_document): |
| LOW | tests/builders/test_ocr_pipeline.py | 44 | def test_ocr_with_inline_pipeline(pdf_document): |
| LOW | tests/builders/test_document_builder.py | 28 | def test_document_builder_inline_eq(pdf_document): |
| LOW | tests/providers/test_image_provider.py | 13 | def test_image_provider_conversion(pdf_converter, temp_image): |
| LOW | tests/converters/test_extraction_converter.py | 52 | def test_extraction_converter(config, model_dict, mock_llm_service, temp_doc): |
| LOW | tests/converters/test_extraction_converter.py | 66 | def test_extraction_converter_multiple_pages(extraction_converter, temp_doc): |
| LOW | tests/processors/test_llm_processors.py | 19 | def test_llm_form_processor_no_config(pdf_document, llm_service): |
| LOW | tests/processors/test_llm_processors.py | 30 | def test_llm_form_processor_no_cells(pdf_document, llm_service): |
| LOW | tests/processors/test_llm_processors.py | 107 | def test_llm_caption_processor_disabled(pdf_document): |
| LOW | tests/processors/test_llm_processors.py | 119 | def test_llm_caption_processor(pdf_document): |
| LOW | tests/processors/test_llm_processors.py | 141 | def test_llm_complex_region_processor(pdf_document): |
| LOW | tests/processors/test_llm_processors.py | 167 | def test_multi_llm_processors(pdf_document): |
| LOW | tests/processors/test_table_merge.py | 11 | def test_llm_table_processor_nomerge(pdf_document, table_rec_model, recognition_model, detection_model, mocker): |
| LOW | tests/processors/test_document_toc_processor.py | 7 | def test_document_toc_processor(pdf_document, detection_model, recognition_model, table_rec_model): |
| LOW | tests/processors/test_ignoretext.py | 10 | def test_ignoretext_processor(pdf_document): |
| LOW | examples/marker_modal_deployment.py | 33 | def setup_models_with_cache_check(logger, commit_volume=False): |
| LOW | marker/renderers/__init__.py | 117 | def generate_document_metadata(self, document: Document, document_output): |
| LOW | marker/renderers/chunk.py | 36 | def assemble_html_with_images(block: JSONBlockOutput, image_blocks: set[str]) -> str: |
| LOW | marker/renderers/json.py | 29 | def reformat_section_hierarchy(section_hierarchy): |
| LOW | marker/builders/ocr.py | 96 | def get_recognition_batch_size(self): |
| LOW | marker/builders/ocr.py | 105 | def select_ocr_blocks_by_mode( |
| LOW | marker/builders/ocr.py | 120 | def get_ocr_images_polygons_ids( |
| LOW | marker/config/crawler.py | 64 | def _gather_super_annotations(cls: Type) -> Dict[str, Type]: |
| LOW | marker/utils/batch.py | 4 | def get_batch_sizes_worker_counts(gpu_manager: GPUManager, peak_worker_vram: int): |
| LOW | marker/schema/groups/page.py | 139 | def compute_line_block_intersections( |
| LOW | marker/schema/groups/page.py | 163 | def compute_max_structure_block_intersection_pct(self): |
| LOW | marker/processors/table.py | 705 | def get_recognition_batch_size(self): |
| LOW | marker/processors/line_numbers.py | 61 | def ignore_line_number_blocks(self, document: Document): |
| LOW | marker/scripts/common.py | 96 | def extract_root_pydantic_class(schema_code: str) -> Optional[str]: |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | examples/marker_modal_deployment.py | 41 | # Check if models exist in cache |
| LOW | examples/marker_modal_deployment.py | 207 | # Read file content |
| LOW | marker/providers/spreadsheet.py | 95 | # Check if this cell is the start of a merged range |
| LOW | marker/providers/registry.py | 76 | # Check if there are any HTML tags |
| LOW | marker/schema/groups/page.py | 60 | # Check if RGB, convert if needed |
| LOW | marker/processors/footnote.py | 25 | # Check if it is top-level |
| LOW | marker/processors/llm/llm_mathblock.py | 126 | # Check if the ratio of math blocks to additional blocks is high enough |
| LOW | marker/processors/llm/llm_table_merge.py | 297 | # Check if the number of rows is the same |
| LOW | marker/processors/llm/llm_table_merge.py | 302 | # Check if the number of columns is the same |
| LOW | marker/scripts/chunk_convert.sh | 5 | # Check if NUM_DEVICES is set |
| LOW | marker/scripts/extraction_app.py | 83 | # Check if this is a new file |
| LOW | marker/scripts/extraction_app.py | 196 | # Check if schema is provided before running |
| LOW | marker/scripts/common.py | 107 | # Check if this class inherits from BaseModel |
| LOW | marker/scripts/common.py | 129 | # Check if this field references another class |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | examples/marker_modal_deployment.py | 9 | # Define the Modal app |
| MEDIUM | examples/marker_modal_deployment.py | 14 | # Define the container image with all dependencies |
| MEDIUM | examples/marker_modal_deployment.py | 30 | # Create a persistent volume for model caching |
| MEDIUM | marker/builders/structure.py | 68 | # Create a merged block |
| MEDIUM | marker/scripts/extraction_app.py | 80 | # Create a unique identifier for the current file |
| MEDIUM | marker/scripts/extraction_app.py | 109 | # Initialize schema variable |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | data/examples/json/multicolcnn.json | 1164 | "html": "<p block-type=\"Text\">We propose the use of dilated convolutions as an attractive alternative to the |
| MEDIUM | data/examples/json/multicolcnn.json | 1865 | "html": "<p block-type=\"Text\">Furthermore, we performed a set of experiments in which we varied the number o |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | examples/marker_modal_deployment.py | 304 | # that you can use to test your deployment. It'll store the |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …/examples/markdown/switch_transformers/switch_trans.md | 561 | import mesh tensorflow as mtf |
| LOW | …/examples/markdown/switch_transformers/switch_trans.md | 661 | # d model = model hidden size (scalar). |
| LOW | …/examples/markdown/switch_transformers/switch_trans.md | 681 | # probability. |