A Python library for extracting structured information from unstructured text using LLMs with precise source grounding and interactive visualization.
787 matches across 15 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/chunking_test.py | 69 | def test_multi_sentence_chunk(self): |
| LOW | tests/chunking_test.py | 96 | def test_sentence_with_multiple_newlines_and_right_interval(self): |
| LOW | tests/chunking_test.py | 164 | def test_long_token_gets_own_chunk(self): |
| LOW | tests/chunking_test.py | 207 | def test_newline_at_chunk_boundary_does_not_create_empty_interval(self): |
| LOW | tests/chunking_test.py | 261 | def test_newlines_is_secondary_sentence_break(self): |
| LOW | tests/chunking_test.py | 312 | def test_tokenizer_propagation(self): |
| LOW | tests/chunking_test.py | 425 | def test_make_batches_of_textchunk( |
| LOW | tests/chunking_test.py | 473 | def test_text_chunk_additional_context(self): |
| LOW | tests/chunking_test.py | 486 | def test_chunk_iterator_without_additional_context(self): |
| LOW | tests/chunking_test.py | 497 | def test_multiple_chunks_with_additional_context(self): |
| LOW | tests/chunking_test.py | 549 | def test_text_chunk_properties( |
| LOW | tests/progress_test.py | 27 | def test_download_progress_bar(self): |
| LOW | tests/progress_test.py | 37 | def test_extraction_progress_bar(self): |
| LOW | tests/progress_test.py | 47 | def test_save_load_progress_bars(self): |
| LOW | tests/progress_test.py | 57 | def test_model_info_extraction(self): |
| LOW | tests/progress_test.py | 68 | def test_formatting_functions(self): |
| LOW | tests/format_handler_test.py | 147 | def test_end_to_end_integration_with_prompt_and_resolver(self): |
| LOW | tests/format_handler_test.py | 232 | def test_format_parse_roundtrip( |
| LOW | tests/format_handler_test.py | 260 | def test_think_tags_stripped_before_parsing(self): |
| LOW | tests/format_handler_test.py | 276 | def test_top_level_list_accepted_as_fallback(self): |
| LOW | tests/annotation_test.py | 47 | def assert_char_interval_match_source( |
| LOW | tests/annotation_test.py | 80 | def test_annotate_text_single_chunk(self): |
| LOW | tests/annotation_test.py | 206 | def test_annotate_text_without_index_suffix(self): |
| LOW | tests/annotation_test.py | 325 | def test_annotate_text_with_attributes_suffix(self): |
| LOW | tests/annotation_test.py | 469 | def test_annotate_text_multiple_chunks(self): |
| LOW | tests/annotation_test.py | 569 | def test_annotate_text_no_extractions(self): |
| LOW | tests/annotation_test.py | 766 | def test_annotate_documents_exceptions( |
| LOW | tests/annotation_test.py | 817 | def test_multipass_extraction_non_overlapping(self): |
| LOW | tests/annotation_test.py | 867 | def test_multipass_extraction_overlapping(self): |
| LOW | tests/annotation_test.py | 918 | def test_multipass_extraction_single_pass(self): |
| LOW | tests/annotation_test.py | 948 | def test_multipass_extraction_empty_passes(self): |
| LOW | tests/annotation_test.py | 1052 | def test_merge_non_overlapping_extractions( |
| LOW | tests/annotation_test.py | 1160 | def test_yields_documents_not_generators(self): |
| LOW | tests/annotation_test.py | 1220 | def test_context_window_includes_previous_chunk_text(self): |
| LOW | tests/annotation_test.py | 1271 | def test_no_context_included_when_disabled(self): |
| LOW | tests/annotation_test.py | 1309 | def test_context_window_per_document_isolation(self): |
| LOW | tests/schema_test.py | 47 | def _openai_attribute_properties(openai_schema, extraction_class): |
| LOW | tests/schema_test.py | 56 | def test_abstract_methods_required(self): |
| LOW | tests/schema_test.py | 61 | def test_subclass_must_implement_all_methods(self): |
| LOW | tests/schema_test.py | 77 | def test_get_schema_class_returns_none_by_default(self): |
| LOW | tests/schema_test.py | 87 | def test_apply_schema_stores_instance(self): |
| LOW | tests/schema_test.py | 256 | def test_from_examples_constructs_expected_schema( |
| LOW | tests/schema_test.py | 263 | def test_to_provider_config_returns_response_schema(self): |
| LOW | tests/schema_test.py | 285 | def test_requires_raw_output_returns_true(self): |
| LOW | tests/schema_test.py | 306 | def test_response_format_returns_json_schema_response_format(self): |
| LOW | tests/schema_test.py | 339 | def test_to_provider_config_uses_provider_schema_hook(self): |
| LOW | tests/schema_test.py | 347 | def test_from_examples_constructs_strict_openai_schema(self): |
| LOW | tests/schema_test.py | 445 | def test_from_examples_preserves_list_attribute_schema(self): |
| LOW | tests/schema_test.py | 472 | def test_from_examples_empty_examples_allow_empty_extraction_objects(self): |
| LOW | tests/schema_test.py | 486 | def test_validate_format_rejects_yaml(self): |
| LOW | tests/schema_test.py | 497 | def test_requires_raw_output_returns_true(self): |
| LOW | tests/schema_test.py | 503 | def test_validate_format_warns_when_fences_enabled(self): |
| LOW | tests/schema_test.py | 516 | def test_validate_format_warns_with_wrong_wrapper_key(self): |
| LOW | tests/schema_test.py | 531 | def test_from_examples_preserves_scalar_attribute_types(self): |
| LOW | tests/schema_test.py | 568 | def test_from_examples_preserves_mixed_numeric_attribute_types(self): |
| LOW | tests/schema_test.py | 619 | def test_from_examples_allows_none_attribute_values(self): |
| LOW | tests/schema_test.py | 641 | def test_from_examples_strict_false_emits_non_strict_response_format(self): |
| LOW | tests/schema_test.py | 647 | def test_response_format_returns_isolated_schema_dict(self): |
| LOW | tests/schema_test.py | 658 | def test_instance_is_frozen_and_dict_is_isolated(self): |
| LOW | tests/schema_test.py | 748 | def test_base_schema_no_validation(self): |
| 403 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | langextract/plugins.py | 125 | Load a provider class from module:Class specification. Args: spec: Import specification in format "module.path:Cl |
| HIGH | langextract/plugins.py | 186 | Get a provider class by name. Args: name: Provider name (e.g., "gemini", "openai", "ollama"). allow_override: |
| HIGH | langextract/annotation.py | 124 | Iterates over documents to yield text chunks along with the document ID. Args: documents: A sequence of Document |
| HIGH | langextract/annotation.py | 222 | Annotates a sequence of documents with NLP extractions. Breaks documents into chunks, processes them into prompts |
| HIGH | langextract/chunking.py | 146 | Creates a token interval. Args: start_index: first token's index (inclusive). end_index: last token's index + |
| HIGH | langextract/chunking.py | 173 | Get the text within an interval of tokens. Args: tokenized_text: Tokenized documents. token_interval: An inte |
| HIGH | langextract/chunking.py | 220 | Returns the char interval corresponding to the token interval. Args: tokenized_text: Document. token_interval |
| HIGH | langextract/chunking.py | 247 | Converts all whitespace characters in input text to a single space. Args: text: Input to sanitize. Returns: |
| HIGH | langextract/io.py | 51 | Loads the dataset from a CSV file. Args: delimiter: The delimiter to use when reading the CSV file. Yiel |
| HIGH | langextract/io.py | 148 | Loads annotated documents from a JSON Lines file. Args: jsonl_path: The file path to the JSON Lines file. sho |
| HIGH | langextract/io.py | 198 | Reads a CSV file and yields rows as dicts. Args: filepath: The path to the file. column_names: The names of t |
| HIGH | langextract/io.py | 271 | Download text content from a URL with optional progress bar. Args: url: The URL to download from. timeout: Re |
| HIGH | langextract/prompt_validation.py | 134 | Align extractions to their own example text and collect issues. Args: examples: The few-shot examples to validate |
| HIGH | langextract/prompting.py | 56 | Reads a structured prompt template from a file. Args: prompt_path: Path to a file containing PromptTemplateStruct |
| HIGH | langextract/factory.py | 110 | Create a language model instance from configuration. Args: config: Model configuration with optional model_id and |
| HIGH | langextract/factory.py | 204 | Internal helper to create a model with optional schema constraints. This function creates a language model and option |
| HIGH | langextract/extraction.py | 66 | Extracts structured information from text. Retrieves structured information from the provided text or documents using |
| HIGH | langextract/resolver.py | 273 | Runs resolve function on text with YAML/JSON extraction data. Args: input_text: The input text to be proces |
| HIGH | langextract/resolver.py | 393 | Parses a YAML or JSON-formatted string into extraction data. This method is kept for backward compatibility with te |
| HIGH | langextract/resolver.py | 428 | Extracts and orders extraction data based on their associated indexes. This function processes a list of dictionari |
| HIGH | langextract/core/base_model.py | 157 | Parses model output as JSON or YAML. Note: This expects raw JSON/YAML without code fences. Code fence extractio |
| HIGH | langextract/core/tokenizer.py | 474 | Reconstructs the substring of the original text spanning a given token interval. Args: tokenized_text: A Tokenize |
| HIGH | langextract/core/tokenizer.py | 586 | Finds a 'sentence' interval from a given start index. Sentence boundaries are defined by: - punctuation tokens in |
| HIGH | langextract/core/format_handler.py | 154 | Parse model output to extract data. Args: text: Raw model output. strict: If True, enforce strict schem |
| HIGH | langextract/core/format_handler.py | 279 | Extract content from text, handling fences if configured. Args: text: Input text that may contain fenced bloc |
| HIGH | langextract/providers/openai_batch.py | 357 | Execute batch inference on multiple prompts using OpenAI Batch API. Args: client: OpenAI client instance (or comp |
| HIGH | langextract/providers/router.py | 140 | Resolve a model ID to a provider class. Args: model_id: The model identifier to resolve. Returns: The prov |
| HIGH | langextract/providers/router.py | 171 | Resolve a provider name to a provider class. This allows explicit provider selection by name or class name. Args: |
| HIGH | langextract/providers/gemini_batch.py | 317 | Submit a file-based batch job to Vertex AI using GCS storage. Batch processing is only supported with Vertex AI becau |
| HIGH | langextract/providers/gemini_batch.py | 552 | Poll batch job until completion or timeout. Args: client: google.genai.Client instance for polling job status. |
| HIGH | langextract/providers/gemini_batch.py | 629 | Extract text outputs from file-based batch results, preserving order. Reads results from GCS output directory. Arg |
| HIGH | langextract/providers/gemini_batch.py | 709 | Execute batch inference on multiple prompts using the Vertex AI Batch API. This function provides file-based batch pr |
| HIGH | langextract/providers/ollama.py | 561 | Sends a prompt to an Ollama model and returns the generated response. Note: This is a low-level method. Constructor |
| HIGH | scripts/create_provider_plugin.py | 240 | \ """Schema implementation for {provider_name} provider.""" import langextract as lx from lange |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | autoformat.sh | 1 | #!/bin/bash |
| LOW | .pre-commit-config.yaml | 1 | # Copyright 2025 Google LLC. |
| LOW | pyproject.toml | 1 | # Copyright 2025 Google LLC. |
| LOW | tox.ini | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/chunking_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/progress_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/format_handler_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/annotation_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/schema_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/prompting_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/inference_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/provider_schema_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/extract_precedence_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/resolver_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/resolver_test.py | 661 | extraction_text="prednisone", |
| LOW | tests/fuzzy_alignment_cases_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/test_kwargs_passthrough.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/gemini_retry_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/data_lib_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/init_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/openai_batch_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/registry_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/tokenizer_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/extract_schema_integration_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/prompt_validation_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/factory_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/visualization_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/io_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/provider_plugin_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/factory_schema_test.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/test_live_api.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/test_ollama_integration.py | 1 | # Copyright 2025 Google LLC. |
| LOW | tests/test_gemini_batch_api.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/plugins.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/annotation.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/chunking.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/registry.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/data_lib.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/io.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/prompt_validation.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/prompting.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/__init__.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/visualization.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/factory.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/tokenizer.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/extraction.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/resolver.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/inference.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/exceptions.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/progress.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/data.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/schema.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/core/__init__.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/core/types.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/core/base_model.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/core/tokenizer.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/core/format_handler.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/core/exceptions.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/core/data.py | 1 | # Copyright 2025 Google LLC. |
| LOW | langextract/core/schema.py | 1 | # Copyright 2025 Google LLC. |
| 47 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | tests/test_live_api.py | 0 | \ extract medication information including medication name, dosage, route, frequency, and duration in the order they app |
| HIGH | tests/test_live_api.py | 0 | \ extract medication information including medication name, dosage, route, frequency, and duration in the order they app |
| HIGH | tests/test_live_api.py | 0 | \ extract medication information including medication name, dosage, route, frequency, and duration in the order they app |
| HIGH | tests/test_live_api.py | 0 | \ extract medication information including medication name, dosage, route, frequency, and duration in the order they app |
| HIGH | tests/test_live_api.py | 0 | the patient was prescribed lisinopril and metformin last month. he takes the lisinopril 10mg daily for hypertension, but |
| HIGH | tests/test_live_api.py | 0 | the patient was prescribed lisinopril and metformin last month. he takes the lisinopril 10mg daily for hypertension, but |
| HIGH | docs/examples/medication_examples.md | 0 | the patient was prescribed lisinopril and metformin last month. he takes the lisinopril 10mg daily for hypertension, but |
| HIGH | examples/ollama/demo_ollama.py | 0 | the patient was prescribed lisinopril and metformin last month. he takes the lisinopril 10mg daily for hypertension, but |
| HIGH | tests/test_live_api.py | 0 | extract medications with their details, using attributes to group related information: 1. extract entities in the order |
| HIGH | tests/test_live_api.py | 0 | extract medications with their details, using attributes to group related information: 1. extract entities in the order |
| HIGH | docs/examples/medication_examples.md | 0 | extract medications with their details, using attributes to group related information: 1. extract entities in the order |
| HIGH | examples/ollama/demo_ollama.py | 0 | extract medications with their details, using attributes to group related information: 1. extract entities in the order |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/openai_batch_test.py | 19 | |
| LOW | langextract/plugins.py | 20 | |
| LOW | langextract/registry.py | 21 | |
| LOW | langextract/data_lib.py | 16 | |
| LOW | langextract/io.py | 16 | |
| LOW | langextract/prompt_validation.py | 17 | |
| LOW | langextract/prompting.py | 16 | |
| LOW | langextract/__init__.py | 21 | |
| LOW | langextract/visualization.py | 24 | |
| LOW | langextract/factory.py | 22 | |
| LOW | langextract/tokenizer.py | 21 | |
| LOW | langextract/tokenizer.py | 25 | |
| LOW | langextract/extraction.py | 17 | |
| LOW | langextract/resolver.py | 21 | |
| LOW | langextract/inference.py | 21 | |
| LOW | langextract/exceptions.py | 22 | |
| LOW | langextract/progress.py | 16 | |
| LOW | langextract/data.py | 21 | |
| LOW | langextract/data.py | 25 | |
| LOW | langextract/schema.py | 21 | |
| LOW | langextract/core/__init__.py | 22 | |
| LOW | langextract/core/types.py | 16 | |
| LOW | langextract/core/base_model.py | 16 | |
| LOW | langextract/core/format_handler.py | 17 | |
| LOW | langextract/core/exceptions.py | 21 | |
| LOW | langextract/core/data.py | 16 | |
| LOW | langextract/core/schema.py | 16 | |
| LOW | langextract/core/debug_utils.py | 16 | |
| LOW | langextract/providers/openai_batch.py | 22 | |
| LOW | langextract/providers/gemini.py | 19 | |
| LOW | langextract/providers/openai.py | 18 | |
| LOW | langextract/providers/router.py | 22 | |
| LOW | langextract/providers/gemini_batch.py | 25 | |
| LOW | langextract/providers/ollama.py | 84 | |
| LOW | langextract/providers/schemas/__init__.py | 16 | |
| LOW | langextract/providers/schemas/gemini.py | 18 | |
| LOW | langextract/providers/schemas/openai.py | 18 | |
| LOW | langextract/_compat/registry.py | 18 | |
| LOW | langextract/_compat/__init__.py | 21 | |
| LOW | langextract/_compat/inference.py | 17 | |
| LOW | langextract/_compat/exceptions.py | 18 | |
| LOW | langextract/_compat/schema.py | 18 | |
| LOW | …amples/custom_provider_plugin/test_example_provider.py | 24 | |
| LOW | …ovider_plugin/langextract_provider_example/provider.py | 17 | |
| LOW | …ovider_plugin/langextract_provider_example/__init__.py | 17 | |
| LOW | …provider_plugin/langextract_provider_example/schema.py | 17 | |
| LOW | benchmarks/fuzzy_benchmark.py | 29 | |
| LOW | scripts/validate_community_providers.py | 18 | |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/prompt_validation_test.py | 457 | except Exception: # pylint: disable=broad-except |
| LOW | tests/prompt_validation_test.py | 505 | except Exception: # pylint: disable=broad-except |
| LOW | tests/prompt_validation_test.py | 526 | except Exception: # pylint: disable=broad-except |
| LOW | langextract/prompting.py | 78 | except Exception as e: |
| LOW | langextract/visualization.py | 59 | except Exception: |
| LOW | langextract/resolver.py | 420 | except Exception as e: |
| LOW | langextract/core/base_model.py | 179 | except Exception as e: |
| LOW | langextract/core/debug_utils.py | 89 | except Exception: |
| LOW | langextract/core/debug_utils.py | 129 | except Exception: |
| LOW | langextract/core/debug_utils.py | 184 | except Exception: |
| LOW | langextract/providers/openai_batch.py | 253 | except Exception as e: |
| LOW | langextract/providers/openai_batch.py | 277 | except Exception as e: |
| LOW | langextract/providers/openai_batch.py | 313 | except Exception as e: |
| LOW | langextract/providers/openai_batch.py | 444 | except Exception as e: |
| LOW | langextract/providers/openai_batch.py | 470 | except Exception as e: |
| LOW | langextract/providers/openai_batch.py | 496 | except Exception as e: |
| LOW | langextract/providers/openai_batch.py | 508 | except Exception as e: |
| LOW | langextract/providers/__init__.py | 134 | except Exception as e: |
| LOW | langextract/providers/__init__.py | 139 | except Exception as e: |
| LOW | langextract/providers/gemini.py | 361 | except Exception as e: |
| LOW | langextract/providers/gemini.py | 441 | except Exception as e: |
| LOW | langextract/providers/gemini.py | 478 | except Exception as e: |
| LOW | langextract/providers/openai.py | 258 | except Exception as e: |
| LOW | langextract/providers/openai.py | 337 | except Exception as e: |
| LOW | langextract/providers/openai.py | 375 | except Exception as e: |
| LOW | langextract/providers/gemini_batch.py | 172 | except Exception: |
| LOW | langextract/providers/gemini_batch.py | 253 | except Exception as e: |
| LOW | langextract/providers/gemini_batch.py | 416 | except Exception as e: |
| LOW | langextract/providers/gemini_batch.py | 457 | except Exception as e: |
| LOW | langextract/providers/gemini_batch.py | 468 | except Exception as e: |
| LOW | langextract/providers/gemini_batch.py | 586 | except Exception as e: |
| LOW | langextract/providers/gemini_batch.py | 837 | except Exception as e: |
| LOW | langextract/providers/ollama.py | 313 | except Exception as e: |
| LOW | …ovider_plugin/langextract_provider_example/provider.py | 181 | except Exception as e: |
| LOW | examples/ollama/demo_ollama.py | 452 | except Exception as e: |
| LOW | examples/ollama/demo_ollama.py | 533 | except Exception as e: |
| MEDIUM | benchmarks/plotting.py | 339 | print(f"Error loading {json_file}: {e}") |
| LOW | scripts/create_provider_plugin.py | 375 | except Exception as e: |
| LOW | scripts/create_provider_plugin.py | 393 | except Exception: |
| LOW | scripts/create_provider_plugin.py | 395 | except Exception as e: |
| LOW | scripts/create_provider_plugin.py | 429 | except Exception as e: |
| LOW | scripts/create_provider_plugin.py | 444 | except Exception as e: |
| LOW | .github/scripts/zenodo_publish.py | 211 | except Exception as e: |
| MEDIUM | .github/scripts/zenodo_publish.py | 180 | def main() -> int: |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | README.md | 206 | docker run --rm -e LANGEXTRACT_API_KEY="your-api-key" langextract python your_script.py |
| HIGH | README.md | 229 | export LANGEXTRACT_API_KEY="your-api-key-here" |
| HIGH | README.md | 239 | LANGEXTRACT_API_KEY=your-api-key-here |
| HIGH | README.md | 268 | api_key="your-api-key-here" # Only use this for testing/development |
| HIGH | docs/examples/medication_examples.md | 51 | api_key="your-api-key-here" # Optional if LANGEXTRACT_API_KEY environment variable is set |
| HIGH | docs/examples/medication_examples.md | 171 | api_key="your-api-key-here" # Optional if LANGEXTRACT_API_KEY environment variable is set |
| HIGH | docs/examples/japanese_extraction.md | 43 | api_key="your-api-key-here" # Optional if env var is set |
| HIGH | examples/custom_provider_plugin/README.md | 125 | provider_kwargs={"api_key": "your-api-key"}, |
| HIGH | examples/custom_provider_plugin/README.md | 142 | provider_kwargs={"api_key": "your-api-key"}, |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/extract_schema_integration_test.py | 113 | |
| LOW | tests/extract_schema_integration_test.py | 147 | |
| LOW | langextract/annotation.py | 46 | |
| LOW | langextract/annotation.py | 285 | |
| LOW | langextract/data_lib.py | 27 | |
| LOW | langextract/io.py | 85 | |
| LOW | langextract/io.py | 265 | |
| LOW | langextract/prompt_validation.py | 128 | |
| LOW | langextract/prompting.py | 52 | |
| LOW | langextract/factory.py | 53 | |
| LOW | langextract/extraction.py | 36 | |
| LOW | langextract/resolver.py | 1075 | |
| LOW | langextract/resolver.py | 424 | |
| LOW | langextract/resolver.py | 578 | |
| LOW | langextract/core/tokenizer.py | 580 | |
| LOW | langextract/core/tokenizer.py | 336 | |
| LOW | langextract/core/format_handler.py | 151 | |
| LOW | langextract/providers/openai_batch.py | 411 | |
| LOW | langextract/providers/__init__.py | 74 | |
| LOW | langextract/providers/__init__.py | 152 | |
| LOW | langextract/providers/gemini.py | 337 | |
| LOW | langextract/providers/gemini.py | 381 | |
| LOW | langextract/providers/openai.py | 284 | |
| LOW | langextract/providers/router.py | 170 | |
| LOW | langextract/providers/gemini_batch.py | 623 | |
| LOW | langextract/providers/gemini_batch.py | 442 | |
| LOW | langextract/providers/schemas/gemini.py | 98 | |
| LOW | langextract/providers/schemas/openai.py | 81 | |
| LOW | examples/ollama/demo_ollama.py | 418 | |
| LOW | benchmarks/benchmark.py | 140 | |
| LOW | benchmarks/benchmark.py | 276 | |
| LOW | benchmarks/benchmark.py | 311 | |
| LOW | benchmarks/fuzzy_benchmark.py | 342 | |
| LOW | benchmarks/plotting.py | 170 | |
| LOW | benchmarks/plotting.py | 220 | |
| LOW | benchmarks/plotting.py | 376 | |
| LOW | benchmarks/plotting.py | 492 | |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/annotation_test.py | 701 | # Define a side effect function so return length based on batch length. |
| MEDIUM | tests/resolver_test.py | 2010 | # Define a chunk that includes the entire text. |
| MEDIUM | tests/resolver_test.py | 2052 | # Define a chunk that includes the entire text. |
| MEDIUM | tests/resolver_test.py | 2098 | # Define a chunk that includes too many tokens. |
| MEDIUM | tests/resolver_test.py | 2139 | # Define a correct chunk. |
| MEDIUM | tests/resolver_test.py | 2166 | # Define a chunk that includes the entire text. |
| MEDIUM | tests/extract_schema_integration_test.py | 188 | # Create a mock instance with required attributes |
| MEDIUM | tests/extract_schema_integration_test.py | 239 | # Create a mock Gemini schema with validate_format that issues warnings |
| MEDIUM | langextract/providers/ollama.py | 24 | # Create an example for few-shot learning |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/annotation_test.py | 503 | # ------------------------------------------------------------------------- |
| MEDIUM | tests/resolver_test.py | 667 | # -------------------------------------------------------------------- |
| MEDIUM | tests/resolver_test.py | 670 | # -------------------------------------------------------------------- |
| MEDIUM | tests/resolver_test.py | 673 | # -------------------------------------------------------------------- |
| MEDIUM | tests/resolver_test.py | 676 | # -------------------------------------------------------------------- |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/annotation_test.py | 91 | - patient: "Jane Doe" |
| LOW | tests/annotation_test.py | 118 | extraction_text="Jane Doe", |
| LOW | tests/annotation_test.py | 217 | - patient: "Jane Doe" |
| LOW | tests/annotation_test.py | 237 | extraction_text="Jane Doe", |
| LOW | tests/annotation_test.py | 336 | - patient: "Jane Doe" |
| LOW | tests/annotation_test.py | 371 | extraction_text="Jane Doe", |
| LOW | tests/schema_test.py | 217 | extraction_text="John Doe", |
| LOW | tests/resolver_test.py | 413 | "patient": "Jane Doe", |
| LOW | tests/resolver_test.py | 430 | extraction_text="Jane Doe", |
| LOW | tests/resolver_test.py | 454 | "patient": "John Doe", |
| LOW | tests/resolver_test.py | 493 | extraction_text="John Doe", |
| LOW | tests/tokenizer_test.py | 812 | expected_substring="Jane Doe", |
| Severity | File | Line | Snippet |
|---|---|---|---|
| CRITICAL | langextract/_compat/README.md | 16 | - `from langextract.inference import InferenceOutputError` → `from langextract.core.exceptions import InferenceOutputErr |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | tests/test_kwargs_passthrough.py | 700 | """Format key should be omitted from payload when None (not sent as null).""" |
| HIGH | langextract/visualization.py | 492 | let animationInterval = null; |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | langextract/core/tokenizer.py | 278 | # Fallback to the robust regex method |
| MEDIUM | benchmarks/plotting.py | 37 | """Generate comprehensive benchmark visualization. |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | langextract/core/base_model.py | 171 | # Check if we have a format_type attribute (providers should set this) |
| LOW | langextract/providers/gemini_batch.py | 236 | # Check if rule already exists |