The batteries-included agent harness.
9193 matches across 18 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | libs/evals/deepagents_evals/radar.py | 338 | def generate_individual_radars( |
| LOW | libs/evals/deepagents_evals/radar.py | 458 | def load_results_from_summary(path: str \| Path) -> list[ModelResult]: |
| LOW | libs/evals/deepagents_evals/trial_summary.py | 27 | def render_per_trial_category_matrix( |
| LOW | libs/evals/tests/evals/conftest.py | 18 | def _parse_openrouter_providers(value: str) -> list[str]: |
| LOW | libs/evals/tests/evals/conftest.py | 193 | def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None: |
| LOW | libs/evals/tests/evals/conftest.py | 237 | def langsmith_experiment_metadata(request: pytest.FixtureRequest) -> dict[str, Any]: |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 207 | def pytest_collection_modifyitems( |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 264 | def _get_public_experiment_url(suite: object, experiment_id: object) -> str \| None: |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 293 | def _collect_experiment_links() -> list[dict[str, str]]: |
| LOW | libs/evals/tests/evals/external_benchmarks.py | 131 | def _create_file_backed_agent(model: BaseChatModel) -> CompiledStateGraph: |
| LOW | libs/evals/tests/evals/external_benchmarks.py | 223 | def _wrap_bfcl_methods_as_tools(instances: dict[str, Any]) -> list[StructuredTool]: |
| LOW | libs/evals/tests/evals/external_benchmarks.py | 249 | def _replay_bfcl_ground_truth(case: dict[str, Any]) -> dict[str, Any]: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 34 | def test_read_file_seeded_state_backend_file(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 76 | def test_write_files_in_parallel(model: str) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 108 | def test_write_files_in_parallel_confirm_with_verification(model: str) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 143 | def test_write_files_in_parallel_ambiguous_confirmation(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 178 | def test_ls_directory_contains_file_yes_no(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 202 | def test_ls_directory_missing_file_yes_no(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 225 | def test_edit_file_replace_text(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 248 | def test_read_then_write_derived_output(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 273 | def test_avoid_unnecessary_tool_calls(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 291 | def test_read_files_in_parallel(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 321 | def test_grep_finds_matching_paths(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 349 | def test_glob_lists_markdown_files(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 377 | def test_find_magic_phrase_deep_nesting(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_file_operations.py | 413 | def test_identify_quote_author_from_directory_parallel_reads( |
| LOW | libs/evals/tests/evals/test_file_operations.py | 488 | def test_identify_quote_author_from_directory_unprompted_efficiency( |
| LOW | libs/evals/tests/evals/test_file_operations.py | 561 | def test_read_file_truncation_recovery_with_pagination( |
| LOW | libs/evals/tests/evals/test_file_operations.py | 599 | def test_read_file_empty_file_reports_empty(model: BaseChatModel) -> None: |
| LOW | libs/evals/tests/evals/test_followup_quality.py | 96 | def test_followup_question_quality(model: BaseChatModel, case: dict[str, Any]) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 361 | def get_current_time_for_location(location_id: int) -> str: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 391 | def get_food_allergic_ingredients(food_id: int) -> list[str]: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 453 | def test_single_tool_list_user_ids(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 482 | def test_single_tool_get_user_email(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 506 | def test_single_tool_get_food_calories(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 530 | def test_two_tools_user_name_from_current_id(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 558 | def test_two_tools_city_for_user(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 590 | def test_two_tools_find_user_then_email(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 618 | def test_three_tools_current_user_city(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 652 | def test_three_tools_find_user_then_city(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 686 | def test_three_tools_current_user_weather(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 720 | def test_four_tools_current_user_favorite_food_names( |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 761 | async def test_four_tools_find_user_food_name_and_calories( |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 802 | def test_four_tools_current_user_location_time_and_weather( |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 847 | def test_five_steps_current_user_food_names_and_calories( |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 895 | def test_four_steps_find_user_city_and_weather(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 945 | def test_four_steps_find_user_food_allergies(model: BaseChatModel, repl_name: str \| None) -> None: |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 998 | def test_four_steps_current_user_food_names_calories_and_allergies( |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 1063 | def test_four_steps_find_user_city_weather_time_and_food_details( |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 1134 | def test_four_steps_find_user_email_city_foods_calories_and_allergies( |
| LOW | libs/evals/tests/evals/test_memory_multiturn.py | 205 | def test_implicit_preference_remembered(model: BaseChatModel, case: dict[str, Any]) -> None: |
| LOW | libs/evals/tests/evals/test_memory_multiturn.py | 232 | def test_explicit_preference_remembered(model: BaseChatModel, case: dict[str, Any]) -> None: |
| LOW | libs/evals/tests/evals/test_memory_multiturn.py | 259 | def test_transient_info_not_persisted(model: BaseChatModel, case: dict[str, Any]) -> None: |
| LOW | libs/evals/tests/evals/test_system_prompt.py | 29 | def test_custom_system_prompt(model: BaseChatModel) -> None: |
| LOW | …bs/evals/tests/evals/test_tool_usage_incident_graph.py | 510 | def list_service_dependencies(service_id: int) -> list[int]: |
| LOW | …bs/evals/tests/evals/test_tool_usage_incident_graph.py | 530 | def get_latest_deploy_for_service(service_id: int) -> int: |
| LOW | …bs/evals/tests/evals/test_tool_usage_incident_graph.py | 749 | async def _incident_graph_tool_error_middleware( |
| LOW | …bs/evals/tests/evals/test_tool_usage_incident_graph.py | 786 | async def test_single_tool_list_incident_ids(agent, model: BaseChatModel) -> None: |
| LOW | …bs/evals/tests/evals/test_tool_usage_incident_graph.py | 807 | async def test_two_tools_current_incident_service_name(agent, model: BaseChatModel) -> None: |
| LOW | …bs/evals/tests/evals/test_tool_usage_incident_graph.py | 829 | async def test_three_tools_find_service_owner_team(agent, model: BaseChatModel) -> None: |
| 6891 more matches not shown… | | | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | libs/evals/tests/evals/external_benchmarks.py | 191 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/external_benchmarks.py | 193 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_followup_quality.py | 27 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_followup_quality.py | 30 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 33 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 35 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 206 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 208 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 249 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 251 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 406 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 408 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 432 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_usage_relational.py | 434 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory_multiturn.py | 44 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory_multiturn.py | 47 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory_multiturn.py | 100 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory_multiturn.py | 102 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory_multiturn.py | 148 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory_multiturn.py | 150 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory_multiturn.py | 194 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory_multiturn.py | 196 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_external_benchmarks.py | 29 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_external_benchmarks.py | 31 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_external_benchmarks.py | 59 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_external_benchmarks.py | 61 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_selection.py | 35 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_selection.py | 37 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_selection.py | 110 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_selection.py | 112 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_selection.py | 193 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_selection.py | 195 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_selection.py | 277 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_tool_selection.py | 279 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 22 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 24 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 74 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 76 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 144 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 146 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 214 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 216 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 514 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 516 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 709 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 711 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 878 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 880 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 942 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 944 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 1075 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/utils.py | 1077 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/llm_judge.py | 118 | # ------------------------------------------------------------------ |
| MEDIUM | libs/evals/tests/evals/llm_judge.py | 120 | # ------------------------------------------------------------------ |
| MEDIUM | libs/evals/tests/evals/llm_judge.py | 197 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/llm_judge.py | 199 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory.py | 350 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory.py | 352 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory.py | 469 | # --------------------------------------------------------------------------- |
| MEDIUM | libs/evals/tests/evals/test_memory.py | 471 | # --------------------------------------------------------------------------- |
| 536 more matches not shown… | | | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | libs/evals/tests/evals/test_tool_usage_relational.py | 0 | apply tool_use category to all tests in this module. tier is set per-test. |
| HIGH | libs/evals/tests/evals/test_tool_selection.py | 0 | apply tool_use category to all tests in this module. tier is set per-test. |
| HIGH | libs/evals/tests/evals/test_todos.py | 0 | apply tool_use category to all tests in this module. tier is set per-test. |
| HIGH | libs/evals/tests/evals/test_memory_multiturn.py | 0 | apply memory category and baseline tier to all tests in this module. |
| HIGH | libs/evals/tests/evals/test_memory.py | 0 | apply memory category and baseline tier to all tests in this module. |
| HIGH | …ts/evals/memory_agent_bench/test_memory_agent_bench.py | 0 | apply memory category and baseline tier to all tests in this module. |
| HIGH | libs/evals/tests/evals/test_system_prompt.py | 0 | apply unit_test category and baseline tier to all tests in this module. |
| HIGH | libs/evals/tests/evals/test_subagents.py | 0 | apply unit_test category and baseline tier to all tests in this module. |
| HIGH | libs/evals/tests/evals/test_skills.py | 0 | apply unit_test category and baseline tier to all tests in this module. |
| HIGH | libs/code/tests/unit_tests/test_thread_selector.py | 0 | pressing esc should dismiss the modal with none result. |
| HIGH | libs/code/tests/unit_tests/test_model_selector.py | 0 | pressing esc should dismiss the modal with none result. |
| HIGH | libs/code/tests/unit_tests/test_agent_selector.py | 0 | pressing esc should dismiss the modal with none result. |
| HIGH | libs/code/tests/unit_tests/test_thread_selector.py | 0 | clicking on non-link text should not open the browser. |
| HIGH | libs/code/tests/unit_tests/test_welcome.py | 0 | clicking on non-link text should not open the browser. |
| HIGH | libs/code/tests/unit_tests/test_messages.py | 0 | clicking on non-link text should not open the browser. |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table checkpoints ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id text not null, |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table checkpoints ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id text not null, |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table checkpoints ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id text not null, |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table if not exists checkpoints ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table if not exists checkpoints ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table if not exists checkpoints ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table if not exists writes ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id text |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table if not exists writes ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id text |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table if not exists writes ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id text |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table if not exists checkpoints ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table if not exists checkpoints ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id |
| HIGH | libs/code/tests/unit_tests/test_sessions.py | 0 | create table if not exists checkpoints ( thread_id text not null, checkpoint_ns text not null default '', checkpoint_id |
| HIGH | libs/code/tests/unit_tests/test_config.py | 0 | [models.providers.anthropic.profile] max_input_tokens = 4096 |
| HIGH | libs/code/tests/unit_tests/test_config.py | 0 | [models.providers.anthropic.profile] max_input_tokens = 4096 |
| HIGH | libs/code/tests/unit_tests/test_config.py | 0 | [models.providers.anthropic.profile] max_input_tokens = 4096 |
| HIGH | libs/code/tests/unit_tests/test_config.py | 0 | [models.providers.anthropic.profile] max_input_tokens = 4096 |
| HIGH | libs/code/tests/unit_tests/test_config.py | 0 | [models.providers.anthropic.profile] max_input_tokens = 4096 |
| HIGH | libs/code/tests/unit_tests/test_config.py | 0 | [models.providers.fireworks] models = ["llama-v3p1-70b"] api_key_env = "fireworks_api_key" |
| HIGH | libs/code/tests/unit_tests/test_config.py | 0 | [models.providers.fireworks] models = ["llama-v3p1-70b"] api_key_env = "fireworks_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_switch.py | 0 | [models.providers.fireworks] models = ["llama-v3p1-70b"] api_key_env = "fireworks_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_switch.py | 0 | [models.providers.fireworks] models = ["llama-v3p1-70b"] api_key_env = "fireworks_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.fireworks] models = ["llama-v3p1-70b"] api_key_env = "fireworks_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.fireworks] models = ["llama-v3p1-70b"] api_key_env = "fireworks_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.fireworks] models = ["llama-v3p1-70b"] api_key_env = "fireworks_api_key" |
| HIGH | libs/code/tests/unit_tests/test_config.py | 0 | [models.providers.ollama] models = ["qwen3:4b"] [models.providers.ollama.params] temperature = 0 [models.providers.ollam |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.ollama] models = ["qwen3:4b"] [models.providers.ollama.params] temperature = 0 [models.providers.ollam |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.ollama] models = ["qwen3:4b"] [models.providers.ollama.params] temperature = 0 [models.providers.ollam |
| HIGH | libs/code/tests/unit_tests/test_update_check.py | 0 | override update_state_file to use a temporary directory. |
| HIGH | libs/code/tests/unit_tests/test_update_check.py | 0 | override update_state_file to use a temporary directory. |
| HIGH | libs/code/tests/unit_tests/test_update_check.py | 0 | override update_state_file to use a temporary directory. |
| HIGH | libs/code/tests/unit_tests/test_update_check.py | 0 | override update_state_file to use a temporary directory. |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] api_key_env = "anthropic_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] api_key_env = "anthropic_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] api_key_env = "anthropic_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.anthropic] models = ["claude-sonnet-4-5"] api_key_env = "anthropic_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.baseten] class_path = "langchain_baseten.chat_models:chatbaseten" api_key_env = "baseten_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.baseten] class_path = "langchain_baseten.chat_models:chatbaseten" api_key_env = "baseten_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.baseten] class_path = "langchain_baseten.chat_models:chatbaseten" api_key_env = "baseten_api_key" |
| HIGH | libs/code/tests/unit_tests/test_model_config.py | 0 | [models.providers.baseten] class_path = "langchain_baseten.chat_models:chatbaseten" api_key_env = "baseten_api_key" |
| 53 more matches not shown… | | | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | AGENTS.md | 205 | Send an email to a recipient with specified priority. Any additional context about the function can go here. A |
| HIGH | libs/evals/deepagents_evals/radar.py | 459 | Load model results from an `evals_summary.json` file. The summary file is a JSON array of objects. Each object must |
| HIGH | libs/evals/tests/evals/utils.py | 83 | Return ``True`` when the assertion holds. Args: trajectory: The agent trajectory to check. |
| HIGH | libs/evals/tests/evals/utils.py | 97 | Return a human-readable explanation of why the check failed. Args: trajectory: The agent trajectory |
| HIGH | libs/evals/tests/evals/utils.py | 116 | Return ``True`` when the assertion holds. Args: trajectory: The agent trajectory to check. |
| HIGH | libs/evals/tests/evals/utils.py | 130 | Return a human-readable explanation of why the check failed. Args: trajectory: The agent trajectory |
| HIGH | libs/evals/tests/evals/utils.py | 177 | Coerce the ``files`` value from an agent result into ``dict[str, str]``. Args: raw_files: The raw files obj |
| HIGH | libs/evals/tests/evals/utils.py | 948 | Build an ``AgentTrajectory`` from a raw agent invoke result. Args: result: The mapping returned by ``agent. |
| HIGH | libs/evals/tests/evals/utils.py | 1142 | Run agent eval against the given query. Args: agent: The compiled state graph to invoke. query: A s |
| HIGH | libs/evals/tests/evals/utils.py | 1193 | Run agent eval asynchronously against the given query. Args: agent: The compiled state graph to invoke. |
| HIGH | libs/evals/tests/evals/llm_judge.py | 207 | Create an `LLMJudge` success assertion. Wraps `openevals.llm.create_llm_as_judge` to evaluate each criterion in |
| HIGH | libs/evals/tests/evals/tau2_airline/domain.py | 224 | Load a single task by ID from tasks.json. Args: task_id: The string task ID (e.g. "2", "14"). Returns: |
| HIGH | libs/evals/tests/evals/memory_agent_bench/data_utils.py | 55 | Load MemoryAgentBench data from HuggingFace. Args: split: Dataset split name (e.g. `Conflict_Resolution`). |
| HIGH | libs/evals/deepagents_harbor/langsmith.py | 383 | Get a LangSmith dataset by name. Args: dataset_name: Name of the dataset to retrieve. session: aioh |
| HIGH | libs/evals/deepagents_harbor/langsmith.py | 418 | Create a LangSmith experiment session for the given dataset. Args: dataset_name: Name of the LangSmith data |
| HIGH | libs/evals/deepagents_harbor/langsmith.py | 510 | Extract reward from trial's `result.json`. Falls back to `0.0` when the verifier did not produce a usable reward |
| HIGH | libs/evals/scripts/generate_radar.py | 35 | Load per-category results from a JSON file. Expected format: [ { "model": "ant |
| HIGH | libs/evals/scripts/composite_radar.py | 47 | Download `evals-summary` from a single GHA run via `gh run download`. Args: run_id: GitHub Actions run ID. |
| HIGH | libs/evals/scripts/composite_radar.py | 76 | Flatten a list of per-run summary arrays into a single array. Args: summaries: Paths to per-run `evals_summ |
| HIGH | libs/evals/scripts/run_trials.py | 174 | Aggregate per-trial eval reports into a single summary dict. Args: reports: Per-trial report dicts as writt |
| HIGH | libs/code/deepagents_code/sessions.py | 320 | List threads from checkpoints table. Args: agent_name: Optional filter by agent name. limit: Maximu |
| HIGH | libs/code/deepagents_code/textual_adapter.py | 384 | Execute a task with output directed to Textual UI. This is the Textual-compatible version of execute_task() that us |
| HIGH | libs/code/deepagents_code/config.py | 740 | Parse shell allow-list from string. Args: allow_list_str: Comma-separated list of commands, `'recommended'` |
| HIGH | libs/code/deepagents_code/config.py | 856 | Coerce a raw `interpreter_ptc` value into the canonical shape. Args: raw: Value loaded from TOML or supplie |
| HIGH | libs/code/deepagents_code/config.py | 1475 | Get the global agent directory path. Args: agent_name: Name of the agent Returns: |
| HIGH | libs/code/deepagents_code/config.py | 1495 | Ensure the global agent directory exists and return its path. Args: agent_name: Name of the agent |
| HIGH | libs/code/deepagents_code/config.py | 1939 | Fetch the LangSmith project URL, raising on any failure. Successful results are cached at module level so repeated |
| HIGH | libs/code/deepagents_code/config.py | 2340 | Import and instantiate a custom `BaseChatModel` class. Args: class_path: Fully-qualified class in `module.p |
| HIGH | libs/code/deepagents_code/config.py | 2402 | Create a model using langchain's `init_chat_model`. Args: model_name: Model identifier. provider: P |
| HIGH | libs/code/deepagents_code/config.py | 2593 | Create a chat model. Uses `init_chat_model` for standard providers, or imports a custom `BaseChatModel` subclas |
| HIGH | libs/code/deepagents_code/model_config.py | 341 | Parse a model specification string. Args: spec: Model specification in `'provider:model'` format. |
| HIGH | libs/code/deepagents_code/model_config.py | 631 | Load `_PROFILES` from a provider's data module. Results are cached by `module_path` so repeated calls (e.g., from b |
| HIGH | libs/code/deepagents_code/model_config.py | 2573 | Save the sort order preference for the thread selector. Args: sort_order: `"updated_at"` or `"created_at"`. |
| HIGH | libs/code/deepagents_code/update_check.py | 924 | Return the shell command that adds a package to the dcode tool env. Args: package: Package name to install |
| HIGH | libs/code/deepagents_code/update_check.py | 946 | Return the shell command that adds `extra` to the installed dcode tool. The documented install path is `uv tool ins |
| HIGH | libs/code/deepagents_code/mcp_disabled.py | 42 | Read the TOML config file. Args: config_path: Path to the TOML config file. Returns: Parsed TO |
| HIGH | libs/code/deepagents_code/ui.py | 24 | Argparse type for integer arguments that must be >= 1. Args: value: Raw argument string to parse. Retu |
| HIGH | libs/code/deepagents_code/mcp_auth.py | 129 | Resolve `${VAR}` env-var references in header values. Args: headers: Raw header mapping from MCP config. |
| HIGH | libs/code/deepagents_code/mcp_auth.py | 153 | Expand `${VAR}` references in `s` against the current environment. Args: s: Raw header value. heade |
| HIGH | libs/code/deepagents_code/mcp_auth.py | 807 | Parse a provider callback URL into `(code, state)`. Args: url: Raw callback URL pasted by the user. Re |
| HIGH | libs/code/deepagents_code/mcp_auth.py | 1082 | Run OAuth 2.0 Device Authorization Grant and return the token. Args: device_code_url: Provider endpoint tha |
| HIGH | libs/code/deepagents_code/tools.py | 73 | Reject URLs that target private/internal/metadata addresses. Resolves the URL's hostname and rejects any URL whose |
| HIGH | libs/code/deepagents_code/tools.py | 346 | Fetch `url`, re-validating each redirect hop against the SSRF guard. Each hop is validated by `_validate_url` and i |
| HIGH | libs/code/deepagents_code/server_graph.py | 70 | Assemble the tool list based on server config. Loads built-in tools (conditionally including web search when Tavily |
| HIGH | libs/code/deepagents_code/tool_display.py | 132 | Format tool calls for display with tool-specific smart formatting. Shows the most relevant information for each too |
| HIGH | libs/code/deepagents_code/event_bus.py | 360 | Decode one newline-delimited JSON external event. Args: data: Raw JSON line. source: Transport-spec |
| HIGH | libs/code/deepagents_code/server_manager.py | 294 | Start a LangGraph server and return a connected remote agent client. Args: assistant_id: Agent identifier. |
| HIGH | libs/code/deepagents_code/agent.py | 219 | Resolve the configured PTC allowlist to a concrete list of tool names. Args: ptc: Raw `interpreter_ptc` val |
| HIGH | libs/code/deepagents_code/agent.py | 1049 | Create a CLI-configured agent with flexible options. This is the main entry point for creating a Deep Agents Code a |
| HIGH | libs/code/deepagents_code/app.py | 697 | Extract `--model-params` and its JSON value from a `/model` arg string. Handles quoted (`'...'` / `"..."`) and bare |
| HIGH | libs/code/deepagents_code/app.py | 4269 | Strip `prefix` from `value`, logging if a wrong prefix was supplied. Three submission paths feed `_process_mess |
| HIGH | libs/code/deepagents_code/remote_client.py | 31 | Extract and validate that `thread_id` is present in config. Args: config: Config dict with `configurable.th |
| HIGH | libs/code/deepagents_code/remote_client.py | 142 | Stream agent execution, yielding tuples matching Pregel's format. Delegates to `RemoteGraph.astream` (which han |
| HIGH | libs/code/deepagents_code/remote_client.py | 220 | Get the current state of a thread. Returns `None` when the thread does not exist on the server (404) or |
| HIGH | libs/code/deepagents_code/_server_config.py | 50 | Read a JSON-encoded `DEEPAGENTS_CODE_SERVER_*` variable. Args: suffix: Variable name suffix after the `DEEP |
| HIGH | libs/code/deepagents_code/_server_config.py | 427 | Resolve a possibly-relative path to absolute. The server subprocess runs in a different working directory, so relat |
| HIGH | libs/code/deepagents_code/mcp_tools.py | 387 | Create and initialize a new cached session entry. Args: server_name: MCP server name. Retu |
| HIGH | libs/code/deepagents_code/mcp_tools.py | 607 | Load and validate MCP configuration from a JSON file. Supports multiple server types: - stdio: Process-based s |
| HIGH | libs/code/deepagents_code/mcp_tools.py | 936 | Enumerate MCP tools from `session`, paginating until exhausted. Args: session: Initialized MCP client sessi |
| HIGH | libs/code/deepagents_code/mcp_tools.py | 1246 | Build MCP connections from a validated config and load tools. Discovery always opens throwaway sessions to capture |
| 44 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | libs/evals/deepagents_harbor/backend.py | 170 | NR > offset && NR <= offset + limit {{ print }} |
| HIGH | libs/evals/deepagents_harbor/backend.py | 448 | cd {safe_path} 2>/dev/null || exit 1 |
| HIGH | libs/evals/deepagents_harbor/metadata.py | 108 | "nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 0", timeout=10 |
| HIGH | libs/evals/deepagents_harbor/metadata.py | 150 | os_result = await backend.aexecute("uname -s -r 2>/dev/null || echo unknown", timeout=10) |
| HIGH | libs/evals/deepagents_harbor/langsmith_environment.py | 384 | "[ -d /app ] && echo /app || echo /", |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 159 | ("ls && cat file", True), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 160 | ("ls && rm file", False), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 161 | ("ls -la && grep pattern file && cat output.txt", True), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 162 | ("ls && cat file && grep test", True), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 163 | ("cat a.txt && cat b.txt && cat c.txt", True), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 164 | ("ls && rm -rf /", False), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 177 | ("ls || cat file", True), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 178 | ("ls || rm file", False), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 549 | ("ls && cat file", True), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 550 | ("ls && grep test file", True), |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 639 | assert not contains_dangerous_patterns("ls && cat file") |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 170 | """Test commands with && operator (commonly used by Claude for chaining).""" |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 184 | """Test commands with || operator.""" |
| HIGH | libs/code/tests/unit_tests/test_shell_allow_list.py | 638 | """Double && should not be flagged as dangerous (it's a safe operator).""" |
| HIGH | …ode/tests/integration_tests/test_sandbox_operations.py | 41 | "rm -rf /tmp/test_sandbox_ops && mkdir -p /tmp/test_sandbox_ops" |
| HIGH | libs/code/deepagents_code/local_context.py | 233 | $IN_GIT && ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" |
| HIGH | libs/code/deepagents_code/local_context.py | 233 | $IN_GIT && ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" |
| HIGH | libs/code/deepagents_code/local_context.py | 247 | $MONOREPO && echo "- Monorepo: yes" |
| HIGH | libs/code/deepagents_code/local_context.py | 265 | if grep -q '\[tool\.uv\]' pyproject.toml 2>/dev/null; then PKG="Python: uv" |
| HIGH | libs/code/deepagents_code/local_context.py | 266 | elif grep -q '\[tool\.poetry\]' pyproject.toml 2>/dev/null; then PKG="Python: poetry" |
| HIGH | libs/code/deepagents_code/local_context.py | 311 | BRANCH="$(git rev-parse --abbrev-ref HEAD 2>/dev/null)" |
| HIGH | libs/code/deepagents_code/local_context.py | 313 | COMMIT="$(git rev-parse --short HEAD 2>/dev/null)" |
| HIGH | libs/code/deepagents_code/local_context.py | 348 | if [ -f Makefile ] && grep -qE '^tests?:' Makefile 2>/dev/null; then TC="make test" |
| HIGH | libs/code/deepagents_code/local_context.py | 355 | && grep -q '"test"' package.json 2>/dev/null; then |
| HIGH | libs/code/deepagents_code/local_context.py | 373 | { ls -1 2>/dev/null; [ -e .deepagents ] && echo .deepagents; } | |
| HIGH | libs/code/deepagents_code/local_context.py | 422 | $TREE_TRUNCATED && echo "... (more lines truncated)" |
| HIGH | …s/partners/quickjs/tests/unit_tests/test_end_to_end.py | 241 | code = "const r = await tools.getUserEmailOrNone({user_id: -1});\n`${r === null}`;" |
| HIGH | libs/partners/quickjs/tests/benchmarks/_common.py | 24 | " values.push(await tools.echoPayload({value: `value-${i}`}));" |
| HIGH | libs/partners/quickjs/tests/benchmarks/_common.py | 33 | " values.push(value);" |
| HIGH | libs/partners/quickjs/langchain_quickjs/middleware.py | 232 | # eval runs on another (and sees `tools` undefined). |
| HIGH | …epagents/tests/unit_tests/test_subagent_transformer.py | 130 | mux.push( |
| HIGH | …epagents/tests/unit_tests/test_subagent_transformer.py | 138 | mux.push(_child_tasks_start([*parent_ns, f"tools:{parent_task_id}"])) |
| HIGH | …epagents/tests/unit_tests/test_subagent_transformer.py | 241 | mux.push(_parent_tasks_result([], parent_task_id="x")) |
| HIGH | …epagents/tests/unit_tests/test_subagent_transformer.py | 255 | mux.push(_parent_tasks_result([], parent_task_id="x", error="boom")) |
| HIGH | …epagents/tests/unit_tests/test_subagent_transformer.py | 274 | mux.push(_values({"k": 1}, namespace=["tools:x"])) |
| HIGH | …epagents/tests/unit_tests/test_subagent_transformer.py | 275 | mux.push(_values({"k": 2}, namespace=["tools:x"])) |
| HIGH | …epagents/tests/unit_tests/test_subagent_transformer.py | 292 | mux.push(_values({"k": "root"}, namespace=[])) |
| HIGH | …ents/tests/unit_tests/test_local_sandbox_operations.py | 254 | sandbox.execute("rm -rf /tmp/test_sandbox_ops && mkdir -p /tmp/test_sandbox_ops") |
| HIGH | libs/deepagents/deepagents/_subagent_transformer.py | 203 | self._log.push(handle) |
| HIGH | libs/deepagents/deepagents/_subagent_transformer.py | 245 | handle._mux.push(event) |
| HIGH | libs/deepagents/deepagents/middleware/subagents.py | 359 | if (n <= 1) return false |
| HIGH | libs/deepagents/deepagents/middleware/subagents.py | 361 | if (n % i === 0) return false |
| HIGH | libs/deepagents/deepagents/middleware/subagents.py | 363 | return true |
| HIGH | libs/deepagents/deepagents/middleware/filesystem.py | 427 | - Use '&&' when commands depend on each other (e.g., "mkdir dir && cd dir") |
| HIGH | libs/deepagents/deepagents/middleware/filesystem.py | 435 | - execute(command="npm install && npm test") |
| HIGH | libs/deepagents/deepagents/middleware/filesystem.py | 439 | - execute(command="cd /foo/bar && pytest tests") # Use absolute path instead |
| HIGH | libs/deepagents/deepagents/backends/sandbox.py | 793 | cmd = f"grep {grep_opts} {glob_pattern} -e {pattern_escaped} {search_path} 2>/dev/null || true" |
| HIGH | libs/acp/tests/test_dangerous_patterns.py | 65 | assert not contains_dangerous_patterns("cd dir && ls") |
| HIGH | libs/acp/tests/test_dangerous_patterns.py | 66 | assert not contains_dangerous_patterns("make && make test") |
| HIGH | libs/acp/tests/test_dangerous_patterns.py | 76 | assert extract_command_types("test -f foo || echo missing") == ["test", "echo"] |
| HIGH | libs/acp/tests/test_dangerous_patterns.py | 79 | result = extract_command_types("cd dir && ls ; echo done || cat file") |
| HIGH | libs/acp/tests/test_dangerous_patterns.py | 79 | result = extract_command_types("cd dir && ls ; echo done || cat file") |
| HIGH | libs/acp/tests/test_dangerous_patterns.py | 84 | assert extract_command_types("cd /path && npm install") == ["cd", "npm install"] |
| HIGH | libs/acp/tests/test_dangerous_patterns.py | 88 | cmd = "cd /Users/test/project && python -m pytest tests/test_agent.py -v" |
| HIGH | libs/acp/tests/test_dangerous_patterns.py | 83 | """Ensure existing && and | splitting still works.""" |
| 24 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | libs/evals/deepagents_evals/radar.py | 8 | |
| LOW | libs/evals/deepagents_evals/radar.py | 25 | |
| LOW | libs/evals/deepagents_evals/cli.py | 30 | |
| LOW | libs/evals/deepagents_evals/trial_summary.py | 3 | |
| LOW | libs/evals/tests/evals/conftest.py | 1 | |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 1 | |
| LOW | libs/evals/tests/evals/external_benchmarks.py | 1 | |
| LOW | libs/evals/tests/evals/test_file_operations.py | 10 | |
| LOW | libs/evals/tests/evals/test_followup_quality.py | 11 | |
| LOW | libs/evals/tests/evals/test_tool_usage_relational.py | 8 | |
| LOW | libs/evals/tests/evals/test_memory_multiturn.py | 13 | |
| LOW | libs/evals/tests/evals/test_external_benchmarks.py | 11 | |
| LOW | libs/evals/tests/evals/test_system_prompt.py | 8 | |
| LOW | …bs/evals/tests/evals/test_tool_usage_incident_graph.py | 8 | |
| LOW | libs/evals/tests/evals/test_subagents.py | 9 | |
| LOW | libs/evals/tests/evals/test_tool_selection.py | 11 | |
| LOW | libs/evals/tests/evals/utils.py | 1 | |
| LOW | libs/evals/tests/evals/llm_judge.py | 12 | |
| LOW | …/tests/evals/test_iterative_constraint_satisfaction.py | 19 | |
| LOW | libs/evals/tests/evals/test_todos.py | 9 | |
| LOW | libs/evals/tests/evals/test_skills.py | 10 | |
| LOW | libs/evals/tests/evals/test_memory.py | 11 | |
| LOW | …bs/evals/tests/evals/test_langchain_middleware_todo.py | 23 | |
| LOW | libs/evals/tests/evals/tau2_airline/runner.py | 13 | |
| LOW | libs/evals/tests/evals/tau2_airline/domain.py | 12 | |
| LOW | libs/evals/tests/evals/tau2_airline/evaluation.py | 18 | |
| LOW | libs/evals/tests/evals/tau2_airline/user_sim.py | 13 | |
| LOW | …bs/evals/tests/evals/tau2_airline/test_tau2_airline.py | 17 | |
| LOW | libs/evals/tests/evals/memory_agent_bench/configs.py | 9 | |
| LOW | libs/evals/tests/evals/memory_agent_bench/data_utils.py | 9 | |
| LOW | libs/evals/tests/evals/memory_agent_bench/eval_utils.py | 10 | |
| LOW | …ts/evals/memory_agent_bench/test_memory_agent_bench.py | 15 | |
| LOW | libs/evals/tests/evals/data/bfcl_apis/trading_bot.py | 3 | |
| LOW | libs/evals/tests/unit_tests/test_category_tagging.py | 1 | |
| LOW | …ls/tests/unit_tests/test_external_benchmark_helpers.py | 1 | |
| LOW | libs/evals/tests/unit_tests/test_run_trials.py | 3 | |
| LOW | libs/evals/tests/unit_tests/test_infra.py | 3 | |
| LOW | …evals/tests/unit_tests/test_conftest_model_required.py | 8 | |
| LOW | …s/evals/tests/unit_tests/test_generate_radar_script.py | 1 | |
| LOW | libs/evals/tests/unit_tests/test_deepagents_wrapper.py | 3 | |
| LOW | libs/evals/tests/unit_tests/test_harbor_backend.py | 3 | |
| LOW | …s/evals/tests/unit_tests/test_analyze_eval_failures.py | 7 | |
| LOW | libs/evals/tests/unit_tests/test_model_groups.py | 3 | |
| LOW | …s/evals/tests/unit_tests/test_langsmith_environment.py | 3 | |
| LOW | libs/evals/tests/unit_tests/test_trajectory.py | 7 | |
| LOW | libs/evals/tests/unit_tests/test_pytest_reporter.py | 3 | |
| LOW | libs/evals/tests/unit_tests/test_radar.py | 1 | |
| LOW | libs/evals/tests/unit_tests/test_cli.py | 3 | |
| LOW | libs/evals/tests/unit_tests/test_langsmith.py | 3 | |
| LOW | libs/evals/tests/unit_tests/test_trial_summary.py | 3 | |
| LOW | libs/evals/tests/unit_tests/test_eval_catalog.py | 3 | |
| LOW | libs/evals/deepagents_harbor/metadata.py | 7 | |
| LOW | libs/evals/deepagents_harbor/__init__.py | 3 | |
| LOW | libs/evals/deepagents_harbor/__init__.py | 4 | |
| LOW | libs/evals/deepagents_harbor/__init__.py | 5 | |
| LOW | libs/evals/deepagents_harbor/__init__.py | 6 | |
| LOW | libs/evals/deepagents_harbor/__init__.py | 6 | |
| LOW | libs/evals/deepagents_harbor/__init__.py | 6 | |
| LOW | libs/evals/deepagents_harbor/__init__.py | 6 | |
| LOW | libs/evals/deepagents_harbor/__init__.py | 6 | |
| 474 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | libs/evals/tests/evals/data/bfcl_apis/trading_bot.py | 200 | # Define the start and end dates for the range |
| MEDIUM | libs/evals/scripts/analyze.py | 628 | # Create the deep agent for trajectory analysis |
| MEDIUM | libs/evals/scripts/analyze.py | 654 | # Create the user message with the trajectory and explicit status |
| MEDIUM | libs/code/tests/unit_tests/test_media_utils.py | 459 | # Create a file that reports > 20 MB via stat |
| MEDIUM | libs/code/tests/unit_tests/test_media_utils.py | 155 | # Create a small PNG in memory |
| MEDIUM | libs/code/tests/unit_tests/test_media_utils.py | 248 | # Create a small valid PNG |
| MEDIUM | libs/code/tests/unit_tests/test_media_utils.py | 419 | # Create a minimal valid MP4 file (ftyp box) |
| MEDIUM | libs/code/tests/unit_tests/test_agent.py | 837 | # Create the default agent directory with AGENTS.md |
| MEDIUM | libs/code/tests/unit_tests/test_agent.py | 842 | # Create a non-default agent |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 122 | # Create a fake model that returns predefined messages |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 144 | # Create a CLI agent with the fake model |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 183 | # Create a CLI agent with the fake model |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 244 | # Create a fake model that calls sample_tool |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 266 | # Create a CLI agent with the fake model and sample_tool |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 297 | # Create a test file to list |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 301 | # Create a fake model that uses filesystem tools |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 323 | # Create a CLI agent with the fake model |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 351 | # Create a fake model that makes multiple tool calls |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 384 | # Create a CLI agent with the fake model and sample_tool |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 417 | # Create a simple fake model |
| MEDIUM | libs/code/tests/unit_tests/test_end_to_end.py | 426 | # Create a CLI agent |
| MEDIUM | libs/code/tests/unit_tests/test_message_store.py | 590 | # Create a tool message with various states |
| MEDIUM | libs/code/tests/unit_tests/test_config.py | 47 | # Create a mock project structure |
| MEDIUM | libs/code/tests/unit_tests/test_config.py | 53 | # Create a subdirectory to search from |
| MEDIUM | libs/code/tests/unit_tests/test_config.py | 599 | # Create a mock project with .git |
| MEDIUM | libs/code/tests/unit_tests/test_config.py | 610 | # Create a directory without .git |
| MEDIUM | libs/code/tests/unit_tests/skills/test_load.py | 686 | # Create a healthy user skills directory |
| MEDIUM | …ode/tests/integration_tests/test_sandbox_operations.py | 287 | # Create a line with 3000 characters |
| MEDIUM | …s/code/tests/integration_tests/test_sandbox_factory.py | 215 | # Create a directory |
| MEDIUM | …s/code/tests/integration_tests/test_sandbox_factory.py | 285 | # Create a directory |
| MEDIUM | libs/code/deepagents_code/media_utils.py | 340 | # Create a temp file for the image |
| MEDIUM | libs/code/deepagents_code/agent.py | 1446 | # Create the agent |
| MEDIUM | libs/code/deepagents_code/app.py | 6273 | # Create the stats object up-front and store on the app so |
| MEDIUM | libs/deepagents/tests/unit_tests/test_middleware.py | 1857 | # Create a list that exceeds the token limit (20000 tokens * 4 chars = 80000 chars) |
| MEDIUM | libs/deepagents/tests/unit_tests/test_middleware.py | 1891 | # Create a large tool result |
| MEDIUM | libs/deepagents/tests/unit_tests/test_middleware.py | 1899 | # Create a request for a tool in TOOLS_EXCLUDED_FROM_EVICTION |
| MEDIUM | libs/deepagents/tests/unit_tests/test_middleware.py | 1921 | # Create a large tool result |
| MEDIUM | libs/deepagents/tests/unit_tests/test_middleware.py | 1929 | # Create a request for a tool NOT in TOOLS_EXCLUDED_FROM_EVICTION |
| MEDIUM | libs/deepagents/tests/unit_tests/test_middleware.py | 1955 | # Create a ToolMessage with the large execute output |
| MEDIUM | libs/deepagents/tests/unit_tests/test_middleware.py | 1962 | # Create a request for the execute tool |
| MEDIUM | libs/deepagents/tests/unit_tests/test_middleware.py | 439 | # Create a large number of files that will exceed TOOL_RESULT_TOKEN_LIMIT |
| MEDIUM | …/deepagents/tests/unit_tests/test_file_system_tools.py | 55 | # Create a deep agent with the fake model and a memory saver |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 880 | # Create a file with content that exceeds the truncation threshold |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 889 | # Create a fake model that calls read_file |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 938 | # Create a small file that doesn't exceed the truncation threshold |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 946 | # Create a fake model that calls read_file |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 992 | # Create a large file with many lines (each line is 500 chars + newline) |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 1002 | # Create a fake model that calls read_file with a non-zero offset |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 1048 | # Create a large file |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 1056 | # Create a fake model that calls read_file |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 130 | # Create a fake model that returns predefined messages |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 152 | # Create a deep agent with the fake model |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 262 | # Create a fake model that calls sample_tool |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 284 | # Create a deep agent with the fake model and sample_tool |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 306 | # Create a fake model that uses filesystem tools |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 328 | # Create a deep agent with the fake model |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 347 | # Create a fake model that makes multiple tool calls |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 380 | # Create a deep agent with the fake model and sample_tool |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 492 | # Create a fake model that calls read_file |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 589 | # Create a model that handles both turns |
| 60 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | libs/evals/deepagents_evals/cli.py | 210 | except Exception as exc: # noqa: BLE001 |
| LOW | libs/evals/deepagents_evals/cli.py | 229 | except Exception as exc: # noqa: BLE001 |
| LOW | libs/evals/deepagents_evals/cli.py | 298 | except Exception as exc: # noqa: BLE001 |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 116 | except Exception: # noqa: BLE001 |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 158 | except Exception as exc: # noqa: BLE001 |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 171 | except Exception as exc: # noqa: BLE001 |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 202 | except Exception as exc: # noqa: BLE001 |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 287 | except Exception as exc: # noqa: BLE001 |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 333 | except Exception as exc: # noqa: BLE001 # private API; best-effort |
| LOW | libs/evals/tests/evals/external_benchmarks.py | 338 | except Exception: |
| LOW | libs/evals/tests/evals/llm_judge.py | 166 | except Exception as exc: |
| LOW | libs/evals/tests/evals/llm_judge.py | 188 | except Exception as exc: # noqa: BLE001 |
| LOW | libs/evals/tests/evals/memory_agent_bench/data_utils.py | 182 | except Exception as exc: # noqa: BLE001 |
| LOW | libs/evals/deepagents_harbor/backend.py | 234 | except Exception as exc: |
| LOW | libs/evals/deepagents_harbor/backend.py | 276 | except Exception as exc: |
| LOW | libs/evals/deepagents_harbor/backend.py | 307 | except Exception as exc: |
| LOW | libs/evals/deepagents_harbor/backend.py | 515 | except Exception as exc: |
| LOW | libs/evals/deepagents_harbor/backend.py | 542 | except Exception as exc: |
| LOW | libs/evals/deepagents_harbor/metadata.py | 115 | except Exception: # noqa: BLE001 # best-effort metadata collection |
| LOW | libs/evals/deepagents_harbor/metadata.py | 131 | except Exception: # noqa: BLE001 # best-effort metadata collection |
| LOW | libs/evals/deepagents_harbor/metadata.py | 146 | except Exception: # noqa: BLE001 # best-effort metadata collection |
| LOW | libs/evals/deepagents_harbor/metadata.py | 152 | except Exception: # noqa: BLE001 # best-effort metadata collection |
| LOW | libs/evals/deepagents_harbor/langsmith_environment.py | 347 | except Exception as create_err: |
| LOW | libs/evals/deepagents_harbor/langsmith_environment.py | 375 | except Exception: # noqa: BLE001 |
| LOW | libs/evals/deepagents_harbor/langsmith_environment.py | 404 | except Exception: # noqa: BLE001 |
| LOW | libs/evals/deepagents_harbor/langsmith_environment.py | 550 | except Exception: # noqa: BLE001 |
| LOW | libs/evals/deepagents_harbor/deepagents_wrapper.py | 181 | except Exception: # noqa: BLE001 # gracefully degrade when LangSmith is unavailable |
| LOW | libs/evals/deepagents_harbor/deepagents_wrapper.py | 265 | except Exception: # noqa: BLE001 # metadata is supplementary; never abort a trial |
| LOW | libs/evals/scripts/generate_radar.py | 242 | except Exception as exc: # noqa: BLE001 # top-level script should surface chart backend failures cleanly |
| LOW | libs/evals/scripts/generate_radar.py | 260 | except Exception as exc: # noqa: BLE001 # top-level script should surface chart backend failures cleanly |
| MEDIUM | libs/evals/scripts/harbor_langsmith.py | 155 | print(f"Error: --metadata must be valid JSON: {exc}", file=sys.stderr) |
| MEDIUM | libs/evals/scripts/harbor_langsmith.py | 158 | print("Error: --metadata must be a JSON object.", file=sys.stderr) |
| MEDIUM | libs/evals/scripts/harbor_langsmith.py | 170 | print(f"Error: {exc}", file=sys.stderr) |
| MEDIUM | libs/evals/scripts/harbor_langsmith.py | 173 | print(f"Error: failed to create experiment: {exc}", file=sys.stderr) |
| LOW | libs/evals/scripts/harbor_langsmith.py | 175 | except Exception as exc: # noqa: BLE001 # unexpected; distinct exit code |
| MEDIUM | libs/evals/scripts/harbor_langsmith.py | 176 | print(f"Error: unexpected failure creating experiment: {exc!r}", file=sys.stderr) |
| MEDIUM | libs/evals/scripts/harbor_langsmith.py | 183 | print(f"Error: Job folder does not exist: {args.job_folder}") |
| LOW | libs/evals/scripts/analyze.py | 144 | except Exception: |
| LOW | libs/evals/scripts/analyze.py | 158 | except Exception: |
| LOW | libs/evals/scripts/analyze.py | 179 | except Exception: |
| LOW | libs/evals/scripts/analyze.py | 209 | except Exception: |
| LOW | libs/evals/scripts/analyze.py | 228 | except Exception: |
| LOW | libs/evals/scripts/analyze.py | 291 | except Exception: |
| MEDIUM | libs/evals/scripts/analyze.py | 371 | print(f"Error: Directory {jobs_dir} does not exist") |
| LOW | libs/evals/scripts/analyze.py | 510 | except Exception: |
| LOW | libs/evals/scripts/analyze.py | 868 | except Exception as e: |
| LOW | libs/code/tests/unit_tests/test_mcp_login_modal.py | 37 | except Exception: # noqa: BLE001 # screen may still be mounting |
| LOW | libs/code/examples/skills/arxiv-search/arxiv_search.py | 40 | except Exception as e: |
| MEDIUM | …de/examples/skills/skill-creator/scripts/init_skill.py | 214 | print(f"Error: Skill directory already exists: {skill_dir}") |
| LOW | …de/examples/skills/skill-creator/scripts/init_skill.py | 221 | except Exception as e: |
| MEDIUM | …de/examples/skills/skill-creator/scripts/init_skill.py | 222 | print(f"Error creating directory: {e}") |
| LOW | …de/examples/skills/skill-creator/scripts/init_skill.py | 235 | except Exception as e: |
| MEDIUM | …de/examples/skills/skill-creator/scripts/init_skill.py | 236 | print(f"Error creating SKILL.md: {e}") |
| LOW | …de/examples/skills/skill-creator/scripts/init_skill.py | 262 | except Exception as e: |
| MEDIUM | …de/examples/skills/skill-creator/scripts/init_skill.py | 263 | print(f"Error creating resource directories: {e}") |
| LOW | libs/code/deepagents_code/sessions.py | 488 | except Exception: |
| LOW | libs/code/deepagents_code/sessions.py | 879 | except Exception: |
| LOW | libs/code/deepagents_code/sessions.py | 945 | except Exception: |
| LOW | libs/code/deepagents_code/sessions.py | 1045 | except Exception: |
| LOW | libs/code/deepagents_code/hooks.py | 109 | except Exception: |
| 344 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | libs/evals/tests/evals/pytest_reporter.py | 120 | |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 293 | |
| LOW | libs/evals/tests/evals/pytest_reporter.py | 342 | |
| LOW | libs/evals/tests/evals/utils.py | 947 | |
| LOW | …/tests/evals/test_iterative_constraint_satisfaction.py | 84 | |
| LOW | libs/evals/tests/evals/tau2_airline/domain.py | 427 | |
| LOW | libs/evals/tests/evals/tau2_airline/domain.py | 512 | |
| LOW | libs/evals/tests/evals/tau2_airline/evaluation.py | 156 | |
| LOW | libs/evals/tests/evals/tau2_airline/evaluation.py | 162 | |
| LOW | …bs/evals/tests/evals/data/bfcl_apis/vehicle_control.py | 331 | |
| LOW | …bs/evals/tests/evals/data/bfcl_apis/vehicle_control.py | 553 | |
| LOW | …bs/evals/tests/evals/data/bfcl_apis/vehicle_control.py | 618 | |
| LOW | libs/evals/tests/unit_tests/test_category_tagging.py | 90 | |
| LOW | libs/evals/deepagents_harbor/backend.py | 532 | |
| LOW | libs/evals/deepagents_harbor/langsmith.py | 639 | |
| LOW | libs/evals/deepagents_harbor/failure.py | 95 | |
| LOW | libs/evals/deepagents_harbor/deepagents_wrapper.py | 91 | |
| LOW | libs/evals/deepagents_harbor/deepagents_wrapper.py | 361 | |
| LOW | libs/evals/scripts/generate_radar.py | 119 | |
| LOW | libs/evals/scripts/generate_eval_catalog.py | 58 | |
| LOW | libs/evals/scripts/harbor_langsmith.py | 26 | |
| LOW | libs/evals/scripts/analyze.py | 183 | |
| LOW | libs/evals/scripts/analyze.py | 253 | |
| LOW | libs/evals/scripts/analyze.py | 767 | |
| LOW | libs/code/tests/unit_tests/test_startup_fast_paths.py | 199 | |
| LOW | libs/code/deepagents_code/_testing_models.py | 111 | |
| LOW | libs/code/deepagents_code/sessions.py | 959 | |
| LOW | libs/code/deepagents_code/server.py | 187 | |
| LOW | libs/code/deepagents_code/textual_adapter.py | 370 | |
| LOW | libs/code/deepagents_code/config.py | 154 | |
| LOW | libs/code/deepagents_code/config.py | 413 | |
| LOW | libs/code/deepagents_code/config.py | 1800 | |
| LOW | libs/code/deepagents_code/config.py | 2253 | |
| LOW | libs/code/deepagents_code/config.py | 2397 | |
| LOW | libs/code/deepagents_code/mcp_login_service.py | 124 | |
| LOW | libs/code/deepagents_code/model_config.py | 714 | |
| LOW | libs/code/deepagents_code/model_config.py | 891 | |
| LOW | libs/code/deepagents_code/model_config.py | 2345 | |
| LOW | libs/code/deepagents_code/model_config.py | 2414 | |
| LOW | libs/code/deepagents_code/non_interactive.py | 359 | |
| LOW | libs/code/deepagents_code/non_interactive.py | 429 | |
| LOW | libs/code/deepagents_code/non_interactive.py | 914 | |
| LOW | libs/code/deepagents_code/update_check.py | 300 | |
| LOW | libs/code/deepagents_code/update_check.py | 386 | |
| LOW | libs/code/deepagents_code/media_utils.py | 275 | |
| LOW | libs/code/deepagents_code/file_ops.py | 285 | |
| LOW | libs/code/deepagents_code/file_ops.py | 440 | |
| LOW | libs/code/deepagents_code/ask_user.py | 100 | |
| LOW | libs/code/deepagents_code/tool_display.py | 131 | |
| LOW | libs/code/deepagents_code/tool_display.py | 319 | |
| LOW | libs/code/deepagents_code/event_bus.py | 262 | |
| LOW | libs/code/deepagents_code/agent.py | 212 | |
| LOW | libs/code/deepagents_code/local_context.py | 85 | |
| LOW | libs/code/deepagents_code/app.py | 456 | |
| LOW | libs/code/deepagents_code/app.py | 528 | |
| LOW | libs/code/deepagents_code/app.py | 637 | |
| LOW | libs/code/deepagents_code/app.py | 696 | |
| LOW | libs/code/deepagents_code/app.py | 2468 | |
| LOW | libs/code/deepagents_code/app.py | 2790 | |
| LOW | libs/code/deepagents_code/app.py | 3039 | |
| 84 more matches not shown… | |||

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | libs/evals/tests/evals/data/bfcl_apis/message_api.py | 181 | # Check if there is a current user logged in |
| LOW | libs/evals/tests/evals/data/bfcl_apis/travel_booking.py | 783 | # Check if the traveler is at least 18 years old |
| LOW | libs/evals/tests/evals/data/bfcl_apis/travel_booking.py | 790 | # Check if the passport number starts with 'US' (assuming this indicates a US passport) |
| LOW | libs/evals/scripts/analyze.py | 51 | # Check if this is a valid task directory (has solution/solve.sh) |
| LOW | libs/evals/scripts/analyze.py | 200 | # Check if this step has tool calls |
| LOW | libs/code/tests/unit_tests/test_chat_input.py | 2572 | # Set gap to 0 so any real delay exceeds it. |
| LOW | …de/examples/skills/skill-creator/scripts/init_skill.py | 212 | # Check if directory already exists |
| LOW | libs/code/deepagents_code/textual_adapter.py | 786 | # Check if this is an AIMessageChunk with content |
| LOW | libs/code/deepagents_code/config.py | 1857 | # Check if this command is in the allow set |
| LOW | libs/code/deepagents_code/config.py | 2728 | # Check if this provider uses a custom BaseChatModel class |
| LOW | libs/code/deepagents_code/media_utils.py | 393 | # Check if file was created and has content |
| LOW | libs/code/deepagents_code/app.py | 3935 | # Check if ALL actions in the batch are auto-approvable shell commands |
| LOW | libs/code/deepagents_code/app.py | 6205 | # Check if agent is available |
| LOW | libs/code/deepagents_code/app.py | 7546 | # Check if focused widget is the text area inside chat input |
| LOW | libs/code/deepagents_code/app.py | 10094 | # Check if already using this exact model |
| LOW | libs/code/deepagents_code/skills/commands.py | 94 | # Check if skill_dir is within base_dir |
| LOW | libs/code/deepagents_code/skills/commands.py | 594 | # Check if this project skill shadows a user skill with the same name. |
| LOW | …de/built_in_skills/skill-creator/scripts/init_skill.py | 256 | # Check if directory already exists |
| LOW | libs/code/deepagents_code/widgets/chat_input.py | 1830 | # Check if it looked like media but failed validation |
| LOW | …li/examples/skills/skill-creator/scripts/init_skill.py | 212 | # Check if directory already exists |
| LOW | …ts/unit_tests/backends/test_composite_backend_async.py | 432 | # Write files to both backends |
| LOW | …ts/unit_tests/backends/test_composite_backend_async.py | 835 | # Write files to memories |
| LOW | …ts/unit_tests/backends/test_composite_backend_async.py | 1016 | # Write files to memories |
| LOW | …ts/tests/unit_tests/backends/test_composite_backend.py | 539 | # Write files to both backends |
| LOW | …ts/tests/unit_tests/backends/test_composite_backend.py | 949 | # Write files to memories |
| LOW | …ts/tests/unit_tests/backends/test_composite_backend.py | 1130 | # Write files to memories |
| LOW | …/tests/unit_tests/backends/test_store_backend_async.py | 140 | # Write file with multiple occurrences |
| LOW | …/tests/unit_tests/backends/test_store_backend_async.py | 173 | # Write file with multiple lines |
| LOW | …/tests/unit_tests/backends/test_store_backend_async.py | 217 | # Write files in nested directories |
| LOW | …/tests/unit_tests/backends/test_local_shell_backend.py | 114 | # Read the file |
| LOW | libs/deepagents/deepagents/middleware/filesystem.py | 1630 | # Check if execute tool is present and if backend supports it |
| LOW | libs/deepagents/deepagents/middleware/filesystem.py | 1695 | # Check if execute tool is present and if backend supports it |
| LOW | libs/deepagents/deepagents/middleware/filesystem.py | 1770 | # Check if content exceeds eviction threshold |
| LOW | libs/deepagents/deepagents/middleware/summarization.py | 732 | # Check if this AIMessage has tool calls we need to truncate |
| LOW | libs/deepagents/deepagents/backends/store.py | 442 | # Check if file is in the specified directory or a subdirectory |
| LOW | libs/deepagents/deepagents/backends/store.py | 574 | # Check if file exists |
| LOW | libs/deepagents/deepagents/backends/store.py | 597 | # Check if file exists using async method |
| LOW | libs/deepagents/deepagents/backends/utils.py | 533 | # Check if path matches an exact file |
| LOW | libs/deepagents/deepagents/backends/state.py | 175 | # Check if file is in the specified directory or a subdirectory |
| LOW | libs/acp/deepagents_acp/server.py | 807 | # Check if this is write_todos - auto-approve updates to existing plan |
| LOW | libs/acp/deepagents_acp/server.py | 836 | # Check if ALL command types are already allowed |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | libs/evals/tests/evals/test_skills.py | 54 | # Step 1: read_file to get the skill content. |
| LOW | libs/evals/tests/evals/test_skills.py | 55 | # Step 2: answer with the magic number. |
| LOW | libs/evals/tests/evals/test_skills.py | 97 | # Step 1: read_file for code-review only. |
| LOW | libs/evals/tests/evals/test_skills.py | 98 | # Step 2: answer with the code. |
| LOW | libs/evals/tests/evals/test_skills.py | 143 | # Step 1: read_file for both skills in parallel. |
| LOW | libs/evals/tests/evals/test_skills.py | 144 | # Step 2: answer combining both ports. |
| LOW | libs/evals/tests/evals/test_skills.py | 230 | # Step 1: read_file to discover the typo. |
| LOW | libs/evals/tests/evals/test_skills.py | 231 | # Step 2: edit_file to fix it. |
| LOW | libs/evals/tests/evals/test_skills.py | 232 | # Step 3: confirm. |
| LOW | libs/evals/deepagents_harbor/backend.py | 208 | # Step 1: existence check + mkdir (small command, no ARG_MAX risk). |
| LOW | libs/evals/deepagents_harbor/backend.py | 222 | # Step 2: transfer content via Harbor's native file upload |
| LOW | libs/code/tests/unit_tests/test_model_config.py | 1116 | # Step 1: Save model to config (simulating /model anthropic:claude-opus-4-5) |
| LOW | libs/code/tests/unit_tests/test_model_config.py | 1124 | # Step 2: Patch DEFAULT_CONFIG_PATH and call _get_default_model_spec |
| LOW | libs/code/tests/unit_tests/test_model_config.py | 1134 | # Step 3: Get default model spec - should use saved recent model |
| LOW | libs/code/deepagents_code/skills/commands.py | 363 | ### Step 1: [First Action] |
| LOW | libs/code/deepagents_code/skills/commands.py | 366 | ### Step 2: [Second Action] |
| LOW | libs/code/deepagents_code/skills/commands.py | 369 | ### Step 3: [Final Action] |
| LOW | …nts/tests/unit_tests/backends/test_backwards_compat.py | 350 | # Step 1: "Restore" v1 checkpoint data |
| LOW | …nts/tests/unit_tests/backends/test_backwards_compat.py | 358 | # Step 2: V2 backend reads v1 data |
| LOW | …nts/tests/unit_tests/backends/test_backwards_compat.py | 367 | # Step 3: Edit v1 data (result upgrades to v2) |
| LOW | …nts/tests/unit_tests/backends/test_backwards_compat.py | 373 | # Step 4: Write a brand new file in v2 |
| LOW | …nts/tests/unit_tests/backends/test_backwards_compat.py | 377 | # Step 5: Verify everything via reads |
| LOW | libs/deepagents/deepagents/middleware/summarization.py | 946 | # Step 1: Truncate args if configured |
| LOW | libs/deepagents/deepagents/middleware/summarization.py | 953 | # Step 2: Check if summarization should happen |
| LOW | libs/deepagents/deepagents/middleware/summarization.py | 970 | # Step 3: Perform summarization |
| LOW | libs/deepagents/deepagents/middleware/summarization.py | 1067 | # Step 1: Truncate args if configured |
| LOW | libs/deepagents/deepagents/middleware/summarization.py | 1074 | # Step 2: Check if summarization should happen |
| LOW | libs/deepagents/deepagents/middleware/summarization.py | 1091 | # Step 3: Perform summarization |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | libs/cli/frontend/package-lock.json | 5514 | "resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.3.tgz", |
| MEDIUM | …ents_cli/deploy/frontend_dist/assets/index-DM3gptpu.js | 49 | `).replace(wA,"")}function ib(r,a){return a=rb(a),rb(r)===a}function pt(r,a,u,c,m,b){switch(u){case"children":typeof c== |
| MEDIUM | …ents_cli/deploy/frontend_dist/assets/index-DM3gptpu.js | 69 | `):JSON.stringify(e,null,2)}function w2(e,t="Something went wrong."){return e instanceof Error&&e.message?e.message:type |
| MEDIUM | …ents_cli/deploy/frontend_dist/assets/index-DM3gptpu.js | 69 | `):JSON.stringify(e,null,2)}function w2(e,t="Something went wrong."){return e instanceof Error&&e.message?e.message:type |
| LOW | libs/partners/quickjs/langchain_quickjs/_format.py | 210 | # stdout path -- we've already truncated the result, so we just add the marker |
| MEDIUM | libs/deepagents/tests/unit_tests/test_graph.py | 1629 | # A harness profile is registered under bare "openai". |
| MEDIUM | …pshots/system_prompt_with_memory_and_skills_tools.json | 226 | "description": "Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context |
| MEDIUM | …smoke_tests/snapshots/custom_system_message_tools.json | 226 | "description": "Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context |
| MEDIUM | …sts/snapshots/system_prompt_without_execute_tools.json | 226 | "description": "Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context |
| MEDIUM | …system_prompt_with_sync_and_async_subagents_tools.json | 226 | "description": "Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context |
| MEDIUM | …_tests/snapshots/system_prompt_with_execute_tools.json | 257 | "description": "Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context |
| MEDIUM | libs/deepagents/deepagents/graph.py | 552 | # Copy of `tools` with any harness-specific description rewrites. |
| MEDIUM | libs/deepagents/deepagents/graph.py | 653 | # Auto-add the default general-purpose subagent unless the harness profile |
| MEDIUM | libs/deepagents/deepagents/graph.py | 673 | # Add harness-profile middleware, if any |
| MEDIUM | libs/deepagents/deepagents/profiles/__init__.py | 28 | # Built-in provider/harness profiles are registered lazily on first |
| LOW | libs/acp/deepagents_acp/utils.py | 312 | # Use specific handler if available, otherwise just use base command |
| LOW | libs/acp/deepagents_acp/utils.py | 317 | # Non-sensitive commands - just use the base command |
| MEDIUM | examples/nvidia_deep_agent/src/prompts.py | 12 | RESEARCHER_INSTRUCTIONS = """Gather and synthesize comprehensive information on the provided query, carefully addressing |
| MEDIUM | examples/better-harness/tests/__init__.py | 1 | # Tests package for better-harness. |
| MEDIUM | …amples/better-harness/examples/deepagents_example.toml | 1 | # Worked example for optimizing a Deep Agents-based harness. |
| MEDIUM | examples/better-harness/better_harness/core.py | 345 | "# better-harness report", |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | libs/code/deepagents_code/sessions.py | 761 | batch_results = await _load_latest_checkpoint_summaries_batch( |
| LOW | libs/code/deepagents_code/theme.py | 161 | """Incognito shell accent (darkened for light bg contrast).""" |
| LOW | libs/code/deepagents_code/app.py | 9541 | ) |
| LOW | libs/code/deepagents_code/widgets/chat_input.py | 721 | end = (row - 1, len(prev_line)) |
| LOW | libs/code/deepagents_code/widgets/chat_input.py | 1141 | # now-empty text, assume all media were deleted, and discard them |
| LOW | libs/code/deepagents_code/widgets/chat_input.py | 1321 | # currently-visible `!` is the second bang of `!!`. Promote to |
| LOW | libs/code/scripts/install.sh | 1 | #!/usr/bin/env bash |
| LOW | libs/code/scripts/install.sh | 301 | log_info "deepagents-code ${pre_label} found (editable install from local source)." |
| LOW | libs/cli/scripts/install.sh | 1 | #!/usr/bin/env bash |
| LOW | libs/cli/deepagents_cli/deploy/config.py | 761 | # runtime store instead. |
| LOW | libs/cli/deepagents_cli/deploy/templates.py | 581 | # deploy_graph.py — the generated server entry point |
| LOW | libs/partners/quickjs/langchain_quickjs/_repl.py | 361 | self._console = _ConsoleBuffer(max_stdout_chars) |
| LOW | libs/partners/quickjs/langchain_quickjs/_repl.py | 461 | if target_names == self._active_tool_names and self._tools_installed: |
| LOW | libs/deepagents/deepagents/middleware/filesystem.py | 501 | |
| LOW | libs/acp/examples/local_context.py | 41 | @runtime_checkable |
| LOW | …amples/better-harness/examples/deepagents_example.toml | 1 | # Worked example for optimizing a Deep Agents-based harness. |
| LOW | .github/workflows/evals.yml | 1 | # Evaluation workflow for Deep Agents. |
| LOW | .github/workflows/evals.yml | 561 | PYEOF |
| LOW | .github/workflows/release.yml | 1 | # Builds and publishes deepagents packages to PyPI. |
| LOW | .github/workflows/release.yml | 681 | # maximally sensitive to catching breakage. |
| LOW | .github/workflows/pr_labeler.yml | 1 | # Unified PR labeler — applies size, file-based, title-based, and |
| LOW | .github/workflows/harbor.yml | 1 | # Harbor evaluation workflow for Deep Agents |
| LOW | .github/workflows/close_unchecked_issues.yml | 1 | # Auto-close issues that bypass or ignore the issue template checkboxes. |
| LOW | .github/workflows/tag-external-issues.yml | 1 | # Automatically tag issues as "external" or "internal" based on whether |
| LOW | .github/workflows/pr_lint_trailer.yml | 41 | // and CRLF leaves stray \r chars in offending-line displays. |
| LOW | .github/workflows/pr_lint_trailer.yml | 61 | async function findStickyComment() { |
| LOW | .github/workflows/evals_trials.yml | 1 | # N-trial eval workflow. |
| LOW | .github/workflows/bump_uv_pin.yml | 1 | # Monthly bump of the uv pin in `.github/actions/uv_setup/action.yml`. |
| LOW | .github/workflows/release-please.yml | 321 | # Dispatching makes each release run a top-level workflow, so the OIDC token |
| LOW | .github/workflows/release-please.yml | 361 | ACP_RELEASE: ${{ needs.release-please.outputs.acp-release }} |
| LOW | .github/workflows/release_please_parse_check.yml | 1 | # Pre-merge parse check for release-please. |
| LOW | .github/workflows/release_please_parse_check.yml | 21 | # release-please bumps. As of release-please 17.6.0 this is 0.4.1: |
| LOW | .github/workflows/pr_lint.yml | 1 | # PR title linting. |
| LOW | .github/workflows/pr_lint.yml | 21 | # * test — adding tests or correcting existing |
| LOW | .github/workflows/pr_lint.yml | 141 | |
| LOW | .github/workflows/block_fork_main_prs.yml | 1 | # Block PRs whose head ref is `main` (or `master`) from a fork. This topology |
| LOW | .github/workflows/require_issue_link.yml | 1 | # Require external PRs to reference an approved issue (e.g. Fixes #NNN) and |
| LOW | .github/workflows/require_issue_link.yml | 21 | # Enforcement gate: set to 'true' to activate the issue link requirement. |
| LOW | .github/workflows/sync_priority_labels.yml | 1 | # Sync priority labels (p0–p3) from linked issues to PRs. |

| Severity | File | Line | Snippet |
|---|---|---|---|
| CRITICAL | …ents_cli/deploy/frontend_dist/assets/index-DM3gptpu.js | 85 | `))}function d(T,v,C,A){const k=C.enter("tableCell"),D=C.enter("phrasing"),N=C.containerPhrasing(T,{...A,before:l,after: |
| CRITICAL | …ents_cli/deploy/frontend_dist/assets/index-DM3gptpu.js | 107 | `,h="/",g="*",p="",_="comment",T="declaration";function v(A,k){if(typeof A!="string")throw new TypeError("First argument |
| CRITICAL | …ents_cli/deploy/frontend_dist/assets/index-DM3gptpu.js | 158 | `+f.text,this.inlineQueue.pop(),this.inlineQueue.at(-1).src=h.text):n.push(f);continue}if(t){let h="Infinite loop on byt |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …/deepagents/tests/unit_tests/test_file_system_tools.py | 320 | "content": "def helper():\n return 42", |
| LOW | …/tests/unit_tests/middleware/test_skills_middleware.py | 767 | helper_content = "def helper(): pass" |
| LOW | …nts/tests/unit_tests/backends/test_backwards_compat.py | 77 | write_res = be.write("/src/utils.py", "import sys\ndef helper():\n pass\nimport os\n") |
| LOW | …nts/tests/unit_tests/backends/test_backwards_compat.py | 189 | be.write("/src/utils.py", "import sys\ndef helper():\n pass\nimport os\n") |
| LOW | …s/deepagents/tests/unit_tests/_api/test_deprecation.py | 45 | def helper() -> None: |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | libs/code/tests/unit_tests/test_mcp_tools.py | 902 | |
| MEDIUM | libs/code/tests/unit_tests/test_mcp_tools.py | 1214 | |
| MEDIUM | libs/deepagents/tests/unit_tests/test_end_to_end.py | 4099 | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | libs/code/scripts/install.sh | 4 | # Usage: |
| LOW | libs/cli/scripts/install.sh | 4 | # Usage: |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …agents/tests/unit_tests/backends/test_store_backend.py | 450 | ("user@example.com",), |