The LLM Evaluation Framework
3880 matches across 16 categories. Click a row to expand file-level details.

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_metrics/test_image_coherence_metric.py | 20 | def test_multimodal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_image_coherence_metric.py | 41 | def test_multimodal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_image_coherence_metric.py | 62 | def test_invalid_model_throws_error_for_multimodal(self): |
| LOW | tests/test_metrics/test_image_coherence_metric.py | 82 | def test_multimodal_evaluate_method(self): |
| LOW | tests/test_metrics/test_mcp_use_metric.py | 21 | def test_normal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_mcp_use_metric.py | 41 | def test_normal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_mcp_use_metric.py | 61 | def test_multimodal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_mcp_use_metric.py | 78 | def test_multimodal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_mcp_use_metric.py | 95 | def test_invalid_model_throws_error_for_multimodal(self): |
| LOW | tests/test_metrics/test_mcp_use_metric.py | 109 | def test_normal_evaluate_method(self): |
| LOW | tests/test_metrics/test_mcp_use_metric.py | 129 | def test_multimodal_evaluate_method(self): |
| LOW | tests/test_metrics/test_topic_adherence_metric.py | 20 | def test_normal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_topic_adherence_metric.py | 44 | def test_normal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_topic_adherence_metric.py | 66 | def test_multimodal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_topic_adherence_metric.py | 90 | def test_multimodal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_topic_adherence_metric.py | 116 | def test_invalid_model_throws_error_for_multimodal(self): |
| LOW | tests/test_metrics/test_topic_adherence_metric.py | 139 | def test_normal_evaluate_method(self): |
| LOW | tests/test_metrics/test_topic_adherence_metric.py | 160 | def test_multimodal_evaluate_method(self): |
| LOW | tests/test_metrics/test_pattern_match_metric.py | 20 | def test_normal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_pattern_match_metric.py | 44 | def test_normal_evaluate_method(self): |
| LOW | tests/test_metrics/test_contextual_precision_metric.py | 20 | def test_normal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_contextual_precision_metric.py | 44 | def test_normal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_contextual_precision_metric.py | 68 | def test_multimodal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_contextual_precision_metric.py | 89 | def test_multimodal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_contextual_precision_metric.py | 110 | def test_invalid_model_throws_error_for_multimodal(self): |
| LOW | tests/test_metrics/test_contextual_precision_metric.py | 130 | def test_normal_evaluate_method(self): |
| LOW | tests/test_metrics/test_contextual_precision_metric.py | 154 | def test_multimodal_evaluate_method(self): |
| LOW | tests/test_metrics/test_image_editing_metric.py | 20 | def test_multimodal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_image_editing_metric.py | 41 | def test_multimodal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_image_editing_metric.py | 62 | def test_invalid_model_throws_error_for_multimodal(self): |
| LOW | tests/test_metrics/test_image_editing_metric.py | 80 | def test_multimodal_evaluate_method(self): |
| LOW | tests/test_metrics/test_dag.py | 45 | def test_is_valid_dag_deep_nested_mixed_nodes(self): |
| LOW | tests/test_metrics/test_dag.py | 99 | def test_extract_required_params(self): |
| LOW | tests/test_metrics/test_dag.py | 132 | def test_extract_required_params_non_binary(self): |
| LOW | tests/test_metrics/test_dag.py | 151 | def test_disallow_multiple_judgement_roots(self): |
| LOW | tests/test_metrics/test_dag.py | 170 | def test_allow_multiple_tasknode_roots(self): |
| LOW | tests/test_metrics/test_dag.py | 176 | def test_copy_graph_isolated_and_deep(self): |
| LOW | tests/test_metrics/test_dag.py | 234 | def test_non_binary_node_in_dag(self): |
| LOW | tests/test_metrics/test_dag.py | 262 | def test_verdict_node_with_child(self): |
| LOW | tests/test_metrics/test_bias_metric.py | 20 | def test_normal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_bias_metric.py | 44 | def test_normal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_bias_metric.py | 68 | def test_multimodal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_bias_metric.py | 89 | def test_multimodal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_bias_metric.py | 110 | def test_invalid_model_throws_error_for_multimodal(self): |
| LOW | tests/test_metrics/test_bias_metric.py | 128 | def test_normal_evaluate_method(self): |
| LOW | tests/test_metrics/test_bias_metric.py | 152 | def test_multimodal_evaluate_method(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 21 | def test_normal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 43 | def test_normal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 65 | def test_multimodal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 89 | def test_multimodal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 113 | def test_invalid_model_throws_error_for_multimodal(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 134 | def test_normal_evaluate_method(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 155 | def test_multimodal_evaluate_method(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 182 | def test_knowledge_schema_unpacking(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 189 | def test_knowledge_schema_rejects_double_wrap(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 196 | def test_normal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_knowledge_retention_metric.py | 221 | def test_normal_async_metric_measure(self): |
| LOW | tests/test_metrics/test_exact_match_metric.py | 20 | def test_normal_sync_metric_measure(self): |
| LOW | tests/test_metrics/test_exact_match_metric.py | 44 | def test_normal_evaluate_method(self): |
| LOW | tests/test_metrics/test_task_completetion_metric.py | 21 | def test_normal_sync_metric_measure(self): |
| 1954 more matches not shown… | | | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/test_metrics/test_dag_serialization.py | 31 | # ---------------------------------------------------------------------------- |
| MEDIUM | tests/test_metrics/test_dag_serialization.py | 33 | # ---------------------------------------------------------------------------- |
| MEDIUM | tests/test_metrics/test_dag_serialization.py | 224 | # ---------------------------------------------------------------------------- |
| MEDIUM | tests/test_metrics/test_dag_serialization.py | 226 | # ---------------------------------------------------------------------------- |
| MEDIUM | tests/test_metrics/test_dag_serialization.py | 396 | # ---------------------------------------------------------------------------- |
| MEDIUM | tests/test_metrics/test_dag_serialization.py | 398 | # ---------------------------------------------------------------------------- |
| MEDIUM | tests/test_metrics/test_dag_serialization.py | 287 | # ---------------------------------------------------------------------------- |
| MEDIUM | tests/test_metrics/test_dag_serialization.py | 289 | # ---------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 172 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 174 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 281 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 283 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 465 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 468 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 528 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 530 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 1002 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 1004 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 362 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 364 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 702 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 705 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 956 | # --------------------------------------------------------------------------- |
| MEDIUM | …t_integrations/test_googleadk/test_span_interceptor.py | 958 | # --------------------------------------------------------------------------- |
| MEDIUM | tests/test_integrations/test_llamaindex/test_sync.py | 42 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_sync.py | 44 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_sync.py | 66 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_sync.py | 68 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_sync.py | 105 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_sync.py | 107 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 58 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 60 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 83 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 85 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 108 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 110 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 150 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 152 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 175 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_llamaindex/test_async.py | 177 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_crewai/test_sync.py | 41 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_crewai/test_sync.py | 43 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_crewai/test_async.py | 33 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_crewai/test_async.py | 35 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/conftest.py | 148 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/conftest.py | 150 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/conftest.py | 526 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/conftest.py | 528 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/conftest.py | 645 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/conftest.py | 647 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 310 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 312 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 553 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 555 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 50 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 52 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 72 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 74 | # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 105 | # # ============================================================================= |
| MEDIUM | tests/test_integrations/test_langgraph/test_sync.py | 107 | # # ============================================================================= |
| 324 more matches not shown… | | | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | falls back to settings when current_trace_context is none. |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | falls back to settings when current_trace_context is none. |
| HIGH | …_integrations/test_pydanticai/test_span_interceptor.py | 0 | falls back to settings when current_trace_context is none. |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | falls back to settings when current_trace_context is none. |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | when a real deepeval span is on ``current_span_context`` and the otel span is a root (no native parent), the interceptor |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | when a real deepeval span is on ``current_span_context`` and the otel span is a root (no native parent), the interceptor |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | when a real deepeval span is on ``current_span_context`` and the otel span is a root (no native parent), the interceptor |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | otel children already have a real parent_id pointing into the same otel trace — no need to bridge. |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | otel children already have a real parent_id pointing into the same otel trace — no need to bridge. |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | otel children already have a real parent_id pointing into the same otel trace — no need to bridge. |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | one-shot semantics through the interceptor: a second agent span inside the same ``with`` block does not inherit. |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | one-shot semantics through the interceptor: a second agent span inside the same ``with`` block does not inherit. |
| HIGH | …_integrations/test_pydanticai/test_span_interceptor.py | 0 | one-shot semantics through the interceptor: a second agent span inside the same ``with`` block does not inherit. |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | one-shot semantics through the interceptor: a second agent span inside the same ``with`` block does not inherit. |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | typed slot is not consumed by spans of a different type. an llm span fired inside ``with next_agent_span(...)`` should p |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | typed slot is not consumed by spans of a different type. an llm span fired inside ``with next_agent_span(...)`` should p |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | typed slot is not consumed by spans of a different type. an llm span fired inside ``with next_agent_span(...)`` should p |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | mirrors the ``test_tool_metric_collection`` flow in test_sync.py — ``with next_tool_span(metric_collection=...)`` sets t |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | mirrors the ``test_tool_metric_collection`` flow in test_sync.py — ``with next_tool_span(metric_collection=...)`` sets t |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | mirrors the ``test_tool_metric_collection`` flow in test_sync.py — ``with next_tool_span(metric_collection=...)`` sets t |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | last-write-wins: ``next_agent_span`` sets the floor at on_start; later ``update_current_span(...)`` (e.g. from inside a |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | last-write-wins: ``next_agent_span`` sets the floor at on_start; later ``update_current_span(...)`` (e.g. from inside a |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | last-write-wins: ``next_agent_span`` sets the floor at on_start; later ``update_current_span(...)`` (e.g. from inside a |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | ``with next_agent_span(metrics=[...])`` populates the placeholder; at on_end the interceptor calls ``stash_pending_metri |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | ``with next_agent_span(metrics=[...])`` populates the placeholder; at on_end the interceptor calls ``stash_pending_metri |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | ``with next_agent_span(metrics=[...])`` populates the placeholder; at on_end the interceptor calls ``stash_pending_metri |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | in production paths (``is_evaluating=false``) the metrics overlay would leak — gate prevents the stash. |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | in production paths (``is_evaluating=false``) the metrics overlay would leak — gate prevents the stash. |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | in production paths (``is_evaluating=false``) the metrics overlay would leak — gate prevents the stash. |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | span-level kwargs were removed in the otel poc migration. each must raise ``typeerror`` on construction so callers see e |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | span-level kwargs were removed in the otel poc migration. each must raise ``typeerror`` on construction so callers see e |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | span-level kwargs were removed in the otel poc migration. each must raise ``typeerror`` on construction so callers see e |
| HIGH | …t_integrations/test_googleadk/test_span_interceptor.py | 0 | constructor must succeed when no api_key is supplied or in env. the otel pipeline still wires up locally — only the outb |
| HIGH | …est_integrations/test_strands/test_span_interceptor.py | 0 | constructor must succeed when no api_key is supplied or in env. the otel pipeline still wires up locally — only the outb |
| HIGH | …t_integrations/test_agentcore/test_span_interceptor.py | 0 | constructor must succeed when no api_key is supplied or in env. the otel pipeline still wires up locally — only the outb |
| HIGH | …ns/test_googleadk/apps/googleadk_multiple_tools_app.py | 0 | trace-level setup for the multiple-tools fixture. per-tool / per-agent metric collections belong on ``with next_*_span(. |
| HIGH | …ations/test_strands/apps/strands_multiple_tools_app.py | 0 | trace-level setup for the multiple-tools fixture. per-tool / per-agent metric collections belong on ``with next_*_span(. |
| HIGH | …ns/test_agentcore/apps/agentcore_multiple_tools_app.py | 0 | trace-level setup for the multiple-tools fixture. per-tool / per-agent metric collections belong on ``with next_*_span(. |
| HIGH | …integrations/test_googleadk/apps/googleadk_tool_app.py | 0 | trace-only setup. tool / agent / llm span-level fields belong at the call site (``with next_*_span(...)`` or ``update_cu |
| HIGH | …est_integrations/test_strands/apps/strands_tool_app.py | 0 | trace-only setup. tool / agent / llm span-level fields belong at the call site (``with next_*_span(...)`` or ``update_cu |
| HIGH | …integrations/test_agentcore/apps/agentcore_tool_app.py | 0 | trace-only setup. tool / agent / llm span-level fields belong at the call site (``with next_*_span(...)`` or ``update_cu |
| HIGH | …test_integrations/test_crewai/test_crewai_component.py | 0 | fetch weather data for a given city. returns temperature and conditions. |
| HIGH | tests/test_integrations/test_crewai/test_crewai.py | 0 | fetch weather data for a given city. returns temperature and conditions. |
| HIGH | …val/test_ai_agent_evals/test_crewai/test_crewai_e2e.py | 0 | fetch weather data for a given city. returns temperature and conditions. |
| HIGH | tests/test_integrations/test_langgraph/test_sync.py | 0 | decorator that switches between generate and assert mode based on generate_schemas env var. args: schema_name: name of t |
| HIGH | tests/test_integrations/test_langgraph/test_async.py | 0 | decorator that switches between generate and assert mode based on generate_schemas env var. args: schema_name: name of t |
| HIGH | tests/test_integrations/test_langchain/test_sync.py | 0 | decorator that switches between generate and assert mode based on generate_schemas env var. args: schema_name: name of t |
| HIGH | tests/test_integrations/test_langchain/test_async.py | 0 | decorator that switches between generate and assert mode based on generate_schemas env var. args: schema_name: name of t |
| HIGH | tests/test_integrations/test_pydanticai/test_sync.py | 0 | decorator that switches between generate and assert mode based on generate_schemas env var. args: schema_name: name of t |
| HIGH | tests/test_integrations/test_pydanticai/test_async.py | 0 | decorator that switches between generate and assert mode based on generate_schemas env var. args: schema_name: name of t |
| HIGH | …grations/test_openai_agents/test_scenerios/test_run.py | 0 | fetches weather data for a given location using the open-meteo api. args: latitude (float): the latitude of the location |
| HIGH | …i_agents/test_scenerios/test_with_trace_and_wrapped.py | 0 | fetches weather data for a given location using the open-meteo api. args: latitude (float): the latitude of the location |
| HIGH | …s/test_openai_agents/test_scenerios/test_with_trace.py | 0 | fetches weather data for a given location using the open-meteo api. args: latitude (float): the latitude of the location |
| HIGH | …ons/test_openai_agents/test_scenerios/test_run_sync.py | 0 | fetches weather data for a given location using the open-meteo api. args: latitude (float): the latitude of the location |
| HIGH | …ai_agents/test_scenerios/test_weather_agent_patched.py | 0 | fetches weather data for a given location using the open-meteo api. args: latitude (float): the latitude of the location |
| HIGH | …test_openai_agents/test_scenerios/test_run_streamed.py | 0 | fetches weather data for a given location using the open-meteo api. args: latitude (float): the latitude of the location |
| HIGH | …grations/test_openai_agents/test_scenerios/test_run.py | 0 | get latitude and longitude for a city name. args: city_name (str): name of the city returns: dict: dictionary with lat, |
| HIGH | …i_agents/test_scenerios/test_with_trace_and_wrapped.py | 0 | get latitude and longitude for a city name. args: city_name (str): name of the city returns: dict: dictionary with lat, |
| HIGH | …s/test_openai_agents/test_scenerios/test_with_trace.py | 0 | get latitude and longitude for a city name. args: city_name (str): name of the city returns: dict: dictionary with lat, |
| HIGH | …ons/test_openai_agents/test_scenerios/test_run_sync.py | 0 | get latitude and longitude for a city name. args: city_name (str): name of the city returns: dict: dictionary with lat, |
| 89 more matches not shown… | | | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | test_pydantic_agent.py | 19 | |
| LOW | tests/test_metrics/test_mcp_use_metric.py | 4 | |
| LOW | tests/test_metrics/test_exact_match_metric.py | 4 | |
| LOW | tests/test_metrics/test_dag_serialization.py | 4 | |
| LOW | tests/test_metrics/test_dag_serialization.py | 21 | |
| LOW | …sts/test_metrics/test_turn_contextual_recall_metric.py | 4 | |
| LOW | tests/test_metrics/turn_contextual_relevancy_metric.py | 4 | |
| LOW | …t_metrics/test_answer_relevancy_metric_empty_output.py | 19 | |
| LOW | tests/test_integrations/utils.py | 1 | |
| LOW | …t_integrations/test_googleadk/test_span_interceptor.py | 46 | |
| LOW | …tegrations/test_googleadk/apps/googleadk_simple_app.py | 8 | |
| LOW | …ns/test_googleadk/apps/googleadk_multiple_tools_app.py | 9 | |
| LOW | …integrations/test_googleadk/apps/googleadk_tool_app.py | 8 | |
| LOW | …integrations/test_googleadk/apps/googleadk_eval_app.py | 18 | |
| LOW | tests/test_integrations/test_anthropic/conftest.py | 6 | |
| LOW | …ts/test_integrations/test_exporter/test_pydantic_ai.py | 1 | |
| LOW | tests/test_integrations/test_crewai/test_stress.py | 48 | |
| LOW | …t_integrations/test_crewai/test_knowledge_retriever.py | 7 | |
| LOW | …t_integrations/test_crewai/test_knowledge_retriever.py | 8 | |
| LOW | …test_integrations/test_crewai/test_crewai_component.py | 2 | |
| LOW | …test_integrations/test_crewai/test_crewai_component.py | 3 | |
| LOW | …test_integrations/test_crewai/test_crewai_component.py | 4 | |
| LOW | …test_integrations/test_crewai/test_crewai_component.py | 5 | |
| LOW | tests/test_integrations/test_crewai/test_crewai.py | 2 | |
| LOW | tests/test_integrations/test_crewai/test_crewai.py | 3 | |
| LOW | tests/test_integrations/test_crewai/test_crewai.py | 4 | |
| LOW | tests/test_integrations/test_crewai/test_crewai.py | 5 | |
| LOW | tests/test_integrations/test_crewai/test_crewai.py | 7 | |
| LOW | tests/test_integrations/test_crewai/test_crewai.py | 19 | |
| LOW | tests/test_integrations/test_crewai/apps/simple_app.py | 7 | |
| LOW | tests/test_integrations/test_crewai/apps/simple_app.py | 9 | |
| LOW | tests/test_integrations/test_crewai/apps/async_app.py | 7 | |
| LOW | …rations/test_langgraph/apps/langgraph_next_span_app.py | 18 | |
| LOW | tests/test_integrations/test_strands/conftest.py | 2 | |
| LOW | …est_integrations/test_strands/test_span_interceptor.py | 28 | |
| LOW | …est_integrations/test_strands/test_span_interceptor.py | 40 | |
| LOW | …tegrations/test_openai/test_tool_call_flow_response.py | 6 | |
| LOW | tests/test_integrations/test_openai/test_sync_openai.py | 4 | |
| LOW | …grations/test_openai/test_tool_call_flow_completion.py | 6 | |
| LOW | …sts/test_integrations/test_openai/test_async_openai.py | 1 | |
| LOW | …sts/test_integrations/test_openai/test_async_openai.py | 7 | |
| LOW | tests/test_integrations/test_openai_agents/test_sync.py | 9 | |
| LOW | …grations/test_openai_agents/test_scenerios/test_run.py | 6 | |
| LOW | …test_openai_agents/test_scenerios/test_multi_agents.py | 2 | |
| LOW | …test_openai_agents/test_scenerios/test_multi_agents.py | 3 | |
| LOW | …test_openai_agents/test_scenerios/test_multi_agents.py | 6 | |
| LOW | …test_openai_agents/test_scenerios/test_multi_agents.py | 7 | |
| LOW | …i_agents/test_scenerios/test_with_trace_and_wrapped.py | 3 | |
| LOW | …s/test_openai_agents/test_scenerios/test_with_trace.py | 5 | |
| LOW | …ons/test_openai_agents/test_scenerios/test_run_sync.py | 2 | |
| LOW | …ons/test_openai_agents/test_scenerios/test_run_sync.py | 3 | |
| LOW | …ai_agents/test_scenerios/test_weather_agent_patched.py | 2 | |
| LOW | …test_openai_agents/test_scenerios/test_run_streamed.py | 4 | |
| LOW | …sts/test_integrations/test_langchain/test_next_span.py | 18 | |
| LOW | tests/test_integrations/test_pydanticai/conftest.py | 2 | |
| LOW | tests/test_integrations/test_agentcore/conftest.py | 2 | |
| LOW | …t_integrations/test_agentcore/test_span_interceptor.py | 28 | |
| LOW | …t_integrations/test_agentcore/test_span_interceptor.py | 40 | |
| LOW | tests/test_confident/test_prompt.py | 8 | |
| LOW | tests/test_confident/test_prompt.py | 8 | |
| 522 more matches not shown… | | | |

| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_integrations/test_langgraph/conftest.py | 143 | except Exception: |
| LOW | tests/test_integrations/test_langgraph/conftest.py | 241 | except Exception: |
| LOW | tests/test_integrations/test_langgraph/conftest.py | 267 | except Exception: |
| LOW | tests/test_integrations/test_langgraph/conftest.py | 691 | except Exception: |
| LOW | …ns/test_langgraph/apps/langgraph_multiple_tools_app.py | 69 | except Exception as e: |
| LOW | …ns/test_langgraph/apps/langgraph_parallel_tools_app.py | 80 | except Exception: |
| LOW | tests/test_integrations/test_openai_agents/conftest.py | 17 | except Exception: |
| LOW | …est_integrations/test_openai_agents/apps/tool_agent.py | 35 | except Exception: |
| LOW | tests/test_integrations/test_langchain/conftest.py | 120 | except Exception: |
| LOW | tests/test_integrations/test_langchain/conftest.py | 199 | except Exception: |
| LOW | tests/test_integrations/test_langchain/conftest.py | 224 | except Exception: |
| LOW | tests/test_integrations/test_langchain/conftest.py | 455 | except Exception: |
| LOW | …ntegrations/test_langchain/apps/langchain_agent_app.py | 39 | except Exception as e: |
| LOW | …test_langchain/apps/langchain_metric_collection_app.py | 34 | except Exception as e: |
| LOW | …ns/test_langchain/apps/langchain_parallel_tools_app.py | 63 | except Exception: |
| LOW | …ns/test_langchain/apps/langchain_multiple_tools_app.py | 68 | except Exception as e: |
| LOW | tests/test_core/conftest.py | 7 | except Exception: |
| LOW | tests/test_core/conftest.py | 194 | except Exception: |
| LOW | …racing/test_integration/test_current_golden_context.py | 76 | except Exception: |
| LOW | …racing/test_integration/test_current_golden_context.py | 104 | except Exception: |
| LOW | tests/test_core/test_tracing/apps/sync_app.py | 39 | except Exception as e: |
| LOW | tests/test_core/test_tracing/apps/sync_app.py | 65 | except Exception: |
| LOW | tests/test_core/test_tracing/apps/sync_app.py | 84 | except Exception: |
| LOW | tests/test_core/test_tracing/apps/sync_app.py | 101 | except Exception: |
| LOW | tests/test_core/test_tracing/apps/sync_app.py | 134 | except Exception as e: |
| LOW | tests/test_core/test_tracing/apps/sync_app.py | 159 | except Exception as e: |
| LOW | tests/test_core/test_tracing/apps/sync_app.py | 222 | except Exception as e: |
| LOW | tests/test_core/test_tracing/apps/sync_app.py | 270 | except Exception: |
| MEDIUM | …acing/test_generators/test_generator_context_safety.py | 187 | def consume(g): |
| LOW | …acing/test_generators/test_generator_context_safety.py | 191 | except Exception as e: |
| LOW | tests/test_core/test_evaluation/test_local_store.py | 150 | except Exception as e: # pragma: no cover |
| LOW | …re/test_evaluation/test_execute/test_error_boundary.py | 442 | except Exception: |
| LOW | …re/test_evaluation/test_execute/test_error_boundary.py | 481 | except Exception: |
| MEDIUM | …re/test_evaluation/test_execute/test_error_boundary.py | 121 | def parent_catches(): |
| LOW | …s/test_core/test_synthesizer/test_context_generator.py | 423 | except Exception: |
| LOW | .scripts/changelog/generate.py | 529 | except Exception as e: |
| LOW | .scripts/changelog/generate.py | 947 | except Exception as e: |
| LOW | .scripts/changelog/generate.py | 1090 | except Exception as e: |
| LOW | .scripts/changelog/generate.py | 1119 | except Exception: |
| LOW | .scripts/changelog/generate.py | 1394 | except Exception as e: |
| MEDIUM | .scripts/changelog/extract_release_notes.py | 36 | print(f"Error: {args.changelog_dir} does not exist.") |
| LOW | scripts/check_openai_model_capabilities.py | 89 | except Exception as exc: |
| LOW | scripts/check_openai_model_capabilities.py | 119 | except Exception as exc: |
| LOW | deepeval/telemetry.py | 140 | except Exception: |
| LOW | deepeval/telemetry.py | 153 | except Exception: |
| MEDIUM | deepeval/telemetry.py | 41 | def blocked_by_firewall(): |
| LOW | deepeval/utils.py | 452 | except Exception as e: |
| MEDIUM | deepeval/utils.py | 453 | print(f"An error occurred: {e}") |
| LOW | deepeval/utils.py | 799 | except Exception: |
| LOW | deepeval/utils.py | 825 | except Exception: |
| MEDIUM | deepeval/utils.py | 192 | def get_or_create_event_loop() -> asyncio.AbstractEventLoop: |
| LOW | deepeval/metrics/indicator.py | 139 | except Exception as e: |
| LOW | deepeval/metrics/indicator.py | 299 | except Exception as e: |
| LOW | deepeval/metrics/utils.py | 444 | except Exception as e: |
| LOW | deepeval/metrics/g_eval/utils.py | 382 | except Exception: |
| LOW | deepeval/metrics/role_adherence/role_adherence.py | 201 | except Exception: |
| LOW | deepeval/metrics/role_adherence/role_adherence.py | 226 | except Exception: |
| LOW | deepeval/model_integrations/utils.py | 111 | except Exception: |
| LOW | deepeval/tracing/tracing.py | 126 | except Exception as e: |
| LOW | deepeval/tracing/tracing.py | 153 | except Exception as e: |
| 253 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_integrations/utils.py | 28 | |
| LOW | tests/test_integrations/utils.py | 87 | |
| LOW | tests/test_integrations/utils.py | 43 | |
| LOW | tests/test_integrations/utils.py | 156 | |
| LOW | tests/test_integrations/utils.py | 270 | |
| LOW | tests/test_integrations/test_langgraph/conftest.py | 182 | |
| LOW | tests/test_integrations/test_langgraph/conftest.py | 213 | |
| LOW | tests/test_integrations/test_langgraph/conftest.py | 347 | |
| LOW | …grations/test_openai/test_tool_call_flow_completion.py | 66 | |
| LOW | tests/test_integrations/test_langchain/conftest.py | 153 | |
| LOW | tests/test_integrations/test_langchain/conftest.py | 176 | |
| LOW | …ntegrations/test_langchain/apps/langchain_agent_app.py | 66 | |
| LOW | …ntegrations/test_langchain/apps/langchain_agent_app.py | 115 | |
| LOW | …rations/test_langchain/apps/langchain_streaming_app.py | 56 | |
| LOW | …rations/test_langchain/apps/langchain_streaming_app.py | 101 | |
| LOW | …rations/test_langchain/apps/langchain_streaming_app.py | 146 | |
| LOW | …rations/test_langchain/apps/langchain_streaming_app.py | 191 | |
| LOW | …ns/test_langchain/apps/langchain_parallel_tools_app.py | 86 | |
| LOW | …ns/test_langchain/apps/langchain_parallel_tools_app.py | 131 | |
| LOW | …ns/test_langchain/apps/langchain_multiple_tools_app.py | 86 | |
| LOW | …ns/test_langchain/apps/langchain_multiple_tools_app.py | 131 | |
| LOW | …tions/test_langchain/apps/langchain_conditional_app.py | 68 | |
| LOW | …tions/test_langchain/apps/langchain_conditional_app.py | 113 | |
| LOW | …tions/test_langchain/apps/langchain_single_tool_app.py | 35 | |
| LOW | …tions/test_langchain/apps/langchain_single_tool_app.py | 84 | |
| LOW | tests/test_confident/test_dataset.py | 72 | |
| LOW | tests/test_core/stubs.py | 119 | |
| LOW | tests/test_core/stubs.py | 125 | |
| LOW | …re/test_evaluation/test_execute/test_error_boundary.py | 436 | |
| LOW | .scripts/changelog/generate.py | 811 | |
| LOW | .scripts/changelog/generate.py | 840 | |
| LOW | .scripts/changelog/generate.py | 906 | |
| LOW | .scripts/changelog/generate.py | 1015 | |
| LOW | .scripts/changelog/generate.py | 1172 | |
| LOW | .scripts/changelog/generate.py | 1182 | |
| LOW | .scripts/changelog/generate.py | 1233 | |
| LOW | .scripts/changelog/extract_release_notes.py | 16 | |
| LOW | examples/mcp_evaluation/mcp_eval_multi_turn.py | 59 | |
| LOW | deepeval/progress_context.py | 39 | |
| LOW | deepeval/utils.py | 139 | |
| LOW | deepeval/utils.py | 417 | |
| LOW | deepeval/utils.py | 432 | |
| LOW | deepeval/metrics/indicator.py | 73 | |
| LOW | deepeval/metrics/indicator.py | 157 | |
| LOW | deepeval/metrics/indicator.py | 241 | |
| LOW | deepeval/metrics/utils.py | 233 | |
| LOW | deepeval/metrics/utils.py | 305 | |
| LOW | deepeval/metrics/utils.py | 613 | |
| LOW | deepeval/metrics/utils.py | 711 | |
| LOW | deepeval/metrics/tool_use/tool_use.py | 271 | |
| LOW | deepeval/metrics/summarization/summarization.py | 283 | |
| LOW | deepeval/metrics/dag/graph.py | 30 | |
| LOW | deepeval/metrics/dag/utils.py | 33 | |
| LOW | deepeval/metrics/dag/utils.py | 74 | |
| LOW | deepeval/metrics/dag/nodes.py | 648 | |
| LOW | deepeval/metrics/dag/nodes.py | 90 | |
| LOW | deepeval/metrics/dag/nodes.py | 155 | |
| LOW | deepeval/metrics/conversational_dag/nodes.py | 792 | |
| LOW | deepeval/metrics/conversational_dag/nodes.py | 112 | |
| LOW | deepeval/metrics/conversational_dag/nodes.py | 188 | |
| 179 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | tests/test_metrics/test_json_correctness_metric.py | 30 | actual_output="{'name': null}", |
| HIGH | tests/test_metrics/test_json_correctness_metric.py | 56 | actual_output="{'name': null}", |
| HIGH | tests/test_metrics/test_json_correctness_metric.py | 150 | actual_output="{'name': null}", |
| HIGH | tests/test_confident/test_prompt.py | 172 | prompt.push(text=f"Latest content {UUID}") |
| HIGH | tests/test_confident/test_prompt.py | 185 | prompt.push(text=f"Version 1 {UUID1}") |
| HIGH | tests/test_confident/test_prompt.py | 189 | prompt.push(text=f"Version 2 {UUID2}") |
| HIGH | tests/test_confident/test_prompt.py | 898 | prompt.push(messages=MESSAGES) |
| HIGH | tests/test_confident/test_prompt.py | 913 | prompt.push(messages=MESSAGES1) |
| HIGH | tests/test_confident/test_prompt.py | 918 | prompt.push(messages=MESSAGES2) |
| HIGH | tests/test_confident/test_prompt.py | 136 | prompt.push(text=TEXT) |
| HIGH | tests/test_confident/test_prompt.py | 153 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 263 | prompt_setup.push(text=f"Setup cache content {uuid.uuid4()}") |
| HIGH | tests/test_confident/test_prompt.py | 314 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 341 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 370 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 409 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 428 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 447 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 466 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 485 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 507 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 537 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 587 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 620 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 632 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 659 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 694 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 736 | prompt1.push( |
| HIGH | tests/test_confident/test_prompt.py | 767 | prompt.push(text="Main branch push") |
| HIGH | tests/test_confident/test_prompt.py | 771 | prompt.push(text="Different branch push", branch=self.BRANCH_NAME) |
| HIGH | tests/test_confident/test_prompt.py | 855 | prompt.push(messages=MESSAGES) |
| HIGH | tests/test_confident/test_prompt.py | 875 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1054 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1084 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1118 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1168 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1221 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1257 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1269 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1299 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1337 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1380 | prompt1.push( |
| HIGH | tests/test_confident/test_prompt.py | 1411 | prompt.push( |
| HIGH | tests/test_confident/test_prompt.py | 1417 | prompt.push( |
| HIGH | tests/test_confident/test_dataset.py | 125 | dataset.push(alias=self.PUSH_ALIAS) |
| HIGH | tests/test_confident/test_dataset.py | 162 | dataset.push(alias=self.PUSH_ALIAS) |
| HIGH | deepeval/prompt/prompt.py | 856 | return self.push( |
| HIGH | deepeval/simulator/simulation_graph/template.py | 40 | "index": null, |
| HIGH | deepeval/test_run/hyperparameters.py | 48 | value.push(_verbose=verbose) |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/test_integrations/test_exporter/readable_spans.py | 5 | # Create a simple span context |
| MEDIUM | tests/test_integrations/test_exporter/readable_spans.py | 13 | # Create the ReadableSpan with one attribute |
| MEDIUM | tests/test_integrations/test_exporter/readable_spans.py | 184 | # Create a multi-turn span context |
| MEDIUM | tests/test_integrations/test_exporter/readable_spans.py | 192 | # Create the multi-turn readable span |
| MEDIUM | …t_integrations/test_crewai/test_knowledge_retriever.py | 18 | # Create a knowledge source |
| MEDIUM | …t_integrations/test_crewai/test_knowledge_retriever.py | 24 | # Create an LLM with a temperature of 0 to ensure deterministic outputs |
| MEDIUM | …t_integrations/test_crewai/test_knowledge_retriever.py | 27 | # Create an agent with the knowledge store |
| MEDIUM | tests/test_integrations/test_langgraph/conftest.py | 78 | # Create a unique identifier for this test run |
| MEDIUM | …test_langgraph/apps/langgraph_metric_collection_app.py | 19 | # Create a Prompt object for prompt tracking |
| MEDIUM | …grations/test_openai_agents/test_scenerios/test_run.py | 63 | # Create the weather specialist agent |
| MEDIUM | …i_agents/test_scenerios/test_with_trace_and_wrapped.py | 82 | # Create the weather specialist agent |
| MEDIUM | …s/test_openai_agents/test_scenerios/test_with_trace.py | 65 | # Create the weather specialist agent |
| MEDIUM | …ons/test_openai_agents/test_scenerios/test_run_sync.py | 62 | # Create the weather specialist agent |
| MEDIUM | …ai_agents/test_scenerios/test_weather_agent_patched.py | 78 | # Create the weather specialist agent |
| MEDIUM | …test_openai_agents/test_scenerios/test_run_streamed.py | 63 | # Create the weather specialist agent |
| MEDIUM | tests/test_integrations/test_langchain/conftest.py | 58 | # Create a unique identifier for this test run |
| MEDIUM | …test_langchain/apps/langchain_metric_collection_app.py | 16 | # Create a Prompt object for prompt tracking |
| MEDIUM | tests/test_core/stubs.py | 189 | # Define the mock types |
| MEDIUM | tests/test_core/test_models/test_openai_model.py | 166 | # Create a mock parsed response |
| MEDIUM | tests/test_core/test_models/test_openai_model.py | 215 | # Create a mock that tracks the call arguments |
| MEDIUM | tests/test_core/test_models/test_openai_model.py | 263 | # Create a mock parsed response |
| MEDIUM | tests/test_core/test_cli/test_cli.py | 648 | # Create a real JSON file (with whitespace) so we can verify normalization. |
| MEDIUM | …agent_evals/test_langgraph/test_langgraph_component.py | 12 | # Define the tool |
| MEDIUM | …ocs/test_confident/test_integrations/test_langgraph.py | 23 | # Create a metric |
| MEDIUM | .scripts/changelog/extract_release_notes.py | 85 | # Create an empty file so the workflow knows fallback is needed |
| MEDIUM | deepeval/metrics/ragas.py | 76 | # Create a dataset from the test case |
| MEDIUM | deepeval/metrics/ragas.py | 516 | # Create a dataset from the test case |
| MEDIUM | deepeval/tracing/tracing.py | 589 | # Create a new event loop |
| MEDIUM | deepeval/tracing/tracing.py | 927 | # Create the base API span |
| MEDIUM | deepeval/synthesizer/synthesizer.py | 1490 | # Create the pandas DataFrame |
| MEDIUM | deepeval/benchmarks/math_qa/math_qa.py | 144 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/bbq/bbq.py | 106 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/human_eval/human_eval.py | 21 | # Create a restricted globals dictionary with only safe built-ins |
| MEDIUM | deepeval/benchmarks/human_eval/human_eval.py | 160 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/logi_qa/logi_qa.py | 146 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/drop/drop.py | 148 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/truthful_qa/truthful_qa.py | 155 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/big_bench_hard/big_bench_hard.py | 178 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/squad/squad.py | 113 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/mmlu/mmlu.py | 145 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/equity_med_qa/equity_med_qa.py | 88 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/benchmarks/hellaswag/hellaswag.py | 149 | # Create a DataFrame from task_results_data |
| MEDIUM | deepeval/test_run/test_run.py | 377 | # Create a single TraceMetricScores object instead of a list |
| MEDIUM | deepeval/test_run/hyperparameters.py | 72 | # Define the wrapper function that will be the actual decorator |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | deepeval/utils.py | 883 | Ensures that a required parameter is provided. If the parameter is `None`, raises a `DeepEvalError` with a help |
| HIGH | deepeval/utils.py | 915 | Imports an optional dependency module or raises a `DeepEvalError` if the module is not found. The error message |
| HIGH | deepeval/dataset/dataset.py | 259 | Load test cases from a CSV file. This method reads a CSV file, extracting test case data based on spec |
| HIGH | deepeval/dataset/dataset.py | 406 | Load test cases from a JSON file. This method reads a JSON file containing a list of objects, each rep |
| HIGH | deepeval/models/utils.py | 11 | Extract base model name from provider-prefixed format. This function is useful for extracting the actual model name |
| HIGH | deepeval/models/utils.py | 47 | Normalize and validate a provider API key stored as a SecretStr. Args: secret: The SecretS |
| HIGH | deepeval/models/retry_policy.py | 515 | Run a synchronous callable with a soft timeout enforced by a helper thread, with a global cap on concurrent tim |
| HIGH | deepeval/scorer/scorer.py | 276 | Calculate the toxicity score of a given text prediction using the Detoxify model. Args: pr |
| HIGH | deepeval/synthesizer/synthesizer.py | 1507 | Save synthetic goldens to a file. Args: file_type: Type of file to save as ('json' or 'csv'). |
| HIGH | deepeval/anthropic/utils.py | 10 | Return a short, human-readable summary string for an Anthropic-style multimodal `content` value. This is used |
| HIGH | deepeval/openai/utils.py | 42 | Return a short, human-readable summary string for an OpenAI-style multimodal `content` value. This is used to |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …_integrations/test_pydanticai/test_span_interceptor.py | 801 | DeepEvalInstrumentationSettings(api_key="dummy", is_test_mode=False) |
| LOW | tests/test_confident/test_dataset.py | 181 | |
| LOW | …i_agent_evals/test_pydanticai/test_pydanticai_setup.py | 21 | # result = await agent.run("What are the LLMs?") |
| LOW | …_deepeval/test_llm_evals/test_component_level_evals.py | 61 | # dataset.evaluate(task) |
| LOW | docs/source.config.ts | 101 | // `next/image`. Our blog + tutorial content references dozens of |
| LOW | docs/app/layout.tsx | 61 | // this unless it overrides `openGraph.images` (the docs section |
| LOW | …ts/MetricTagsDisplayer/MetricTagsDisplayer.module.scss | 1 | // Metric classification tags shown under each metric's H1. Each variant |
| LOW | examples/tracing/test_chatbot.py | 1 | # from deepeval.tracing import trace, TraceType |
| LOW | examples/tracing/test_chatbot.py | 21 | # ], |
| LOW | examples/tracing/test_chatbot.py | 41 | # def retriever(self, input=input): |
| LOW | examples/tracing/test_chatbot.py | 61 | |
| LOW | examples/tracing/test_chatbot.py | 81 | # "Be at least 35 years old.", |
| LOW | deepeval/metrics/ragas.py | 261 | # """This metric checks the noise sensitivity using Ragas""" |
| LOW | deepeval/metrics/ragas.py | 281 | # import_ragas() |
| LOW | deepeval/metrics/ragas.py | 301 | # data = { |
| LOW | deepeval/tracing/types.py | 201 | |
| LOW | deepeval/tracing/context.py | 281 | # only callsite the user owns is the one wrapping the framework call. |
| LOW | deepeval/tracing/context.py | 601 | _pending_next_retriever_span.reset(token) |
| LOW | deepeval/tracing/otel/exporter.py | 121 | api_key: Optional[str] = None, # dynamic api key, |
| LOW | deepeval/tracing/otel/exporter.py | 181 | ################ Set Trace Attributes from ################ |
| LOW | deepeval/config/settings.py | 841 | # |
| LOW | deepeval/openai_agents/runner.py | 1 | # from __future__ import annotations |
| LOW | deepeval/openai_agents/runner.py | 21 | # from agents.models.interface import Model |
| LOW | deepeval/openai_agents/runner.py | 41 | # # Import observed provider/model helpers from our agent module |
| LOW | deepeval/openai_agents/runner.py | 61 | |
| LOW | deepeval/openai_agents/runner.py | 81 | # patched_get_model.__doc__ = original_get_model.__doc__ |
| LOW | deepeval/openai_agents/runner.py | 101 | # context: Optional[TContext] = None, |
| LOW | deepeval/openai_agents/runner.py | 121 | # span_type="custom", |
| LOW | deepeval/openai_agents/runner.py | 141 | # current_span.input = input |
| LOW | deepeval/openai_agents/runner.py | 161 | # update_trace_attributes(output=_output) |
| LOW | deepeval/openai_agents/runner.py | 181 | # metadata: Optional[dict] = None, |
| LOW | deepeval/openai_agents/runner.py | 201 | # metric_collection=metric_collection, |
| LOW | deepeval/openai_agents/runner.py | 221 | # **kwargs, # backwards compatibility |
| LOW | deepeval/openai_agents/runner.py | 241 | # max_turns: int = DEFAULT_MAX_TURNS, |
| LOW | deepeval/openai_agents/runner.py | 261 | # metrics=metrics, |
| LOW | deepeval/openai_agents/runner.py | 281 | # if current_span: |
| LOW | deepeval/openai_agents/runner.py | 301 | # try: |
| LOW | deepeval/openai_agents/runner.py | 321 | # output: Any = None, |
| LOW | deepeval/openai_agents/runner.py | 341 | # if thread_id: |
| LOW | deepeval/models/retry_policy.py | 861 | |
| LOW | deepeval/models/retry_policy.py | 901 | + _HTTPX_NET_EXCS |
| LOW | deepeval/integrations/pydantic_ai/instrumentator.py | 281 | # within a process so this is safe across threads / asyncio tasks. |
| LOW | deepeval/integrations/pydantic_ai/instrumentator.py | 301 | # change) are written at ``on_end`` instead of here, so the OTel span |
| LOW | deepeval/integrations/pydantic_ai/instrumentator.py | 321 | # OTel sees no parent and the exporter would otherwise emit it as a |
| LOW | deepeval/evaluate/utils.py | 161 | # Fall back to the golden's input when the trace didn't capture a |
| LOW | deepeval/evaluate/utils.py | 181 | # ``metadata``, ``environment``) are forwarded from the trace so that |
| LOW | deepeval/test_run/api.py | 21 | # multimodal_input: Optional[str] = Field(None, alias="multimodalInput") |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_integrations/test_langgraph/conftest.py | 630 | # Print values when present |
| LOW | …ntegrations/test_langchain/apps/langchain_agent_app.py | 82 | # Check if we have tool calls |
| LOW | …ntegrations/test_langchain/apps/langchain_agent_app.py | 131 | # Check if we have tool calls |
| LOW | tests/test_core/test_evaluation/test_console_report.py | 113 | # Check if the aggregate table is built correctly |
| LOW | tests/test_core/test_evaluation/test_console_report.py | 119 | # Check if it's a Panel and contains the aggregate metrics table |
| LOW | …al/test_ai_agent_evals/test_setup_end_to_end_python.py | 24 | # Loop through dataset |
| LOW | deepeval/utils.py | 386 | # Check if obj is iterable (but not a string) |
| LOW | deepeval/metrics/dag/nodes.py | 395 | # Check if all children are ClassificationResultNode and their classifications are boolean |
| LOW | deepeval/metrics/dag/nodes.py | 405 | # Check if there is one True and one False classification |
| LOW | deepeval/metrics/dag/nodes.py | 520 | # Check if children is not empty |
| LOW | deepeval/metrics/dag/nodes.py | 531 | # Check if the verdict attribute of each child is a string |
| LOW | deepeval/metrics/conversational_dag/nodes.py | 467 | # Check if all children are ClassificationResultNode and their classifications are boolean |
| LOW | deepeval/metrics/conversational_dag/nodes.py | 479 | # Check if there is one True and one False classification |
| LOW | deepeval/metrics/conversational_dag/nodes.py | 630 | # Check if children is not empty |
| LOW | deepeval/metrics/conversational_dag/nodes.py | 643 | # Check if the verdict attribute of each child is a string |
| LOW | deepeval/metrics/g_eval/utils.py | 193 | # Check if both criteria and evaluation_steps are not None at the same time |
| LOW | deepeval/metrics/g_eval/utils.py | 199 | # Check if criteria is provided, it cannot be an empty string |
| LOW | deepeval/metrics/g_eval/utils.py | 203 | # Check if evaluation_steps is provided, it cannot be an empty list |
| LOW | deepeval/metrics/g_eval/utils.py | 224 | # Check if ranges overlap |
| LOW | deepeval/tracing/tracing.py | 575 | # Add the trace to the queue |
| LOW | deepeval/tracing/tracing.py | 822 | # Add children to the stack for processing |
| LOW | deepeval/models/llms/openrouter_model.py | 332 | # Check if response has cost information |
| LOW | deepeval/synthesizer/chunking/context_generator.py | 341 | # Check if chunk_size and max_context_size is valid for document lengths |
| LOW | deepeval/evaluate/execute/agentic.py | 303 | # Set it to None to ensure the test_case is added |
| LOW | deepeval/prompt/prompt.py | 520 | # Check if we need to bootstrap the cache |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/test_core/test_tracing/apps/sync_app.py | 212 | research_prompt = f"""Based on the following retrieved documents, provide a comprehensive research analysis of ' |
| MEDIUM | docs/src/sections/home/HomeSection.module.scss | 96 | // Duplicated content length is 200%; shift by half for a seamless loop. |
| MEDIUM | deepeval/metrics/role_violation/template.py | 12 | return f"""Based on the role violations identified: {role_violations}, and the role violation score: {score}, pl |
| MEDIUM | deepeval/metrics/misuse/template.py | 15 | return f"""Based on the misuse violations identified: {misuse_violations}, and the misuse score: {score}, please |
| MEDIUM | deepeval/metrics/pii_leakage/template.py | 14 | return f"""Based on the privacy violations identified: {privacy_violations}, and the privacy violation score: {s |
| MEDIUM | deepeval/metrics/faithfulness/template.py | 13 | f"""Based on the given {'excerpt' if multimodal else 'text'}, please extract a comprehensive list of FACTUAL |
| MEDIUM | deepeval/metrics/faithfulness/template.py | 63 | f"""Based on the given {'excerpt (text and images)' if multimodal else 'text'}, please generate a comprehens |
| MEDIUM | deepeval/metrics/non_advice/template.py | 15 | return f"""Based on the inappropriate advice violations identified: {non_advice_violations}, and the non-advice |
| MEDIUM | deepeval/tracing/otel/context_aware_processor.py | 139 | # Trace-shape testing override: when a test harness has set |
| LOW | deepeval/dataset/utils.py | 221 | # already a Task so just return it |
| LOW | deepeval/dataset/utils.py | 225 | # If it is a future, it is already scheduled, so just return it |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …acing/schemas/masking/comprehensive_masked_schema.json | 15 | "name": "John Doe" |
| LOW | …acing/schemas/masking/comprehensive_masked_schema.json | 23 | "name": "John Doe" |
| LOW | …acing/schemas/masking/comprehensive_masked_schema.json | 39 | "name": "John Doe" |
| LOW | …acing/schemas/masking/comprehensive_masked_schema.json | 47 | "name": "John Doe" |
| LOW | …ts/test_core/test_tracing/test_masking/test_masking.py | 80 | "email": "user@example.com", |
| LOW | …ts/test_core/test_tracing/test_masking/test_masking.py | 82 | "name": "John Doe", |
| LOW | …test_tracing/test_configuration/test_masking_config.py | 183 | return "user@example.com" |
| LOW | …test_tracing/test_configuration/test_masking_config.py | 186 | assert result == "user@example.com" # Function returns original |
| LOW | deepeval/metrics/knowledge_retention/template.py | 84 | "Phone Number": "555-1029" |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | deepeval/models/llms/gemini_model.py | 52 | api_key="your-api-key" |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_integrations/utils.py | 441 | async def my_function(): |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_core/test_prompts/test_interpolation.py | 568 | This ensures users don't forget to provide required template variables. |