@@ -569,6 +569,170 @@ def test_loss_analysis_result_show(self, capsys):
569569 assert "c1" in captured .out
570570
571571
def _make_eval_result(
    metrics=None,
    candidate_names=None,
):
    """Builds a minimal EvaluationResult for loss-analysis tests.

    Args:
        metrics: Metric names to attach to the single eval case. Defaults to
            ``["task_success_v1"]`` when None.
        candidate_names: Candidate names recorded in the run metadata.
            Defaults to ``["agent-1"]`` when None. An explicit empty list is
            preserved rather than replaced by the default.

    Returns:
        A ``common_types.EvaluationResult`` containing one eval case with one
        response candidate carrying a metric result per entry in ``metrics``.
    """
    # Use `is None` (not truthiness) so callers can pass an explicit empty
    # list — e.g. candidate_names=[] — without it being swallowed by the
    # default value.
    if metrics is None:
        metrics = ["task_success_v1"]
    if candidate_names is None:
        candidate_names = ["agent-1"]

    metric_results = {
        m: common_types.EvalCaseMetricResult(metric_name=m) for m in metrics
    }

    eval_case_results = [
        common_types.EvalCaseResult(
            eval_case_index=0,
            response_candidate_results=[
                common_types.ResponseCandidateResult(
                    response_index=0,
                    metric_results=metric_results,
                )
            ],
        )
    ]
    metadata = common_types.EvaluationRunMetadata(
        candidate_names=candidate_names,
    )
    return common_types.EvaluationResult(
        eval_case_results=eval_case_results,
        metadata=metadata,
    )
603+
class TestResolveMetricName:
    """Unit tests for _resolve_metric_name."""

    def test_none_returns_none(self):
        result = _evals_utils._resolve_metric_name(None)
        assert result is None

    def test_string_passes_through(self):
        name = "task_success_v1"
        assert _evals_utils._resolve_metric_name(name) == name

    def test_metric_object_extracts_name(self):
        metric = common_types.Metric(name="multi_turn_task_success_v1")
        resolved = _evals_utils._resolve_metric_name(metric)
        assert resolved == "multi_turn_task_success_v1"

    def test_object_with_name_attr(self):
        """Any object exposing a .name attribute works (e.g., LazyLoadedPrebuiltMetric)."""

        class _StubMetric:
            name = "tool_use_quality_v1"

        resolved = _evals_utils._resolve_metric_name(_StubMetric())
        assert resolved == "tool_use_quality_v1"

    def test_lazy_loaded_prebuilt_metric_resolves_versioned_name(self):
        """A lazy prebuilt metric resolves to its versioned API spec name."""

        class _StubLazyMetric:
            name = "MULTI_TURN_TASK_SUCCESS"

            def _get_api_metric_spec_name(self):
                return "multi_turn_task_success_v1"

        resolved = _evals_utils._resolve_metric_name(_StubLazyMetric())
        assert resolved == "multi_turn_task_success_v1"

    def test_lazy_loaded_prebuilt_metric_falls_back_to_name(self):
        """Resolution falls back to .name when _get_api_metric_spec_name returns None."""

        class _StubLazyMetricNoSpec:
            name = "CUSTOM_METRIC"

            def _get_api_metric_spec_name(self):
                return None

        resolved = _evals_utils._resolve_metric_name(_StubLazyMetricNoSpec())
        assert resolved == "CUSTOM_METRIC"
652+
class TestResolveLossAnalysisConfig:
    """Unit tests for _resolve_loss_analysis_config."""

    def test_auto_infer_single_metric_and_candidate(self):
        result = _make_eval_result(
            metrics=["task_success_v1"], candidate_names=["agent-1"]
        )
        resolved_config = _evals_utils._resolve_loss_analysis_config(
            eval_result=result
        )
        assert resolved_config.metric == "task_success_v1"
        assert resolved_config.candidate == "agent-1"

    def test_explicit_metric_and_candidate(self):
        result = _make_eval_result(
            metrics=["m1", "m2"], candidate_names=["c1", "c2"]
        )
        resolved_config = _evals_utils._resolve_loss_analysis_config(
            eval_result=result, metric="m1", candidate="c2"
        )
        assert (resolved_config.metric, resolved_config.candidate) == ("m1", "c2")

    def test_config_provides_metric_and_candidate(self):
        result = _make_eval_result(metrics=["m1"], candidate_names=["c1"])
        user_config = common_types.LossAnalysisConfig(
            metric="m1", candidate="c1", predefined_taxonomy="my_taxonomy"
        )
        resolved_config = _evals_utils._resolve_loss_analysis_config(
            eval_result=result, config=user_config
        )
        assert resolved_config.metric == "m1"
        assert resolved_config.candidate == "c1"
        assert resolved_config.predefined_taxonomy == "my_taxonomy"

    def test_explicit_args_override_config(self):
        result = _make_eval_result(
            metrics=["m1", "m2"], candidate_names=["c1", "c2"]
        )
        user_config = common_types.LossAnalysisConfig(metric="m1", candidate="c1")
        resolved_config = _evals_utils._resolve_loss_analysis_config(
            eval_result=result, config=user_config, metric="m2", candidate="c2"
        )
        assert resolved_config.metric == "m2"
        assert resolved_config.candidate == "c2"

    def test_error_multiple_metrics_no_explicit(self):
        result = _make_eval_result(metrics=["m1", "m2"], candidate_names=["c1"])
        with pytest.raises(ValueError, match="multiple metrics"):
            _evals_utils._resolve_loss_analysis_config(eval_result=result)

    def test_error_multiple_candidates_no_explicit(self):
        result = _make_eval_result(metrics=["m1"], candidate_names=["c1", "c2"])
        with pytest.raises(ValueError, match="multiple candidates"):
            _evals_utils._resolve_loss_analysis_config(eval_result=result)

    def test_error_invalid_metric(self):
        result = _make_eval_result(metrics=["m1"], candidate_names=["c1"])
        with pytest.raises(ValueError, match="not found in eval_result"):
            _evals_utils._resolve_loss_analysis_config(
                eval_result=result, metric="nonexistent"
            )

    def test_error_invalid_candidate(self):
        result = _make_eval_result(metrics=["m1"], candidate_names=["c1"])
        with pytest.raises(ValueError, match="not found in eval_result"):
            _evals_utils._resolve_loss_analysis_config(
                eval_result=result, candidate="nonexistent"
            )

    def test_no_candidates_defaults_to_candidate_1(self):
        # Strip candidate names from the metadata so resolution has nothing
        # to infer from and must fall back to the synthetic default.
        result = _make_eval_result(metrics=["m1"], candidate_names=[]).model_copy(
            update={"metadata": common_types.EvaluationRunMetadata()}
        )
        resolved_config = _evals_utils._resolve_loss_analysis_config(
            eval_result=result
        )
        assert resolved_config.metric == "m1"
        assert resolved_config.candidate == "candidate_1"

    def test_no_eval_case_results_raises(self):
        empty_result = common_types.EvaluationResult()
        with pytest.raises(ValueError, match="no metric results"):
            _evals_utils._resolve_loss_analysis_config(eval_result=empty_result)
735+
572736class TestEvals :
573737 """Unit tests for the GenAI client."""
574738
0 commit comments