import { APIResource } from "../../core/resource.js";
import * as Shared from "../shared.js";
import * as ResponsesAPI from "../responses/responses.js";

/**
 * Resource class for grader models. It declares no members of its own; the
 * grader types below are attached to it through the `GraderModels` namespace at
 * the end of this file.
 */
export declare class GraderModels extends APIResource {
}

/**
 * A LabelModelGrader object which uses a model to assign labels to each item in
 * the evaluation.
 */
export interface LabelModelGrader {
  /**
   * The message inputs given to the model.
   */
  input: Array<LabelModelGrader.Input>;

  /**
   * The labels to assign to each item in the evaluation.
   */
  labels: Array<string>;

  /**
   * The model to use for the evaluation. Must support structured outputs.
   */
  model: string;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The labels that indicate a passing result. Must be a subset of labels.
   */
  passing_labels: Array<string>;

  /**
   * The object type, which is always `label_model`.
   */
  type: 'label_model';
}

export declare namespace LabelModelGrader {
  /**
   * A message input to the model with a role indicating instruction following
   * hierarchy. Instructions given with the `developer` or `system` role take
   * precedence over instructions given with the `user` role. Messages with the
   * `assistant` role are presumed to have been generated by the model in previous
   * interactions.
   */
  interface Input {
    /**
     * Inputs to the model - can contain template strings.
     *
     * NOTE(review): the element type of the array variant is not recoverable from
     * this declaration file, so it is typed as `unknown` - confirm against the API
     * schema.
     */
    content:
      | string
      | ResponsesAPI.ResponseInputText
      | Input.OutputText
      | Input.InputImage
      | ResponsesAPI.ResponseInputAudio
      | Array<unknown>;

    /**
     * The role of the message input. One of `user`, `assistant`, `system`, or
     * `developer`.
     */
    role: 'user' | 'assistant' | 'system' | 'developer';

    /**
     * The type of the message input. Always `message`.
     */
    type?: 'message';
  }

  namespace Input {
    /**
     * A text output from the model.
     */
    interface OutputText {
      /**
       * The text output from the model.
       */
      text: string;

      /**
       * The type of the output text. Always `output_text`.
       */
      type: 'output_text';
    }

    /**
     * An image input to the model.
     */
    interface InputImage {
      /**
       * The URL of the image input.
       */
      image_url: string;

      /**
       * The type of the image input. Always `input_image`.
       */
      type: 'input_image';

      /**
       * The detail level of the image to be sent to the model. One of `high`, `low`, or
       * `auto`. Defaults to `auto`.
       */
      detail?: string;
    }
  }
}

/**
 * A MultiGrader object combines the output of multiple graders to produce a single
 * score.
 */
export interface MultiGrader {
  /**
   * A formula to calculate the output based on grader results.
   */
  calculate_output: string;

  /**
   * The grader to evaluate. One of a StringCheckGrader, TextSimilarityGrader,
   * PythonGrader, ScoreModelGrader, or LabelModelGrader object.
   */
  graders: StringCheckGrader | TextSimilarityGrader | PythonGrader | ScoreModelGrader | LabelModelGrader;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The object type, which is always `multi`.
   */
  type: 'multi';
}

/**
 * A PythonGrader object that runs a python script on the input.
 */
export interface PythonGrader {
  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The source code of the python script.
   */
  source: string;

  /**
   * The object type, which is always `python`.
   */
  type: 'python';

  /**
   * The image tag to use for the python script.
   */
  image_tag?: string;
}

/**
 * A ScoreModelGrader object that uses a model to assign a score to the input.
 */
export interface ScoreModelGrader {
  /**
   * The input text. This may include template strings.
   */
  input: Array<ScoreModelGrader.Input>;

  /**
   * The model to use for the evaluation.
   */
  model: string;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The object type, which is always `score_model`.
   */
  type: 'score_model';

  /**
   * The range of the score. Defaults to `[0, 1]`.
   */
  range?: Array<number>;

  /**
   * The sampling parameters for the model.
   */
  sampling_params?: ScoreModelGrader.SamplingParams;
}

export declare namespace ScoreModelGrader {
  /**
   * A message input to the model with a role indicating instruction following
   * hierarchy. Instructions given with the `developer` or `system` role take
   * precedence over instructions given with the `user` role. Messages with the
   * `assistant` role are presumed to have been generated by the model in previous
   * interactions.
   */
  interface Input {
    /**
     * Inputs to the model - can contain template strings.
     *
     * NOTE(review): the element type of the array variant is not recoverable from
     * this declaration file, so it is typed as `unknown` - confirm against the API
     * schema.
     */
    content:
      | string
      | ResponsesAPI.ResponseInputText
      | Input.OutputText
      | Input.InputImage
      | ResponsesAPI.ResponseInputAudio
      | Array<unknown>;

    /**
     * The role of the message input. One of `user`, `assistant`, `system`, or
     * `developer`.
     */
    role: 'user' | 'assistant' | 'system' | 'developer';

    /**
     * The type of the message input. Always `message`.
     */
    type?: 'message';
  }

  namespace Input {
    /**
     * A text output from the model.
     */
    interface OutputText {
      /**
       * The text output from the model.
       */
      text: string;

      /**
       * The type of the output text. Always `output_text`.
       */
      type: 'output_text';
    }

    /**
     * An image input to the model.
     */
    interface InputImage {
      /**
       * The URL of the image input.
       */
      image_url: string;

      /**
       * The type of the image input. Always `input_image`.
       */
      type: 'input_image';

      /**
       * The detail level of the image to be sent to the model. One of `high`, `low`, or
       * `auto`. Defaults to `auto`.
       */
      detail?: string;
    }
  }

  /**
   * The sampling parameters for the model.
   */
  interface SamplingParams {
    /**
     * The maximum number of tokens the grader model may generate in its response.
     */
    max_completions_tokens?: number | null;

    /**
     * Constrains effort on reasoning for
     * [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
     * supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning
     * effort can result in faster responses and fewer tokens used on reasoning in a
     * response.
     *
     * Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning
     * effort.
     */
    reasoning_effort?: Shared.ReasoningEffort | null;

    /**
     * A seed value to initialize the randomness, during sampling.
     */
    seed?: number | null;

    /**
     * A higher temperature increases randomness in the outputs.
     */
    temperature?: number | null;

    /**
     * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
     */
    top_p?: number | null;
  }
}

/**
 * A StringCheckGrader object that performs a string comparison between input and
 * reference using a specified operation.
 */
export interface StringCheckGrader {
  /**
   * The input text. This may include template strings.
   */
  input: string;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
   */
  operation: 'eq' | 'ne' | 'like' | 'ilike';

  /**
   * The reference text. This may include template strings.
   */
  reference: string;

  /**
   * The object type, which is always `string_check`.
   */
  type: 'string_check';
}

/**
 * A TextSimilarityGrader object which grades text based on similarity metrics.
 */
export interface TextSimilarityGrader {
  /**
   * The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, `gleu`,
   * `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
   */
  evaluation_metric:
    | 'cosine'
    | 'fuzzy_match'
    | 'bleu'
    | 'gleu'
    | 'meteor'
    | 'rouge_1'
    | 'rouge_2'
    | 'rouge_3'
    | 'rouge_4'
    | 'rouge_5'
    | 'rouge_l';

  /**
   * The text being graded.
   */
  input: string;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The text being graded against.
   */
  reference: string;

  /**
   * The type of grader.
   */
  type: 'text_similarity';
}

/**
 * Re-exports the grader types so they are also reachable as members of
 * `GraderModels` (for example `GraderModels.PythonGrader`).
 */
export declare namespace GraderModels {
  export {
    type LabelModelGrader as LabelModelGrader,
    type MultiGrader as MultiGrader,
    type PythonGrader as PythonGrader,
    type ScoreModelGrader as ScoreModelGrader,
    type StringCheckGrader as StringCheckGrader,
    type TextSimilarityGrader as TextSimilarityGrader,
  };
}
//# sourceMappingURL=grader-models.d.ts.map