vllm.entrypoints.pooling.base.protocol

ChatRequestMixin

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/pooling/base/protocol.py
class ChatRequestMixin(OpenAIBaseModel):
    messages: list[ChatCompletionMessageParam]

    add_generation_prompt: bool = Field(
        default=False,
        description=(
            "If true, the generation prompt will be added to the chat template. "
            "This is a parameter used by chat template in tokenizer config of the "
            "model."
        ),
    )

    continue_final_message: bool = Field(
        default=False,
        description=(
            "If this is set, the chat will be formatted so that the final "
            "message in the chat is open-ended, without any EOS tokens. The "
            "model will continue this message rather than starting a new one. "
            'This allows you to "prefill" part of the model\'s response for it. '
            "Cannot be used at the same time as `add_generation_prompt`."
        ),
    )

    add_special_tokens: bool = Field(
        default=False,
        description=(
            "If true, special tokens (e.g. BOS) will be added to the prompt "
            "on top of what is added by the chat template. "
            "For most models, the chat template takes care of adding the "
            "special tokens so this should be set to false (as is the "
            "default)."
        ),
    )

    chat_template: str | None = Field(
        default=None,
        description=(
            "A Jinja template to use for this conversion. "
            "As of transformers v4.44, default chat template is no longer "
            "allowed, so you must provide a chat template if the tokenizer "
            "does not define one."
        ),
    )

    chat_template_kwargs: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Additional keyword args to pass to the template renderer. "
            "Will be accessible by the chat template."
        ),
    )

    @model_validator(mode="before")
    @classmethod
    def check_generation_prompt(cls, data):
        if data.get("continue_final_message") and data.get("add_generation_prompt"):
            raise ValueError(
                "Cannot set both `continue_final_message` and "
                "`add_generation_prompt` to True."
            )
        return data
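
The mixin can be exercised on its own as a Pydantic model. A minimal sketch, assuming plain dicts are accepted for ChatCompletionMessageParam; only the field names come from the source above, the message content is illustrative:

from vllm.entrypoints.pooling.base.protocol import ChatRequestMixin

# "Prefill" part of the model's response: the final assistant message is
# left open-ended and the model continues it rather than starting a new turn.
req = ChatRequestMixin(
    messages=[
        {"role": "user", "content": "Summarize this document."},
        {"role": "assistant", "content": "In short,"},
    ],
    continue_final_message=True,
)

# add_generation_prompt stays at its default (False); setting it together
# with continue_final_message is rejected by check_generation_prompt below.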

add_generation_prompt class-attribute instance-attribute

add_generation_prompt: bool = Field(
    default=False,
    description="If true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.",
)

add_special_tokens class-attribute instance-attribute

add_special_tokens: bool = Field(
    default=False,
    description="If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).",
)

chat_template class-attribute instance-attribute

chat_template: str | None = Field(
    default=None,
    description="A Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.",
)

chat_template_kwargs class-attribute instance-attribute

chat_template_kwargs: dict[str, Any] | None = Field(
    default=None,
    description="Additional keyword args to pass to the template renderer. Will be accessible by the chat template.",
)

continue_final_message class-attribute instance-attribute

continue_final_message: bool = Field(
    default=False,
    description='If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to "prefill" part of the model\'s response for it. Cannot be used at the same time as `add_generation_prompt`.',
)

messages instance-attribute

messages: list[ChatCompletionMessageParam]

check_generation_prompt classmethod

check_generation_prompt(data)
Source code in vllm/entrypoints/pooling/base/protocol.py
@model_validator(mode="before")
@classmethod
def check_generation_prompt(cls, data):
    if data.get("continue_final_message") and data.get("add_generation_prompt"):
        raise ValueError(
            "Cannot set both `continue_final_message` and "
            "`add_generation_prompt` to True."
        )
    return data
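
For example, a request that sets both flags fails validation; a short sketch (pydantic surfaces the ValueError raised here as a ValidationError, which is itself a ValueError subclass):

from vllm.entrypoints.pooling.base.protocol import ChatRequestMixin

try:
    ChatRequestMixin(
        messages=[{"role": "user", "content": "hi"}],
        add_generation_prompt=True,
        continue_final_message=True,
    )
except ValueError as exc:
    # The error text quotes check_generation_prompt: cannot set both
    # `continue_final_message` and `add_generation_prompt` to True.
    print(exc)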

CompletionRequestMixin

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/pooling/base/protocol.py
class CompletionRequestMixin(OpenAIBaseModel):
    input: list[int] | list[list[int]] | str | list[str]

    add_special_tokens: bool = Field(
        default=True,
        description=(
            "If true (the default), special tokens (e.g. BOS) will be added to "
            "the prompt."
        ),
    )
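
The input field accepts four shapes: a single string, a batch of strings, a single list of token IDs, or a batch of token-ID lists. A brief sketch using the field names above; the values are illustrative:

from vllm.entrypoints.pooling.base.protocol import CompletionRequestMixin

single_text = CompletionRequestMixin(input="Hello world")
batch_text = CompletionRequestMixin(input=["Hello", "world"])
single_tokens = CompletionRequestMixin(input=[1, 2, 3])
batch_tokens = CompletionRequestMixin(input=[[1, 2, 3], [4, 5]])

# add_special_tokens defaults to True here, so tokens like BOS are added
# when text inputs are tokenized.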

add_special_tokens class-attribute instance-attribute

add_special_tokens: bool = Field(
    default=True,
    description="If true (the default), special tokens (e.g. BOS) will be added to the prompt.",
)

input instance-attribute

input: list[int] | list[list[int]] | str | list[str]

PoolingBasicRequestMixin

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/pooling/base/protocol.py
class PoolingBasicRequestMixin(OpenAIBaseModel):
    model: str | None = None
    user: str | None = None
    truncate_prompt_tokens: Annotated[int, Field(ge=-1)] | None = None

    request_id: str = Field(
        default_factory=random_uuid,
        description=(
            "The request_id related to this request. If the caller does "
            "not set it, a random_uuid will be generated. This id is used "
            "through out the inference process and return in response."
        ),
    )

    priority: int = Field(
        default=0,
        description=(
            "The priority of the request (lower means earlier handling; "
            "default: 0). Any priority other than 0 will raise an error "
            "if the served model does not use priority scheduling."
        ),
    )
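
A minimal sketch of the defaults, grounded only in the fields above; the model name is a placeholder:

from vllm.entrypoints.pooling.base.protocol import PoolingBasicRequestMixin

req = PoolingBasicRequestMixin(model="my-embedding-model")

print(req.request_id)  # generated by random_uuid because the caller did not set it
print(req.priority)    # 0; any other value requires priority scheduling on the server
print(req.truncate_prompt_tokens)  # None unless supplied; constrained to >= -1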

model class-attribute instance-attribute

model: str | None = None

priority class-attribute instance-attribute

priority: int = Field(
    default=0,
    description="The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.",
)

request_id class-attribute instance-attribute

request_id: str = Field(
    default_factory=random_uuid,
    description="The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.",
)

truncate_prompt_tokens class-attribute instance-attribute

truncate_prompt_tokens: (
    Annotated[int, Field(ge=-1)] | None
) = None
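
The Annotated[int, Field(ge=-1)] bound means values below -1 are rejected at validation time; a quick sketch:

from pydantic import ValidationError

from vllm.entrypoints.pooling.base.protocol import PoolingBasicRequestMixin

PoolingBasicRequestMixin(truncate_prompt_tokens=128)  # accepted
PoolingBasicRequestMixin(truncate_prompt_tokens=-1)   # accepted, the lower bound

try:
    PoolingBasicRequestMixin(truncate_prompt_tokens=-2)  # below ge=-1
except ValidationError as exc:
    print(exc)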

user class-attribute instance-attribute

user: str | None = None