"""Defines an interface to common OpenAI models."""
from abc import abstractmethod
from typing import Any, Generic, NoReturn, Optional, TypeVar

from kedro.io import AbstractDataset, DatasetError
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# Type variable for the OpenAI model type a concrete dataset returns from ``load``.
OPENAI_TYPE = TypeVar("OPENAI_TYPE")
class OpenAIDataset(AbstractDataset[None, OPENAI_TYPE], Generic[OPENAI_TYPE]):
    """Read-only base dataset that builds an OpenAI model from credentials.

    Subclasses supply the concrete class through ``constructor``; ``load``
    instantiates it with the stored API base, API key and any extra keyword
    arguments. Nothing is instantiated until ``load`` is called.
    """

    @property
    @abstractmethod
    def constructor(self) -> type[OPENAI_TYPE]:
        """Return the OpenAI class that ``load`` instantiates."""

    def __init__(
        self,
        credentials: dict[str, str],
        kwargs: Optional[dict[str, Any]] = None,
    ):
        """Store the credentials and constructor arguments for later use.

        Args:
            credentials: must contain `openai_api_base` and `openai_api_key`.
            kwargs: keyword arguments passed to the underlying constructor.

        Raises:
            KeyError: if either required credential key is missing.
        """
        self.openai_api_base = credentials["openai_api_base"]
        self.openai_api_key = credentials["openai_api_key"]
        # Copy so later mutation of the caller's dict cannot alter this dataset.
        self.kwargs = dict(kwargs) if kwargs else {}

    def _describe(self) -> dict[str, Any]:
        """Return the constructor keyword arguments; credentials are left out."""
        return {**self.kwargs}

    def save(self, data: None) -> NoReturn:
        """Always raise, because this dataset is read only.

        Raises:
            DatasetError: on every call.
        """
        raise DatasetError(f"{self.__class__.__name__} is a read only dataset type")

    def load(self) -> OPENAI_TYPE:
        """Instantiate ``constructor`` with the stored credentials and kwargs."""
        return self.constructor(
            openai_api_base=self.openai_api_base,
            openai_api_key=self.openai_api_key,
            **self.kwargs,
        )
class OpenAIEmbeddingsDataset(OpenAIDataset[OpenAIEmbeddings]):
    """``OpenAIEmbeddingsDataset`` loads an OpenAIEmbeddings `langchain <https://python.langchain.com/>`_ model.

    Example usage for the :doc:`YAML API <kedro:data/data_catalog_yaml_examples>`:

    catalog.yml:

    .. code-block:: yaml

        text_embedding_ada_002:
          type: langchain.OpenAIEmbeddingsDataset
          kwargs:
            model: "text-embedding-ada-002"
          credentials: openai

    credentials.yml:

    .. code-block:: yaml

        openai:
          openai_api_base: <openai-api-base>
          openai_api_key: <openai-api-key>

    Example usage for the
    `Python API <https://docs.kedro.org/en/stable/data/advanced_data_catalog_usage.html>`_:

    .. code-block:: pycon

        >>> from kedro_datasets_experimental.langchain import OpenAIEmbeddingsDataset
        >>>
        >>> embeddings = OpenAIEmbeddingsDataset(
        ...     credentials={
        ...         "openai_api_base": "<openai-api-base>",
        ...         "openai_api_key": "<openai-api-key>",
        ...     },
        ...     kwargs={
        ...         "model": "text-embedding-ada-002",
        ...     },
        ... ).load()
        >>>
        >>> # See: https://python.langchain.com/docs/integrations/text_embedding/openai
        >>> embeddings.embed_query("Hello world!")
    """

    @property
    def constructor(self) -> type[OpenAIEmbeddings]:
        """Return the ``OpenAIEmbeddings`` class used to build the model."""
        return OpenAIEmbeddings
class ChatOpenAIDataset(OpenAIDataset[ChatOpenAI]):
    """``ChatOpenAIDataset`` loads a ChatOpenAI `langchain <https://python.langchain.com/>`_ model.

    Example usage for the :doc:`YAML API <kedro:data/data_catalog_yaml_examples>`:

    catalog.yml:

    .. code-block:: yaml

        gpt_3_5_turbo:
          type: langchain.ChatOpenAIDataset
          kwargs:
            model: "gpt-3.5-turbo"
            temperature: 0.0
          credentials: openai

    credentials.yml:

    .. code-block:: yaml

        openai:
          openai_api_base: <openai-api-base>
          openai_api_key: <openai-api-key>

    Example usage for the
    `Python API <https://docs.kedro.org/en/stable/data/advanced_data_catalog_usage.html>`_:

    .. code-block:: pycon

        >>> from kedro_datasets_experimental.langchain import ChatOpenAIDataset
        >>>
        >>> llm = ChatOpenAIDataset(
        ...     credentials={
        ...         "openai_api_base": "<openai-api-base>",
        ...         "openai_api_key": "<openai-api-key>",
        ...     },
        ...     kwargs={
        ...         "model": "gpt-3.5-turbo",
        ...         "temperature": 0.0,
        ...     },
        ... ).load()
        >>>
        >>> # See: https://python.langchain.com/docs/integrations/chat/openai
        >>> llm.invoke("Hello world!")
    """

    @property
    def constructor(self) -> type[ChatOpenAI]:
        """Return the ``ChatOpenAI`` class used to build the model."""
        return ChatOpenAI