class Dataset

source code

__init__

source code

def __init__(
        self,
        data: Any,
        *,
        name: str = None,
        artifacts: Dict[str, Any] = None,
        with_ids: Optional[bool] = False,
        api_key: Optional[str] = None,
    ) -> None:

Initialize a local dataset.

Arguments:

  • data - The data for populating the dataset.
  • name - The name of the dataset. To create a dataset for a specific project with name {project_name}, prefix the name with {project_name}/{name}.
  • artifacts - Dataset metadata. This is an optional dict.
  • with_ids - Whether platform entry ids are passed with the data. Defaults to False.
  • api_key - API key for accessing the Unify API. If None, it attempts to retrieve the API key from the environment variable UNIFY_KEY.

Raises:

  • UnifyError: If the API key is missing.

properties


name

source code

def name(self) -> str:

Name of the dataset.

setters


set_name

source code

def set_name(self, name: str) -> Self:

Set the name of the dataset.

Arguments:

  • name - The name to set the dataset to.

Returns:

This dataset, useful for chaining methods.

methods


add

source code

def add(
        self,
        other: Union[
            Dataset,
            str,
            Dict,
            Prompt,
            int,
            List[Union[str, Dict, Prompt]],
        ],
    ) -> Self:

Adds another dataset to this one, returning a new Dataset instance, with this new dataset receiving all unique queries from the other added dataset.

Arguments:

  • other - The other dataset being added to this one.

Returns:

The new dataset following the addition.


download

source code

def download(self, overwrite: bool = False) -> Self:

Downloads all unique upstream data from the user account to the local dataset. This function will not upload any unique values stored locally. Use sync to synchronize the datasets in both directions, so that each ends up holding the superset of both. Set overwrite=True to disregard any pre-existing data stored in this class.

Arguments:

  • overwrite - Whether to overwrite the local data, if any already exists.

Returns:

This dataset after the in-place download, useful for chaining methods.


from_upstream

source code

def from_upstream(
        name: str,
        api_key: Optional[str] = None,
    ) -> Dataset:

Initialize a local dataset from the upstream dataset.

Arguments:

  • name - The name of the dataset.
  • api_key - API key for accessing the Unify API. If None, it attempts to retrieve the API key from the environment variable UNIFY_KEY.

Returns:

The dataset, with contents downloaded from upstream.

Raises:

  • UnifyError: If the API key is missing.

inplace_add

source code

def inplace_add(
        self,
        other: Union[
            Dataset,
            str,
            Dict,
            Prompt,
            int,
            List[Union[str, Dict, Prompt]],
        ],
    ) -> Self:

Adds another dataset to this one, with this dataset receiving all unique queries from the other added dataset.

Arguments:

  • other - The other dataset being added to this one.

Returns:

This dataset following the in-place addition.


inplace_sub

source code

def inplace_sub(
        self,
        other: Union[Dataset, str, Dict, Prompt, List[Union[str, Dict, Prompt]]],
    ) -> Self:

Subtracts another dataset from this one, with this dataset losing all queries from the other subtracted dataset.

Arguments:

  • other - The other dataset being subtracted from this one.

Returns:

This dataset following the in-place subtraction.


sub

source code

def sub(
        self,
        other: Union[Dataset, str, Dict, Prompt, List[Union[str, Dict, Prompt]]],
    ) -> Self:

Subtracts another dataset from this one, returning a new Dataset instance, with this new dataset losing all queries from the other subtracted dataset.

Arguments:

  • other - The other dataset being subtracted from this one.

Returns:

The new dataset following the subtraction.


sync

source code

def sync(self) -> Self:

Synchronize the dataset in both directions, downloading any values missing locally, and uploading any values missing from upstream in the account.

Returns:

This dataset after the in-place sync, useful for chaining methods.


upload

source code

def upload(self, overwrite: bool = False) -> Self:

Uploads all unique local data in the dataset to the user account upstream. This function will not download any unique values from upstream. Use sync to synchronize the datasets in both directions, so that each ends up holding the superset of both. Set overwrite=True to disregard any pre-existing upstream data.

Arguments:

  • overwrite - Whether to overwrite the upstream dataset if it already exists.

Returns:

This dataset, useful for chaining methods.


upstream_diff

source code

def upstream_diff(self) -> Self:

Prints the difference between the local dataset and the upstream dataset.

Returns:

This dataset after printing the diff, useful for chaining methods.

dunder_methods


__add__

source code

def __add__(
        self,
        other: Union[Dataset, str, Dict, Prompt, List[Union[str, Dict, Prompt]]],
    ) -> Self:

Adds another dataset to this one via the + operator, returning a new Dataset instance, with this new dataset receiving all unique queries from the other added dataset.

Arguments:

  • other - The other dataset being added to this one.

Returns:

The new dataset following the addition.


__contains__

source code

def __contains__(
        self,
        item: Union[Dataset, str, Dict, Prompt, List[Union[str, Dict, Prompt]]],
    ) -> bool:

Determine whether the item is contained within the dataset. The item is cast to a Dataset instance, and can therefore take on many different types. Only returns True if all entries in the passed dataset are contained within this dataset.

Arguments:

  • item - The item to cast to a Dataset before checking if it’s a subset of this one.

Returns:

Boolean, whether the passed Dataset is a subset of this one.


__getitem__

source code

def __getitem__(self, item: Union[int, slice]) -> Union[Any, Dataset]:

Gets an item from the dataset, either via an int or a slice. For an int, a single data instance is returned; for a slice, a Dataset instance is returned.

Arguments:

  • item - integer or slice for extraction.

Returns:

An individual item or Dataset slice, for int and slice queries respectively.


__iadd__

source code

def __iadd__(
        self,
        other: Union[Dataset, str, Dict, Prompt, List[Union[str, Dict, Prompt]]],
    ) -> Self:

Adds another dataset to this one, with this dataset receiving all unique queries from the other added dataset.

Arguments:

  • other - The other dataset being added to this one.

Returns:

This dataset following the in-place addition.


__isub__

source code

def __isub__(
        self,
        other: Union[Dataset, str, Dict, Prompt, List[Union[str, Dict, Prompt]]],
    ) -> Self:

Subtracts another dataset from this one, with this dataset losing all queries from the other subtracted dataset.

Arguments:

  • other - The other dataset being subtracted from this one.

Returns:

This dataset following the in-place subtraction.


__iter__

source code

def __iter__(self) -> Any:

Iterates through the dataset, returning one instance at a time.

Returns:

The next instance in the dataset.


__len__

source code

def __len__(self) -> int:

Returns the number of entries contained within the dataset.

Returns:

The number of entries in the dataset.


__radd__

source code

def __radd__(
        self,
        other: Union[
            Dataset,
            str,
            Dict,
            Prompt,
            int,
            List[Union[str, Dict, Prompt]],
        ],
    ) -> Self:

Adds another dataset to this one via the + operator. This is used if the other item does not have a valid add method for these two types. Returns a new Dataset instance, with this new dataset receiving all unique queries from the other added dataset.

Arguments:

  • other - The other dataset being added to this one.

Returns:

The new dataset following the addition.


__rsub__

source code

def __rsub__(
        self,
        other: Union[Dataset, str, Dict, Prompt, List[Union[str, Dict, Prompt]]],
    ) -> Self:

Subtracts another dataset from this one via the - operator. This is used if the other item does not have a valid sub method for these two types. Returns a new Dataset instance, with this new dataset losing all queries from the other subtracted dataset.

Arguments:

  • other - The other dataset being subtracted from this one.

Returns:

The new dataset following the subtraction.


__sub__

source code

def __sub__(
        self,
        other: Union[Dataset, str, Dict, Prompt, List[Union[str, Dict, Prompt]]],
    ) -> Self:

Subtracts another dataset from this one via the - operator, returning a new Dataset instance, with this new dataset losing all queries from the other subtracted dataset.

Arguments:

  • other - The other dataset being subtracted from this one.

Returns:

The new dataset following the subtraction.