# Copyright 2021 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""A ``DataCatalog`` with a default ``DataSet`` implementation for any data set
which is not registered in the catalog.
"""
from typing import Any, Callable, Dict, Optional
from kedro.io.core import AbstractDataSet
from kedro.io.data_catalog import DataCatalog
from kedro.versioning import Journal
[docs]class DataCatalogWithDefault(DataCatalog):
"""A ``DataCatalog`` with a default ``DataSet`` implementation for any
data set which is not registered in the catalog.
"""
[docs] def __init__(
self,
data_sets: Dict[str, AbstractDataSet] = None,
default: Callable[[str], AbstractDataSet] = None,
remember: bool = False,
):
"""A ``DataCatalog`` with a default ``DataSet`` implementation for any
data set which is not registered in the catalog.
Args:
data_sets: A dictionary of data set names and data set instances.
default: A callable which accepts a single argument of type string,
the key of the data set, and returns an ``AbstractDataSet``.
``load`` and ``save`` calls on data sets which are not
registered to the catalog will be delegated to this
``AbstractDataSet``.
remember: If True, then store in the catalog any
``AbstractDataSet``s provided by the ``default`` callable
argument. Useful when one want to transition from a
``DataCatalogWithDefault`` to a ``DataCatalog``: just call
``DataCatalogWithDefault.to_yaml``, after all required data
sets have been saved/loaded, and use the generated YAML file
with a new ``DataCatalog``.
Raises:
TypeError: If default is not a callable.
Example:
::
>>> from kedro.extras.datasets.pandas import CSVDataSet
>>>
>>> def default_data_set(name):
>>> return CSVDataSet(filepath='data/01_raw/' + name)
>>>
>>> io = DataCatalog(data_sets={},
>>> default=default_data_set)
>>>
>>> # load the file in data/raw/cars.csv
>>> df = io.load("cars.csv")
"""
super().__init__(data_sets)
if not callable(default):
raise TypeError(
"Default must be a callable with a single input "
"string argument: the key of the requested data "
"set."
)
self._default = default
self._remember = remember
[docs] def load(self, name: str, version: str = None) -> Any:
"""Loads a registered data set
Args:
name: A data set to be loaded.
version: Optional version to be loaded.
Returns:
The loaded data as configured.
Raises:
DataSetNotFoundError: When a data set with the given name
has not yet been registered.
"""
data_set = self._data_sets.get(name, self._default(name))
if self._remember and name not in self._data_sets:
self._data_sets[name] = data_set
return data_set.load()
[docs] def save(self, name: str, data: Any):
"""Save data to a registered data set.
Args:
name: A data set to be saved to.
data: A data object to be saved as configured in the registered
data set.
Raises:
DataSetNotFoundError: When a data set with the given name
has not yet been registered.
"""
data_set = self._data_sets.get(name, self._default(name))
if self._remember and name not in self._data_sets:
self._data_sets[name] = data_set
data_set.save(data)
# pylint: disable=too-many-arguments
[docs] @classmethod
def from_config(
cls,
catalog: Optional[Dict[str, Dict[str, Any]]],
credentials: Dict[str, Dict[str, Any]] = None,
load_versions: Dict[str, str] = None,
save_version: str = None,
journal: Journal = None,
):
"""To create a ``DataCatalogWithDefault`` from configuration, please
use:
::
>>> DataCatalogWithDefault.from_data_catalog(
>>> DataCatalog.from_config(catalog, credentials))
Args:
catalog: See ``DataCatalog.from_config``
credentials: See ``DataCatalog.from_config``
load_versions: See ``DataCatalog.from_config``
save_version: See ``DataCatalog.from_config``
journal: See ``DataCatalog.from_config``
Raises:
ValueError: If you try to instantiate a ``DataCatalogWithDefault``
directly with this method.
"""
raise ValueError(
"Cannot instantiate a `DataCatalogWithDefault` "
"directly from configuration files. Please use"
"``DataCatalogWithDefault.from_data_catalog("
"DataCatalog.from_config(catalog, "
"credentials, journal))"
)
[docs] @classmethod
def from_data_catalog(
cls, data_catalog: DataCatalog, default: Callable[[str], AbstractDataSet]
) -> "DataCatalogWithDefault":
"""Convenience factory method to create a ``DataCatalogWithDefault``
from a ``DataCatalog``
A ``DataCatalog`` with a default ``DataSet`` implementation for any
data set which is not registered in the catalog.
Args:
data_catalog: The ``DataCatalog`` to convert to a
``DataCatalogWithDefault``.
default: A callable which accepts a single argument of type string,
the key of the data set, and returns an ``AbstractDataSet``.
``load`` and ``save`` calls on data sets which are not
registered to the catalog will be delegated to this
``AbstractDataSet``.
Returns:
A new ``DataCatalogWithDefault`` which contains all the
``AbstractDataSets`` from the provided data-catalog.
"""
# pylint: disable=protected-access
return cls({**data_catalog._data_sets}, default)
[docs] def shallow_copy(self) -> "DataCatalogWithDefault": # pragma: no cover
"""Returns a shallow copy of the current object.
Returns:
Copy of the current object.
"""
return DataCatalogWithDefault({**self._data_sets}, self._default)