import os
import re
try:
import pathlib
except ImportError as e:
try:
import pathlib2 as pathlib
except ImportError:
raise e
import numpy as np
import exdir
try:
import ruamel_yaml as yaml
except ImportError:
import ruamel.yaml as yaml
try:
from collections import abc
except ImportError:
import collections as abc
from .exdir_object import Object
from .mode import assert_file_open, OpenMode, assert_file_writable
from . import exdir_object as exob
from . import dataset as ds
from . import raw
from .. import utils
def _data_to_shape_and_dtype(data, shape, dtype):
if data is not None:
if shape is None:
shape = data.shape
if dtype is None:
dtype = data.dtype
return shape, dtype
if dtype is None:
dtype = np.float32
return shape, dtype
def _assert_data_shape_dtype_match(data, shape, dtype):
if data is not None:
if shape is not None and np.product(shape) != np.product(data.shape):
raise ValueError(
"Provided shape and data.shape do not match: {} vs {}".format(
shape, data.shape
)
)
if dtype is not None and not data.dtype == dtype:
raise ValueError(
"Provided dtype and data.dtype do not match: {} vs {}".format(
dtype, data.dtype
)
)
return
class Group(Object):
    """
    Container of other groups and datasets.
    """

    def __init__(self, root_directory, parent_path, object_name, file):
        """
        WARNING: Internal. Should only be called from require_group.
        """
        super(Group, self).__init__(
            root_directory=root_directory,
            parent_path=parent_path,
            object_name=object_name,
            file=file
        )

    def create_dataset(self, name, shape=None, dtype=None,
                       data=None, fillvalue=None):
        """
        Create a dataset. This will create a folder on the filesystem with the
        given name, an exdir.yaml file that identifies the folder as an Exdir
        Dataset, and a data.npy file that contains the data.

        Parameters
        ----------
        name: str
            Name of the dataset to be created.
        shape: tuple, semi-optional
            Shape of the dataset to be created.
            Must be set together with dtype.
            Cannot be set together with `data`, but must be set if `data` is not set.
        dtype: numpy.dtype
            Data type of the dataset to be created.
            Must be set together with `shape`.
            Cannot be set together with `data`, but must be set if `data` is not set.
        data: scalar, list, numpy.array or plugin-supported type, semi-optional
            Data to be inserted in the created dataset.
            Cannot be set together with `dtype` or `shape`, but must be set if
            `dtype` and `shape` are not set.
        fillvalue: scalar
            Used to create a dataset with the given `shape` and `type` with the
            initial value of `fillvalue`.

        Returns
        -------
        The newly created Dataset.

        Raises
        ------
        FileExistsError
            If an object with the same `name` already exists.

        See also
        --------
        require_dataset
        """
        assert_file_writable(self.file)
        exob._assert_valid_name(name, self)
        if data is None and shape is None:
            raise TypeError(
                "Cannot create dataset. Missing shape or data keyword."
            )
        # Let registered dataset plugins transform the data and contribute
        # attributes/metadata before anything is written to disk.
        prepared_data, attrs, meta = ds._prepare_write(
            data,
            self.file.plugin_manager.dataset_plugins.write_order,
            attrs={},
            meta=exob._default_metadata(exob.DATASET_TYPENAME)
        )
        _assert_data_shape_dtype_match(prepared_data, shape, dtype)
        shape, dtype = _data_to_shape_and_dtype(prepared_data, shape, dtype)
        if prepared_data is not None:
            # An explicit shape may request a size-preserving reshape.
            if shape is not None and prepared_data.shape != shape:
                prepared_data = np.reshape(prepared_data, shape)
        elif shape is not None:
            # No data given: materialize an array filled with `fillvalue`.
            # BUG FIX: the default is applied only when fillvalue is None,
            # so explicit falsy fill values (0, False) are honored instead
            # of being silently replaced via truthiness.
            fill = 0.0 if fillvalue is None else fillvalue
            prepared_data = np.full(shape, fill, dtype=dtype)
        if prepared_data is None:
            raise TypeError("Could not create a meaningful dataset.")
        dataset_directory = self.directory / name
        exob._create_object_directory(dataset_directory, meta)
        dataset = self._dataset(name)
        dataset._reset_data(prepared_data, attrs, None)  # meta already set above
        return dataset

    def create_group(self, name):
        """
        Create a group. This will create a folder on the filesystem with the
        given name and an exdir.yaml file that identifies the folder as a
        group. A group can contain multiple groups and datasets.

        Parameters
        ----------
        name: str
            Name of the subgroup. Must follow the naming convention of the parent Exdir File.

        Raises
        ------
        FileExistsError
            If an object with the same `name` already exists.

        Returns
        -------
        The newly created Group.

        See also
        --------
        require_group
        """
        assert_file_writable(self.file)
        path = utils.path.name_to_asserted_group_path(name)
        if len(path.parts) > 1:
            # Nested path: make sure the intermediate groups exist, then
            # create the leaf inside the deepest one.
            subgroup = self.require_group(path.parent)
            # BUG FIX: return the created leaf group; previously this
            # branch returned None.
            return subgroup.create_group(path.name)
        exob._assert_valid_name(path, self)
        if name in self:
            raise FileExistsError(
                "'{}' already exists in '{}'".format(name, self.name)
            )
        group_directory = self.directory / path
        exob._create_object_directory(
            group_directory, exob._default_metadata(exob.GROUP_TYPENAME)
        )
        return self._group(name)

    def _group(self, name):
        # Internal: wrap an existing child directory as a Group object.
        # Does not touch the filesystem.
        return Group(
            root_directory=self.root_directory,
            parent_path=self.relative_path,
            object_name=name,
            file=self.file
        )

    def require_group(self, name):
        """
        Open an existing subgroup or create one if it does not exist.
        This might create a new subfolder on the file system.

        Parameters
        ----------
        name: str
            Name of the subgroup. Must follow the naming convention of the parent Exdir File.

        Returns
        -------
        The existing or created group.

        See also
        --------
        create_group
        """
        assert_file_open(self.file)
        path = utils.path.name_to_asserted_group_path(name)
        if len(path.parts) > 1:
            # Recurse one path component at a time.
            subgroup = self.require_group(path.parent)
            return subgroup.require_group(path.name)
        group_directory = self.directory / name
        if name in self:
            current_object = self[name]
            if isinstance(current_object, Group):
                return current_object
            raise TypeError(
                "An object with name '{}' already "
                "exists, but it is not a Group.".format(name)
            )
        elif group_directory.exists():
            # BUG FIX: the message is built with str.format; the previous
            # str + pathlib.Path concatenation raised TypeError instead of
            # the intended FileExistsError.
            raise FileExistsError(
                "Directory {} already exists, "
                "but is not an Exdir object.".format(group_directory)
            )
        return self.create_group(name)

    def require_dataset(self, name, shape=None, dtype=None, exact=False,
                        data=None, fillvalue=None):
        """
        Open an existing dataset or create it if it does not exist.

        Parameters
        ----------
        name: str
            Name of the dataset. Must follow naming convention of parent Exdir File.
        shape: np.array, semi-optional
            Shape of the dataset. Must be set together with dtype.
            Cannot be set together with data, but must be set if data is not set.
            Will be used to verify that an existing dataset has the same shape or
            to create a new dataset of the given shape.
            See also `exact`.
        dtype: np.dtype, semi-optional
            NumPy datatype of the dataset. Must be set together with shape.
            Cannot be set together with data, but must be set if data is not set.
            Will be used to verify that an existing dataset has the same or a
            convertible dtype or to create a new dataset with the given dtype.
            See also `exact`.
        exact: bool, optional
            Only used if the dataset already exists.
            If `exact` is `False`, the shape must match the existing dataset and
            the data type must be convertible between the existing and requested
            data type.
            If `exact` is `True`, the `shape` and `dtype` must match exactly.
            The default is False.
            See also `shape`, `dtype` and `data`.
        data: list, np.array, semi-optional
            The data that will be used to create the dataset if it does not already exist.
            The shape and dtype of `data` will be compared to the existing dataset if it already exists.
            See `shape`, `dtype` and `exact`.
        fillvalue: scalar
            Used to create a dataset with the given `shape` and `type` with the
            initial value of `fillvalue`.
        """
        assert_file_open(self.file)
        if name not in self:
            return self.create_dataset(
                name,
                shape=shape,
                dtype=dtype,
                data=data,
                fillvalue=fillvalue
            )
        current_object = self[name]
        if not isinstance(current_object, ds.Dataset):
            raise TypeError(
                "Incompatible object already exists: {}".format(
                    current_object.__class__.__name__
                )
            )
        data, attrs, meta = ds._prepare_write(
            data,
            plugins=self.file.plugin_manager.dataset_plugins.write_order,
            attrs={},
            meta={}
        )
        # TODO verify proper attributes
        _assert_data_shape_dtype_match(data, shape, dtype)
        shape, dtype = _data_to_shape_and_dtype(data, shape, dtype)
        # Shapes must match exactly regardless of `exact`.
        if not np.array_equal(shape, current_object.shape):
            raise TypeError(
                "Shapes do not match (existing {} vs "
                "new {})".format(current_object.shape, shape)
            )
        if dtype != current_object.dtype:
            if exact:
                raise TypeError(
                    "Datatypes do not exactly match "
                    "existing {} vs new {})".format(current_object.dtype, dtype)
                )
            # Non-exact mode tolerates a safely-convertible dtype.
            if not np.can_cast(dtype, current_object.dtype):
                raise TypeError(
                    "Cannot safely cast from {} to {}".format(
                        dtype,
                        current_object.dtype
                    )
                )
        return current_object

    def __contains__(self, name):
        """
        Checks the existence of an object with the given name in the group.

        Parameters
        ----------
        name: str
            the case-sensitive name of the object
        """
        if self.file.io_mode == OpenMode.FILE_CLOSED:
            # A closed file contains nothing, by definition.
            return False
        if name == ".":
            # "." refers to the group itself.
            return True
        if name == "":
            return False
        path = utils.path.name_to_asserted_group_path(name)
        directory = self.directory / path
        return exob.is_exdir_object(directory)

    def __getitem__(self, name):
        """
        Retrieves the object with the given name if it exists in the group.

        Parameters
        ----------
        name: str
            the case-sensitive name of the object to retrieve

        Raises
        ------
        KeyError:
            if the name does not correspond to an exdir object in the group
        """
        assert_file_open(self.file)
        path = utils.path.name_to_asserted_group_path(name)
        if len(path.parts) > 1:
            # Resolve nested paths one component at a time.
            top_directory = path.parts[0]
            sub_name = pathlib.PurePosixPath(*path.parts[1:])
            return self[top_directory][sub_name]
        if name not in self:
            error_message = "No such object: '{name}' in path '{path}'".format(
                name=name,
                path=str(self.directory)
            )
            raise KeyError(error_message)
        directory = self.directory / path
        if exob.is_raw_object_directory(directory):  # TODO create one function that handles all Raw creation
            return raw.Raw(
                root_directory=self.root_directory,
                parent_path=self.relative_path,
                object_name=name,
                file=self.file
            )
        if not exob.is_nonraw_object_directory(directory):
            # BUG FIX: the message is built with str.format; the previous
            # str + pathlib.Path concatenation raised TypeError instead of
            # the intended IOError.
            raise IOError(
                "Directory '{}' is not a valid exdir object.".format(directory)
            )
        meta_filename = directory / exob.META_FILENAME
        with meta_filename.open("r", encoding="utf-8") as meta_file:
            meta_data = yaml.safe_load(meta_file)
        # Dispatch on the recorded object type in the metadata file.
        if meta_data[exob.EXDIR_METANAME][exob.TYPE_METANAME] == exob.DATASET_TYPENAME:
            return self._dataset(name)
        elif meta_data[exob.EXDIR_METANAME][exob.TYPE_METANAME] == exob.GROUP_TYPENAME:
            return self._group(name)
        else:
            error_string = (
                "Object {name} has data type {type}.\n"
                "We cannot open objects of this type."
            ).format(
                name=name,
                type=meta_data[exob.EXDIR_METANAME][exob.TYPE_METANAME]
            )
            raise NotImplementedError(error_string)

    def _dataset(self, name):
        # Internal: wrap an existing child directory as a Dataset object.
        return ds.Dataset(
            root_directory=self.root_directory,
            parent_path=self.relative_path,
            object_name=name,
            file=self.file
        )

    def __setitem__(self, name, value):
        """
        Set or create a dataset with the given name from the given value.

        Parameters
        ----------
        name: str
            name of the existing or new dataset
        value: object
            value that will be used to create a new or set
            the contents of an existing dataset
        """
        assert_file_open(self.file)
        path = utils.path.name_to_asserted_group_path(name)
        if len(path.parts) > 1:
            # Delegate to the parent group of the leaf name.
            self[path.parent][path.name] = value
            return
        if name not in self:
            self.create_dataset(name, data=value)
            return
        if not isinstance(self[name], ds.Dataset):
            raise RuntimeError(
                "Unable to assign value, {} already exists".format(name)
            )
        self[name].value = value

    def __delitem__(self, name):
        """
        Delete a child (an object contained in group).

        Parameters
        ----------
        name: str
            name of the existing child
        """
        assert_file_writable(self.file)
        exob._remove_object_directory(self[name].directory)

    def keys(self):
        """
        Returns
        -------
        KeysView
            A view of the names of the objects in the group.
        """
        assert_file_open(self.file)
        return abc.KeysView(self)

    def items(self):
        """
        Returns
        -------
        ItemsView
            A view of the keys and objects in the group.
        """
        assert_file_open(self.file)
        return abc.ItemsView(self)

    def values(self):
        """
        Returns
        -------
        ValuesView
            A view of the objects in the group.
        """
        assert_file_open(self.file)
        return abc.ValuesView(self)

    def __iter__(self):
        """
        Iterate over the names of all the objects in the group, sorted.
        """
        assert_file_open(self.file)
        # NOTE os.walk is way faster than os.listdir + os.path.isdir
        directories = next(os.walk(str(self.directory)))[1]
        for name in sorted(directories):
            yield name

    def __len__(self):
        """
        Number of objects in the group.
        """
        assert_file_open(self.file)
        # Count via iteration; __iter__ is backed by a directory listing.
        return sum(1 for _ in self)

    def get(self, key):
        """
        Get an object in the group.

        Parameters
        ----------
        key : str
            The key of the desired object

        Returns
        -------
        Value or None if object does not exist.
        """
        assert_file_open(self.file)
        if key in self:
            return self[key]
        return None

    def _ipython_key_completions_(self):
        # Feed group member names to IPython's tab completion for obj["<TAB>".
        return self.keys()