import os

import h5py
import numpy as np


def generate_dataset(path_to_recordings, output_path, dataset_name="data"):
    """
    Generates a dataset from a folder of .npy files and saves it to an HDF5 file

    Every regular file in ``path_to_recordings`` whose name ends in ``.npy``
    is loaded and stacked along a new leading axis, in sorted filename order,
    into a single gzip-compressed dataset. The shape and dtype of the dataset
    are taken from the first recording (in sorted order); all other
    recordings must have the same shape.

    Parameters:
    path_to_recordings (str): Path to the folder containing .npy files
    output_path (str): Path to the output HDF5 file (overwritten if it exists)
    dataset_name (str): Name of the dataset in the HDF5 file (default: "data")

    Returns:
    dset (h5py.Dataset): The created dataset object. The HDF5 file is closed
        before this function returns, so the handle refers to a closed file;
        reopen ``output_path`` to read the data back.

    Raises:
    ValueError: If the folder contains no .npy files, or if a recording's
        shape differs from the shape of the first recording.
    """
    parent = os.path.dirname(output_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # os.listdir returns entries in arbitrary order and includes
    # subdirectories and unrelated files; keep only .npy files and sort them
    # so the row order in the output is reproducible.
    files = sorted(
        fname
        for fname in os.listdir(path_to_recordings)
        if fname.lower().endswith(".npy")
        and os.path.isfile(os.path.join(path_to_recordings, fname))
    )
    if not files:
        raise ValueError("No files found in the specified directory.")

    # The first recording defines the per-item shape and dtype.
    sample = np.load(os.path.join(path_to_recordings, files[0]))
    shape = sample.shape
    dtype = sample.dtype

    with h5py.File(output_path, "w") as hf:
        dset = hf.create_dataset(
            dataset_name,
            shape=(len(files),) + shape,
            dtype=dtype,
            compression="gzip",
        )
        for idx, fname in enumerate(files):
            # Reuse the already-loaded first recording instead of reading
            # it from disk a second time.
            if idx == 0:
                data = sample
            else:
                data = np.load(os.path.join(path_to_recordings, fname))
            # Fail loudly on a mismatch rather than letting the assignment
            # broadcast the data or raise an opaque error.
            if data.shape != shape:
                raise ValueError(
                    f"Recording {fname!r} has shape {data.shape}, "
                    f"expected {shape} (shape of {files[0]!r})."
                )
            dset[idx, ...] = data
    return dset


if __name__ == "__main__":
    print(generate_dataset("recordings", "data/dataset.h5"))