forked from qoherent/modrec-workflow

optimized script

parent 4f5101bd7e
commit a092b92174
.gitignore (vendored)
@@ -6,3 +6,4 @@ __pycache__/
*.ipynb
*.onnx
*.json
*.h5
@@ -37,12 +37,12 @@ jobs:
         python -m pip install --upgrade pip
         pip install -r requirements.txt

-      # - name: 1. Build HDF5 Dataset
-      #   run: |
-      #     mkdir -p data/dataset
-      #     PYTHONPATH=. python data/scripts/produce_dataset.py
-      #     echo "datasets produced successfully"
-      #   shell: bash
+      - name: 1. Build HDF5 Dataset
+        run: |
+          mkdir -p data/dataset
+          PYTHONPATH=. python data/scripts/produce_dataset.py
+          echo "datasets produced successfully"
+        shell: bash

       - name: Upload Dataset Artifacts
         uses: actions/upload-artifact@v3
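For context, the newly enabled build step together with the upload step might look roughly like this in the workflow file. The artifact name and path under with: are assumptions, since that part of the upload step is not shown in this hunk.

      - name: 1. Build HDF5 Dataset
        run: |
          mkdir -p data/dataset
          PYTHONPATH=. python data/scripts/produce_dataset.py
          echo "datasets produced successfully"
        shell: bash

      - name: Upload Dataset Artifacts
        uses: actions/upload-artifact@v3
        with:
          name: dataset        # assumed artifact name (not shown in the hunk)
          path: data/dataset   # assumed path; matches the directory the build step creates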
@@ -49,12 +49,9 @@ def write_hdf5_file(records, output_path, dataset_name="data"):
     shape, dtype = sample.shape, sample.dtype

     with h5py.File(output_path, "w") as hf:
-        dset = hf.create_dataset(
-            dataset_name, shape=(len(records),) + shape, dtype=dtype, compression="gzip"
-        )
+        data_arr = np.stack([rec[0] for rec in records])
+        dset = hf.create_dataset(dataset_name, data=data_arr, compression="gzip")

-        for idx, (snip, md) in enumerate(records):
-            dset[idx, ...] = snip

         mg = hf.create_group("metadata")
         mg.create_dataset("metadata", data=meta_arr, compression="gzip")
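In full, the optimized write_hdf5_file might look roughly like the sketch below: instead of pre-allocating the dataset and copying each record inside a Python loop, the snippets are stacked into one NumPy array and written in a single create_dataset call. This is a sketch under assumptions — the hunk does not show how sample or meta_arr are built, so the metadata handling here (JSON strings) is illustrative only.

import json

import h5py
import numpy as np


def write_hdf5_file(records, output_path, dataset_name="data"):
    """Write (snippet, metadata) records to an HDF5 file in one bulk operation.

    Sketch only: the metadata serialization below is an assumption, since the
    hunk does not show how meta_arr is constructed.
    """
    if not records:
        raise ValueError("records must be non-empty")

    # Stack all snippets into a single array so h5py writes them at once,
    # instead of assigning dset[idx, ...] one record at a time.
    data_arr = np.stack([rec[0] for rec in records])

    # Illustrative assumption: store each record's metadata as a JSON string.
    meta_arr = np.array(
        [json.dumps(md) for _, md in records],
        dtype=h5py.string_dtype(encoding="utf-8"),
    )

    with h5py.File(output_path, "w") as hf:
        hf.create_dataset(dataset_name, data=data_arr, compression="gzip")

        mg = hf.create_group("metadata")
        mg.create_dataset("metadata", data=meta_arr, compression="gzip")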