forked from qoherent/modrec-workflow

commit a092b92174
parent 4f5101bd7e

    optimized script
.gitignore (vendored)
@@ -6,3 +6,4 @@ __pycache__/
 *.ipynb
 *.onnx
 *.json
+*.h5
@@ -37,12 +37,12 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt

-      # - name: 1. Build HDF5 Dataset
-      #   run: |
-      #     mkdir -p data/dataset
-      #     PYTHONPATH=. python data/scripts/produce_dataset.py
-      #     echo "datasets produced successfully"
-      #   shell: bash
+      - name: 1. Build HDF5 Dataset
+        run: |
+          mkdir -p data/dataset
+          PYTHONPATH=. python data/scripts/produce_dataset.py
+          echo "datasets produced successfully"
+        shell: bash

       - name: Upload Dataset Artifacts
         uses: actions/upload-artifact@v3
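This hunk re-enables the previously commented-out dataset build step, so the workflow now runs data/scripts/produce_dataset.py and then uploads the results with actions/upload-artifact@v3. As a minimal sketch, a post-build sanity check could look like the following; the file name data/dataset/modrec.h5 is an assumption (the actual output path is not visible in this hunk), while the default dataset name "data" comes from the write_hdf5_file signature in the next diff:

# Hypothetical post-build sanity check. The actual HDF5 file name is not
# visible in this diff, so "data/dataset/modrec.h5" is an assumption.
import sys

import h5py


def check_dataset(path, dataset_name="data"):
    # Open the produced file read-only and report the main dataset's shape.
    with h5py.File(path, "r") as hf:
        dset = hf[dataset_name]
        print(f"{path}: '{dataset_name}' shape={dset.shape} dtype={dset.dtype}")
        # The commit also stores per-record metadata under a "metadata" group.
        if "metadata" in hf:
            print(f"metadata entries: {hf['metadata']['metadata'].shape[0]}")


if __name__ == "__main__":
    check_dataset(sys.argv[1] if len(sys.argv) > 1 else "data/dataset/modrec.h5")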
@@ -49,12 +49,9 @@ def write_hdf5_file(records, output_path, dataset_name="data"):
     shape, dtype = sample.shape, sample.dtype

     with h5py.File(output_path, "w") as hf:
-        dset = hf.create_dataset(
-            dataset_name, shape=(len(records),) + shape, dtype=dtype, compression="gzip"
-        )
-
-        for idx, (snip, md) in enumerate(records):
-            dset[idx, ...] = snip
+        data_arr = np.stack([rec[0] for rec in records])
+        dset = hf.create_dataset(dataset_name, data=data_arr, compression="gzip")

         mg = hf.create_group("metadata")
         mg.create_dataset("metadata", data=meta_arr, compression="gzip")
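The rewrite drops the per-record assignment loop in favor of a single bulk write: np.stack assembles every snippet into one (N, *shape) array, and h5py infers shape and dtype from data=, compressing the whole block in one call. A self-contained sketch of the new code path is below; the (snippet, metadata) record layout matches the old loop's unpacking, but the meta_arr construction is an assumption, since it is built outside the hunk shown above:

import h5py
import numpy as np


def write_hdf5_file(records, output_path, dataset_name="data"):
    # Bulk-write variant from this commit: one np.stack plus one
    # create_dataset call replaces the old per-record assignment loop.
    if not records:
        raise ValueError("records must be non-empty")

    with h5py.File(output_path, "w") as hf:
        # Stack all snippets into a single (N, *shape) array; h5py infers
        # shape and dtype from data=, so neither needs to be passed.
        data_arr = np.stack([rec[0] for rec in records])
        hf.create_dataset(dataset_name, data=data_arr, compression="gzip")

        # meta_arr is built outside the hunk shown above; serializing each
        # record's metadata to UTF-8 bytes is an assumption for this sketch.
        meta_arr = np.array([str(rec[1]).encode("utf-8") for rec in records])
        mg = hf.create_group("metadata")
        mg.create_dataset("metadata", data=meta_arr, compression="gzip")


# Illustrative usage with synthetic I/Q-style snippets:
if __name__ == "__main__":
    recs = [(np.random.randn(2, 128).astype(np.float32), {"mod": "bpsk"})
            for _ in range(16)]
    write_hdf5_file(recs, "example.h5")

Writing one contiguous array also lets gzip compress large chunks at once rather than one record at a time, which is typically where the speedup in a commit titled "optimized script" comes from; the trade-off is peak memory, since every snippet is resident in data_arr before the write.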