updating workflow and prints for logs
camilolaiton committed Jun 11, 2024
1 parent 61d23d7 · commit f30c30f
Showing 2 changed files with 54 additions and 50 deletions.
.github/workflows/test_and_lint.yml (2 changes: 1 addition & 1 deletion)

@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ '3.10', '3.11' ]
+        python-version: [ '3.10' ]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
src/aind_smartspim_data_transformation/compress/png_to_zarr.py (102 changes: 53 additions & 49 deletions)
Expand Up @@ -22,7 +22,7 @@
from dask import config as da_cfg
from dask.array.core import Array
from dask.base import tokenize
from dask.distributed import Client, LocalCluster # , performance_report
from dask.distributed import Client, LocalCluster, performance_report

# from distributed import wait
from numcodecs import blosc
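
For context on the now-active import: dask.distributed.performance_report is a context manager that records scheduler and worker activity for everything computed inside its block and writes it out as a standalone HTML file. A minimal sketch of how it is typically used; the cluster size, array, and filename below are illustrative, not this project's settings:

    from dask.distributed import Client, LocalCluster, performance_report
    import dask.array as da

    if __name__ == "__main__":
        # Illustrative two-worker local cluster; the real writer
        # configures its own cluster elsewhere in this module.
        cluster = LocalCluster(n_workers=2, threads_per_worker=1)
        client = Client(cluster)

        # Everything computed inside the context manager is profiled;
        # the HTML report is written when the block exits.
        with performance_report(filename="report_example.html"):
            x = da.random.random((2048, 2048), chunks=(512, 512))
            x.mean().compute()

        client.close()
        cluster.close()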
@@ -646,62 +646,66 @@ def smartspim_channel_zarr_writer(
         metadata=_get_pyramid_metadata(),
     )
 
-    # performance_report_path = f"{output_path}/report_{stack_name}.html"
+    performance_report_path = f"{output_path}/report_{stack_name}.html"
 
     start_time = time.time()
     # Writing zarr and performance report
-    # with performance_report(filename=performance_report_path):
-    logger.info(f"{'='*40}Writing channel {channel_name}{'='*40}")
+    with performance_report(filename=performance_report_path):
+        logger.info(f"{'='*40}Writing channel {channel_name}{'='*40}")
 
-    # Writing zarr
-    block_shape = list(
-        BlockedArrayWriter.get_block_shape(
-            arr=image_data, target_size_mb=12800  # 51200,
-        )
-    )
+        # Writing zarr
+        block_shape = list(
+            BlockedArrayWriter.get_block_shape(
+                arr=image_data, target_size_mb=12800  # 51200,
+            )
+        )
 
-    # Formatting to 5D block shape
-    block_shape = ([1] * (5 - len(block_shape))) + block_shape
-    written_pyramid = []
-    pyramid_group = None
+        # Formatting to 5D block shape
+        block_shape = ([1] * (5 - len(block_shape))) + block_shape
+        written_pyramid = []
+        pyramid_group = None
 
-    # Writing multiple levels
-    for level in range(n_lvls):
-        if not level:
-            array_to_write = image_data
+        # Writing multiple levels
+        for level in range(n_lvls):
+            if not level:
+                array_to_write = image_data
 
-        else:
-            # It's faster to write the scale and then read it back
-            # to compute the next scale
-            previous_scale = da.from_zarr(pyramid_group, pyramid_group.chunks)
-            new_scale_factor = (
-                [1] * (len(previous_scale.shape) - len(scale_factor))
-            ) + scale_factor
+            else:
+                # It's faster to write the scale and then read it back
+                # to compute the next scale
+                previous_scale = da.from_zarr(
+                    pyramid_group, pyramid_group.chunks
+                )
+                new_scale_factor = (
+                    [1] * (len(previous_scale.shape) - len(scale_factor))
+                ) + scale_factor
 
-            previous_scale_pyramid, _ = compute_pyramid(
-                data=previous_scale,
-                scale_axis=new_scale_factor,
-                chunks=image_data.chunksize,
-                n_lvls=2,
-            )
-            array_to_write = previous_scale_pyramid[-1]
+                previous_scale_pyramid, _ = compute_pyramid(
+                    data=previous_scale,
+                    scale_axis=new_scale_factor,
+                    chunks=image_data.chunksize,
+                    n_lvls=2,
+                )
+                array_to_write = previous_scale_pyramid[-1]
 
-        logger.info(f"[level {level}]: pyramid level: {array_to_write}")
+            logger.info(f"[level {level}]: pyramid level: {array_to_write}")
 
-        # Create the scale dataset
-        pyramid_group = new_channel_group.create_dataset(
-            name=level,
-            shape=array_to_write.shape,
-            chunks=array_to_write.chunksize,
-            dtype=array_to_write.dtype,
-            compressor=writing_options,
-            dimension_separator="/",
-            overwrite=True,
-        )
+            # Create the scale dataset
+            pyramid_group = new_channel_group.create_dataset(
+                name=level,
+                shape=array_to_write.shape,
+                chunks=array_to_write.chunksize,
+                dtype=array_to_write.dtype,
+                compressor=writing_options,
+                dimension_separator="/",
+                overwrite=True,
+            )
 
-        # Block Zarr Writer
-        BlockedArrayWriter.store(array_to_write, pyramid_group, block_shape)
-        written_pyramid.append(array_to_write)
+            # Block Zarr Writer
+            BlockedArrayWriter.store(
+                array_to_write, pyramid_group, block_shape
+            )
+            written_pyramid.append(array_to_write)
 
     end_time = time.time()
     logger.info(f"Time to write the dataset: {end_time - start_time}")
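The loop's write-then-read-back strategy (flagged by the "It's faster to write the scale and then read it back" comment) keeps each downsample step cheap: level N+1 is derived from the materialized level N on disk rather than re-deriving every level from the full-resolution task graph. A minimal sketch of the same idea with plain dask and zarr; the shapes, the mean-based downsampling, the Blosc settings, and the store path are illustrative stand-ins for the project's compute_pyramid, BlockedArrayWriter, and writing_options:

    import dask.array as da
    import numpy as np
    import zarr
    from numcodecs import Blosc

    root = zarr.open_group("pyramid_example.zarr", mode="w")
    level_0 = da.random.random((4, 1024, 1024), chunks=(1, 512, 512))

    array_to_write = level_0
    for level in range(3):
        ds = root.create_dataset(
            name=str(level),
            shape=array_to_write.shape,
            chunks=array_to_write.chunksize,
            dtype=array_to_write.dtype,
            # Illustrative codec; the real writer passes writing_options.
            compressor=Blosc(cname="zstd", clevel=3, shuffle=Blosc.SHUFFLE),
            overwrite=True,
        )
        da.store(array_to_write, ds)

        # Read the level just written back from disk; downsampling this
        # materialized array avoids recomputing the whole graph per level.
        previous = da.from_zarr(ds)
        array_to_write = da.coarsen(np.mean, previous, {0: 1, 1: 2, 2: 2})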
