From 7ef85cff24878dca6a18d647cb64cbb4b6ded79c Mon Sep 17 00:00:00 2001 From: bpinsard Date: Fri, 26 Jan 2024 13:23:15 -0500 Subject: [PATCH] add s3 remote init: wip --- docker/dicom_indexer/indexer/index_dicom.py | 33 +++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/docker/dicom_indexer/indexer/index_dicom.py b/docker/dicom_indexer/indexer/index_dicom.py index 619c4fe..ab16f8d 100644 --- a/docker/dicom_indexer/indexer/index_dicom.py +++ b/docker/dicom_indexer/indexer/index_dicom.py @@ -334,7 +334,6 @@ def import_local_data( dest = dest + ".7z" # create 7z archive with 1block/file parameters cmd = ["7z", "u", str(dest), str(input_path)] + p7z_opts.split() - print(cmd) subprocess.run(cmd, cwd=dicom_session_ds.path) elif input_path.is_file(): dest = dicom_session_ds.pathobj / dest @@ -363,6 +362,7 @@ def export_to_ria( dicom_session_tag: str, session_metas: dict, export_ria_archive: bool = False, + ria_archive_7zopts: str = "-mx5 -ms=off", ): ria_name = pathlib.Path(ria_url.path).name ds.create_sibling_ria( @@ -379,7 +379,9 @@ def export_to_ria( ria_sibling_path = pathlib.Path(ds.siblings(name=ria_name)[0]["url"]) archive_path = ria_sibling_path / "archives" / "archive.7z" ds.export_archive_ora( - archive_path, opts=[f"-mx{COMPRESSION_LEVEL}"], missing_content="error" + archive_path, + opts=ria_archive_7zopts.split(), + missing_content="error", ) ds.repo.fsck(remote=f"{ria_url}-storage", fast=True) # index ds.push(to=ria_name, data="nothing") @@ -390,10 +392,31 @@ def export_to_s3( s3_url: urllib.parse.ParseResult, session_metas: dict, ): - ds.repo.initremote() + # TODO: check if we can reuse a single bucket (or per study) with fileprefix # git-annex initremote remotename ... 
-    # git-annex wanted remotename include=**.{7z,tar.gz,zip}
-    # datalad push --data auto --to remotename
+    remote_name = s3_url.hostname
+    _, bucket_name, *path = pathlib.Path(s3_url.path).parts  # parts[0] is "/"
+    ds.repo.init_remote(
+        remote_name,
+        [
+            "type=S3",
+            "encryption=none",
+            "autoenable=true",
+            f"host={s3_url.hostname}",
+            "port=443",
+            "protocol=https",
+            "chunk=1GiB",
+            f"bucket={bucket_name}",
+            "requeststyle=path",
+            f"fileprefix={'/'.join(path)}",
+        ],
+    )
+    # preferred content: property name comes first, the remote is a keyword
+    ds.repo.set_preferred_content(
+        "wanted", "include=**.{7z,tar.gz,zip}", remote=remote_name
+    )
+
+    ds.push(to=remote_name)
 
 
 def connect_gitlab(