wip: dicom indexer

bpinsard 2024-01-26 11:16:51 -05:00
parent 84d5a18ba6
commit 6f01d16af5
1 changed file with 47 additions and 25 deletions


@@ -114,7 +114,12 @@ def main() -> None:
         or args.force_export
         and output_remote
     ):
-        export_data(dicom_session_ds, output_remote, session_metas)
+        export_data(
+            dicom_session_ds,
+            output_remote,
+            dicom_session_tag=args.session_name_tag,
+            session_metas=session_metas,
+        )
         setup_gitlab_repos(
             dicom_session_ds,
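
The new `dicom_session_tag=args.session_name_tag` keyword implies a CLI option that is not shown in this diff. A minimal argparse sketch of what it could look like (flag name and default are assumptions, not part of this commit):

    import argparse

    parser = argparse.ArgumentParser(description="wip: dicom indexer")
    # hypothetical option backing args.session_name_tag; real name/default may differ
    parser.add_argument(
        "--session-name-tag",
        dest="session_name_tag",
        default="PatientID",
        help="DICOM tag used to name/alias the session dataset",
    )
    args = parser.parse_args([])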
@@ -138,24 +143,24 @@ def index_dicoms(
         dicom_session_ds = dlad.create(tmpdirname, fake_dates=fake_dates)
         if not input.scheme or input.scheme == "file":
-            dest = import_local_data(
+            archive = import_local_data(
                 dicom_session_ds,
                 pathlib.Path(input.path),
                 sort_series=sort_series,
                 p7z_opts=p7z_opts,
             )
         elif input.scheme in ["http", "https", "s3"]:
-            dest = import_remote_data(dicom_session_ds, input_url)
+            archive = import_remote_data(dicom_session_ds, input_url)
         # index dicoms files
         dlad.add_archive_content(
-            dest,
+            archive,
             dataset=dicom_session_ds,
             strip_leading_dirs=True,
             commit=False,
         )
         # cannot pass message above so commit now
-        dicom_session_ds.save(message="index dicoms from archive")
+        dicom_session_ds.save(message=f"index dicoms from archive {archive}")
         # optimize git index after large import
         dicom_session_ds.repo.gc()  # aggressive by default
         yield dicom_session_ds
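
As the inline comment says, `dlad.add_archive_content` does not take a commit message, so the commit is deferred (`commit=False`) and made explicitly with `save()`; the new f-string records which archive was indexed. A standalone sketch of that pattern (dataset and archive paths are hypothetical):

    import datalad.api as dlad

    ds = dlad.Dataset("/tmp/dicom-session")   # hypothetical dataset location
    archive = "sourcedata/session.7z"         # hypothetical archive path
    # extract and index the archive content without committing yet
    dlad.add_archive_content(
        archive,
        dataset=ds,
        strip_leading_dirs=True,
        commit=False,
    )
    # add_archive_content has no message parameter, so commit explicitly
    ds.save(message=f"index dicoms from archive {archive}")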
@@ -164,10 +169,16 @@ def index_dicoms(
 def export_data(
     dicom_session_ds: dlad.Dataset,
     output_remote: urllib.parse.ParseResult,
+    dicom_session_tag: str,
     session_metas: dict,
 ) -> None:
-    if output_remote.scheme == "ria":
-        export_to_ria(dicom_session_ds, output_remote, session_metas)
+    if "ria" in output_remote.scheme:
+        export_to_ria(
+            dicom_session_ds,
+            output_remote,
+            dicom_session_tag=dicom_session_tag,
+            session_metas=session_metas,
+        )
     elif output_remote.scheme == "s3":
         export_to_s3(dicom_session_ds, output_remote, session_metas)
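
Loosening the test from `output_remote.scheme == "ria"` to a substring check matters because RIA store URLs use compound schemes such as `ria+file` or `ria+ssh`, which `urlparse` reports verbatim:

    from urllib.parse import urlparse

    # store URLs are hypothetical; schemes follow DataLad's RIA convention
    for url in ("ria+file:///data/store", "ria+ssh://store.example.org/ria"):
        remote = urlparse(url)
        print(remote.scheme, "ria" in remote.scheme)
    # ria+file True
    # ria+ssh True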
@@ -194,10 +205,12 @@ def setup_gitlab_repos(
         name=GITLAB_REMOTE_NAME,
         url=dicom_session_repo._attrs["ssh_url_to_repo"],
     )
+    dicom_session_ds.repo.checkout("dev", ["-b"])
     dicom_session_ds.push(to=GITLAB_REMOTE_NAME, force="gitpush")
     study_group = get_or_create_gitlab_group(gitlab_conn, gitlab_group_path)
     bot_user = gitlab_conn.users.list(username=GITLAB_BOT_USERNAME)[0]
-    study_group.members.create(
-        {
-            "user_id": bot_user.id,
+    if not any(m.id == bot_user.id for m in study_group.members.list()):
+        study_group.members.create(
+            {
+                "user_id": bot_user.id,
@@ -229,9 +242,8 @@ def setup_gitlab_repos(
         path=session_metas.get(dicom_session_tag),
     )
-    # Push to gitlab + local ria-store
+    # Push to gitlab
     dicom_study_ds.push(to="origin")
-    dicom_study_ds.push(to=UNF_DICOMS_RIA_NAME)


 def init_bids(
@@ -257,8 +269,9 @@ def init_bids(
     # create dev branch and push for merge requests
     bids_project_ds.gitrepo.checkout(BIDS_DEV_BRANCH, ["-b"])
     bids_project_ds.push(to="origin")
-    bids_project_ds.protectedbranches.create(data={"name": "convert/*"})
-    bids_project_ds.protectedbranches.create(data={"name": "dev"})
+    # set protected branches
+    bids_project_repo.protectedbranches.create(data={"name": "convert/*"})
+    bids_project_repo.protectedbranches.create(data={"name": "dev"})


 def init_dicom_study(
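
The rename from `bids_project_ds` to `bids_project_repo` fixes what was most likely an `AttributeError`: `protectedbranches` is an attribute of the python-gitlab `Project` object, not of a DataLad `Dataset`. The same calls in isolation (server URL and project path are assumptions):

    import gitlab

    gl = gitlab.Gitlab("https://gitlab.example.org", private_token="...")  # hypothetical
    bids_project_repo = gl.projects.get("unf/study-bids")                  # hypothetical
    # a wildcard rule protects every convert/* branch; dev is protected directly
    bids_project_repo.protectedbranches.create(data={"name": "convert/*"})
    bids_project_repo.protectedbranches.create(data={"name": "dev"})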
@@ -341,13 +354,22 @@ def import_remote_data(
 def export_to_ria(
     ds: dlad.Dataset,
     ria_url: urllib.parse.ParseResult,
+    dicom_session_tag: str,
     session_metas: dict,
+    export_ria_archive: bool = False,
 ):
     ria_name = pathlib.Path(ria_url.path).name
     ds.create_sibling_ria(
-        ria_url, name=ria_name, alias=session_metas["PatientID"], existing="reconfigure"
+        ria_url.geturl(),
+        name=ria_name,
+        alias=session_metas[dicom_session_tag],
+        existing="reconfigure",
+        new_store_ok=True,
     )
     ds.push(to=ria_name, data="nothing")
-    ria_sibling_path = pathlib.Path(ds.siblings(name=ria_name)[0]["url"])
-    archive_path = ria_sibling_path / "archives" / "archive.7z"
-    ds.export_archive_ora(
+    # keep the old ria-archive before add-archive-content, not used for now
+    if export_ria_archive:
+        ria_sibling_path = pathlib.Path(ds.siblings(name=ria_name)[0]["url"])
+        archive_path = ria_sibling_path / "archives" / "archive.7z"
+        ds.export_archive_ora(
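
`export_to_ria` now takes the session tag for the sibling alias, tolerates not-yet-initialized stores (`new_store_ok=True`), and makes the 7z archive export opt-in via `export_ria_archive`. A minimal sketch of the resulting happy path (store URL, alias, and dataset path are assumptions):

    import datalad.api as dlad

    ds = dlad.Dataset("/tmp/dicom-session")      # hypothetical dataset
    ds.create_sibling_ria(
        "ria+file:///data/ria-store",            # hypothetical store URL
        name="ria-store",
        alias="sub-001",                         # e.g. session_metas[dicom_session_tag]
        existing="reconfigure",
        new_store_ok=True,                       # create the store if missing
    )
    ds.push(to="ria-store", data="nothing")      # publish git history, no annexed data
    # optional archive export, as when export_ria_archive=True:
    # ds.export_archive_ora("/data/ria-store/<dsid>/archives/archive.7z")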