wip: dicom indexre
This commit is contained in:
parent
a579defbb0
commit
54881b3166
|
|
@ -7,7 +7,8 @@ import datalad.api as dlad
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
GITLAB_REMOTE_NAME = os.environ.get('GITLAB_REMOTE_NAME', 'gitlab')
|
GITLAB_REMOTE_NAME = os.environ.get("GITLAB_REMOTE_NAME", "gitlab")
|
||||||
|
|
||||||
|
|
||||||
def sort_series(path: str) -> None:
|
def sort_series(path: str) -> None:
|
||||||
"""Sort series in separate folder
|
"""Sort series in separate folder
|
||||||
|
|
@ -18,7 +19,7 @@ def sort_series(path: str) -> None:
|
||||||
path to dicoms
|
path to dicoms
|
||||||
|
|
||||||
"""
|
"""
|
||||||
files = glob.glob(os.path.join(path, '*'))
|
files = glob.glob(os.path.join(path, "*"))
|
||||||
for f in files:
|
for f in files:
|
||||||
if not os.path.isfile(f):
|
if not os.path.isfile(f):
|
||||||
continue
|
continue
|
||||||
|
|
@ -33,18 +34,21 @@ def sort_series(path: str) -> None:
|
||||||
|
|
||||||
def _build_arg_parser() -> argparse.ArgumentParser:
|
def _build_arg_parser() -> argparse.ArgumentParser:
|
||||||
p = argparse.ArgumentParser(
|
p = argparse.ArgumentParser(
|
||||||
description="dicom_indexer - indexes dicoms into datalad")
|
description="dicom_indexer - indexes dicoms into datalad"
|
||||||
|
)
|
||||||
|
p.add_argument("input", help="path/url of the dicom.")
|
||||||
p.add_argument(
|
p.add_argument(
|
||||||
'input', nargs='+',
|
"--gitlab-url",
|
||||||
help='path/url of the dicom.')
|
type=str,
|
||||||
p.add_argument()
|
help="http(s) url to the gitlab server where to push repos",
|
||||||
|
)
|
||||||
p.add_argument(
|
p.add_argument(
|
||||||
'gitlab_group_template',
|
"--gitlab-group-template",
|
||||||
default='{ReferringPhysicianName}/{StudyDescription.replace('^','/')}'
|
default="{ReferringPhysicianName}/{StudyDescription.replace(" ^ "," / ")}",
|
||||||
type=str)
|
type=str,
|
||||||
p.add_argument(
|
help="string with placeholder for dicom tags",
|
||||||
'--storage-remote',
|
)
|
||||||
help='url to the datalad remote')
|
p.add_argument("--storage-remote", help="url to the datalad remote")
|
||||||
p.add_argument(
|
p.add_argument(
|
||||||
"--sort-series",
|
"--sort-series",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
|
@ -60,8 +64,8 @@ def _build_arg_parser() -> argparse.ArgumentParser:
|
||||||
)
|
)
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def main() -> None:
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
parser = _build_arg_parser()
|
parser = _build_arg_parser()
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
@ -76,25 +80,24 @@ def main() -> None:
|
||||||
fake_dates=p.fake_dates,
|
fake_dates=p.fake_dates,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def process(
|
def process(
|
||||||
input:urllib.parse.ParseResult,
|
input: urllib.parse.ParseResult,
|
||||||
output_remote: urllib.parse.ParseResult,
|
output_remote: urllib.parse.ParseResult,
|
||||||
sort_series: bool,
|
sort_series: bool,
|
||||||
fake_dates: bool,
|
fake_dates: bool,
|
||||||
p7z_opts: str,
|
p7z_opts: str,
|
||||||
gitlab_url: urllib.parse.ParseResult,
|
gitlab_url: urllib.parse.ParseResult,
|
||||||
gitlab_group_template: str,
|
gitlab_group_template: str,
|
||||||
force_export: bool=False,
|
force_export: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Process incoming dicoms into datalad repo
|
"""Process incoming dicoms into datalad repo"""
|
||||||
|
|
||||||
"""
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||||
dicom_session_ds = dlad.create(tmpdirname, fake_dates=fake_dates)
|
dicom_session_ds = dlad.create(tmpdirname, fake_dates=fake_dates)
|
||||||
|
|
||||||
do_export = force_export
|
do_export = force_export
|
||||||
|
|
||||||
if not input.scheme or input.scheme == 'file':
|
if not input.scheme or input.scheme == "file":
|
||||||
dest = import_local_data(
|
dest = import_local_data(
|
||||||
dicom_session_ds,
|
dicom_session_ds,
|
||||||
pathlib.Path(input.path),
|
pathlib.Path(input.path),
|
||||||
|
|
@ -102,7 +105,7 @@ def process(
|
||||||
p7z_opts=p7z_opts,
|
p7z_opts=p7z_opts,
|
||||||
)
|
)
|
||||||
do_export = True
|
do_export = True
|
||||||
elif input.scheme in ['http', 'https', 's3']:
|
elif input.scheme in ["http", "https", "s3"]:
|
||||||
dest = import_remote_data(dicom_session_ds, input_url)
|
dest = import_remote_data(dicom_session_ds, input_url)
|
||||||
|
|
||||||
# index dicoms files
|
# index dicoms files
|
||||||
|
|
@ -112,25 +115,21 @@ def process(
|
||||||
commit=False,
|
commit=False,
|
||||||
)
|
)
|
||||||
# cannot pass message above so commit now
|
# cannot pass message above so commit now
|
||||||
dicom_session_ds.save(message='index dicoms from archive')#
|
dicom_session_ds.save(message="index dicoms from archive") #
|
||||||
# optimize git index after large import
|
# optimize git index after large import
|
||||||
dicom_session_ds.repo.gc() # aggressive by default
|
dicom_session_ds.repo.gc() # aggressive by default
|
||||||
|
|
||||||
session_metas = extract_session_metas(dicom_session_ds)
|
session_metas = extract_session_metas(dicom_session_ds)
|
||||||
|
|
||||||
if do_export:
|
if do_export:
|
||||||
if output_remote.scheme == 'ria':
|
if output_remote.scheme == "ria":
|
||||||
export_to_ria(dicom_session_ds, output_remote, session_metas)
|
export_to_ria(dicom_session_ds, output_remote, session_metas)
|
||||||
elif output_remote.scheme == 's3':
|
elif output_remote.scheme == "s3":
|
||||||
export_to_s3(dicom_session_ds, output_remote, session_metas)
|
export_to_s3(dicom_session_ds, output_remote, session_metas)
|
||||||
|
|
||||||
|
|
||||||
setup_gitlab_remote(dicom_session_ds, gitlab_url, session_metas)
|
setup_gitlab_remote(dicom_session_ds, gitlab_url, session_metas)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def setup_gitlab_repos(
|
def setup_gitlab_repos(
|
||||||
dicom_session_ds: dlad.Dataset,
|
dicom_session_ds: dlad.Dataset,
|
||||||
gitlab_url: urllib.parse.ParseResult,
|
gitlab_url: urllib.parse.ParseResult,
|
||||||
|
|
@ -139,36 +138,37 @@ def setup_gitlab_repos(
|
||||||
gitlab_conn = connect_gitlab()
|
gitlab_conn = connect_gitlab()
|
||||||
|
|
||||||
gitlab_group_path = gitlab_group_template.format(session_metas)
|
gitlab_group_path = gitlab_group_template.format(session_metas)
|
||||||
dicom_sourcedata_path = '/'.join([dicom_session_path, 'sourcedata/dicoms'])
|
dicom_sourcedata_path = "/".join([dicom_session_path, "sourcedata/dicoms"])
|
||||||
dicom_session_path = '/'.join([dicom_sourcedata_path, ['StudyInstanceUID']])
|
dicom_session_path = "/".join([dicom_sourcedata_path, ["StudyInstanceUID"]])
|
||||||
dicom_study_path = '/'.join([dicom_sourcedata_path, 'study'])
|
dicom_study_path = "/".join([dicom_sourcedata_path, "study"])
|
||||||
|
|
||||||
dicom_session_repo = get_or_create_gitlab_project(gl, dicom_session_path)
|
dicom_session_repo = get_or_create_gitlab_project(gl, dicom_session_path)
|
||||||
ds.siblings(
|
ds.siblings(
|
||||||
action='configure', # allow to overwrite existing config
|
action="configure", # allow to overwrite existing config
|
||||||
name=GITLAB_REMOTE_NAME,
|
name=GITLAB_REMOTE_NAME,
|
||||||
url=dicom_session_repo._attrs['ssh_url_to_repo'],
|
url=dicom_session_repo._attrs["ssh_url_to_repo"],
|
||||||
)
|
)
|
||||||
ds.push(to=GITLAB_REMOTE_NAME)
|
ds.push(to=GITLAB_REMOTE_NAME)
|
||||||
|
|
||||||
study_group = get_or_create_group(gl, gitlab_group_path)
|
study_group = get_or_create_group(gl, gitlab_group_path)
|
||||||
bot_user = gl.users.list(username=GITLAB_BOT_USERNAME)[0]
|
bot_user = gl.users.list(username=GITLAB_BOT_USERNAME)[0]
|
||||||
study_group.members.create({
|
study_group.members.create(
|
||||||
'user_id': bot_user.id,
|
{
|
||||||
'access_level': gitlab.const.AccessLevel.MAINTAINER,
|
"user_id": bot_user.id,
|
||||||
})
|
"access_level": gitlab.const.AccessLevel.MAINTAINER,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
dicom_study_repo = get_or_create_project(gl, dicom_study_path)
|
dicom_study_repo = get_or_create_project(gl, dicom_study_path)
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
dicom_study_ds = datalad.api.install(
|
dicom_study_ds = datalad.api.install(
|
||||||
source = dicom_study_repo._attrs['ssh_url_to_repo'],
|
source=dicom_study_repo._attrs["ssh_url_to_repo"],
|
||||||
path=tmpdir,
|
path=tmpdir,
|
||||||
)
|
)
|
||||||
|
|
||||||
if dicom_study_ds.repo.get_hexsha() is None or dicom_study_ds.id is None:
|
if dicom_study_ds.repo.get_hexsha() is None or dicom_study_ds.id is None:
|
||||||
dicom_study_ds.create(force=True)
|
dicom_study_ds.create(force=True)
|
||||||
dicom_study_ds.push(to='origin')
|
dicom_study_ds.push(to="origin")
|
||||||
# add default study DS structure.
|
# add default study DS structure.
|
||||||
init_dicom_study(dicom_study_ds, PI, study_name)
|
init_dicom_study(dicom_study_ds, PI, study_name)
|
||||||
# initialize BIDS project
|
# initialize BIDS project
|
||||||
|
|
@ -177,103 +177,100 @@ def setup_gitlab_repos(
|
||||||
create_group(gl, [PI, study_name, "qc"])
|
create_group(gl, [PI, study_name, "qc"])
|
||||||
|
|
||||||
dicom_study_ds.install(
|
dicom_study_ds.install(
|
||||||
source=dicom_session_repo._attrs['ssh_url_to_repo'],
|
source=dicom_session_repo._attrs["ssh_url_to_repo"],
|
||||||
path=session_meta['PatientName'],
|
path=session_meta["PatientName"],
|
||||||
)
|
)
|
||||||
dicom_study_ds.create_sibling_ria(
|
dicom_study_ds.create_sibling_ria(
|
||||||
UNF_DICOMS_RIA_URL,
|
UNF_DICOMS_RIA_URL,
|
||||||
name=UNF_DICOMS_RIA_NAME,
|
name=UNF_DICOMS_RIA_NAME,
|
||||||
alias=study_name,
|
alias=study_name,
|
||||||
existing='reconfigure')
|
existing="reconfigure",
|
||||||
|
)
|
||||||
|
|
||||||
# Push to gitlab + local ria-store
|
# Push to gitlab + local ria-store
|
||||||
dicom_study_ds.push(to='origin')
|
dicom_study_ds.push(to="origin")
|
||||||
dicom_study_ds.push(to=UNF_DICOMS_RIA_NAME)
|
dicom_study_ds.push(to=UNF_DICOMS_RIA_NAME)
|
||||||
|
|
||||||
|
|
||||||
SESSION_META_KEYS = [
|
SESSION_META_KEYS = [
|
||||||
'StudyInstanceUID',
|
"StudyInstanceUID",
|
||||||
'PatientID',
|
"PatientID",
|
||||||
'PatientName',
|
"PatientName",
|
||||||
'ReferringPhysicianName',
|
"ReferringPhysicianName",
|
||||||
'StudyDate',
|
"StudyDate",
|
||||||
'StudyDescription',
|
"StudyDescription",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def extract_session_metas(dicom_session_ds: dlad.Dataset):
|
def extract_session_metas(dicom_session_ds: dlad.Dataset):
|
||||||
all_files = dicom_session_ds.repo.find('*')
|
all_files = dicom_session_ds.repo.find("*")
|
||||||
for f in all_files:
|
for f in all_files:
|
||||||
try:
|
try:
|
||||||
dic = dicom.read_file(f, stop_before_pixels=True)
|
dic = dicom.read_file(f, stop_before_pixels=True)
|
||||||
except Exception: # TODO: what exception occurs when non-dicom ?
|
except Exception: # TODO: what exception occurs when non-dicom ?
|
||||||
continue
|
continue
|
||||||
# return at first dicom found
|
# return at first dicom found
|
||||||
return {k:getattr(dic, k) for k in SESSION_META_KEYS}
|
return {k: getattr(dic, k) for k in SESSION_META_KEYS}
|
||||||
|
|
||||||
|
|
||||||
def import_local_data(
|
def import_local_data(
|
||||||
dicom_session_ds: dlad.Dataset,
|
dicom_session_ds: dlad.Dataset,
|
||||||
input_path: pathlib.Path,
|
input_path: pathlib.Path,
|
||||||
sort_series: bool=True,
|
sort_series: bool = True,
|
||||||
p7z_opts: str='-mx5'
|
p7z_opts: str = "-mx5",
|
||||||
):
|
):
|
||||||
dest = input_path.basename()
|
dest = input_path.basename()
|
||||||
|
|
||||||
if input_path.is_dir():
|
if input_path.is_dir():
|
||||||
dest = dest + '.7z'
|
dest = dest + ".7z"
|
||||||
# create 7z archive with 1block/file parameters
|
# create 7z archive with 1block/file parameters
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
['7z', 'u', str(dest), '.'] + p7z_opts,
|
["7z", "u", str(dest), "."] + p7z_opts,
|
||||||
cwd=str(dicom_session_ds.path),
|
cwd=str(dicom_session_ds.path),
|
||||||
)
|
)
|
||||||
elif input_path.is_file():
|
elif input_path.is_file():
|
||||||
dest = dicom_session_ds.path / dest
|
dest = dicom_session_ds.path / dest
|
||||||
try: # try hard-linking to avoid copying
|
try: # try hard-linking to avoid copying
|
||||||
os.link(str(input_path), str(dest))
|
os.link(str(input_path), str(dest))
|
||||||
except OSError: #fallback if hard-linking not supported
|
except OSError: # fallback if hard-linking not supported
|
||||||
shutil.copyfile(str(input_path), str(dest))
|
shutil.copyfile(str(input_path), str(dest))
|
||||||
dicom_session_ds.save(dest, message='add dicoms archive')
|
dicom_session_ds.save(dest, message="add dicoms archive")
|
||||||
return dest
|
return dest
|
||||||
|
|
||||||
|
|
||||||
def import_remote_data(
|
def import_remote_data(
|
||||||
dicom_session_ds:dlad.Dataset,
|
dicom_session_ds: dlad.Dataset, input_url: urllib.parse.ParseResult
|
||||||
input_url:urllib.parse.ParseResult):
|
):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
dest = pathlib.Path(url.path).basename
|
dest = pathlib.Path(url.path).basename
|
||||||
dicom_session_ds.repo.add_url_to_file(dest, url)
|
dicom_session_ds.repo.add_url_to_file(dest, url)
|
||||||
except Exception:
|
except Exception:
|
||||||
... #TODO: check how things can fail here and deal with it.
|
... # TODO: check how things can fail here and deal with it.
|
||||||
return dest
|
return dest
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def export_to_ria(
|
def export_to_ria(
|
||||||
ds: dlad.Dataset,
|
ds: dlad.Dataset,
|
||||||
ria_url:urllib.parse.ParseResult,
|
ria_url: urllib.parse.ParseResult,
|
||||||
session_metas: dict,
|
session_metas: dict,
|
||||||
):
|
):
|
||||||
ria_name = pathlib.Path(ria_url.path).basename
|
ria_name = pathlib.Path(ria_url.path).basename
|
||||||
ds.create_sibling_ria(
|
ds.create_sibling_ria(
|
||||||
ria_url,
|
ria_url, name=ria_name, alias=session_meta["PatientID"], existing="reconfigure"
|
||||||
name=ria_name,
|
)
|
||||||
alias=session_meta['PatientID'],
|
ds.push(to=ria_name, data="nothing")
|
||||||
existing='reconfigure')
|
ria_sibling_path = pathlib.Path(ds.siblings(name=ria_name)[0]["url"])
|
||||||
ds.push(to=ria_name, data='nothing')
|
archive_path = ria_sibling_path / "archives" / "archive.7z"
|
||||||
ria_sibling_path = pathlib.Path(ds.siblings(name=ria_name)[0]['url'])
|
|
||||||
archive_path = ria_sibling_path / 'archives' / 'archive.7z'
|
|
||||||
ds.export_archive_ora(
|
ds.export_archive_ora(
|
||||||
archive_path,
|
archive_path, opts=[f"-mx{COMPRESSION_LEVEL}"], missing_content="error"
|
||||||
opts=[f'-mx{COMPRESSION_LEVEL}'],
|
)
|
||||||
missing_content='error')
|
ds.repo.fsck(remote=f"{ria_url}-storage", fast=True) # index
|
||||||
ds.repo.fsck(remote=f"{ria_url}-storage", fast=True) #index
|
ds.push(to=ria_name, data="nothing")
|
||||||
ds.push(to=ria_name, data='nothing')
|
|
||||||
|
|
||||||
def export_to_s3(
|
def export_to_s3(
|
||||||
ds: dlad.Dataset,
|
ds: dlad.Dataset,
|
||||||
s3_url:urllib.parse.ParseResult,
|
s3_url: urllib.parse.ParseResult,
|
||||||
session_metas: dict,
|
session_metas: dict,
|
||||||
):
|
):
|
||||||
...
|
...
|
||||||
|
|
@ -294,17 +291,16 @@ def connect_gitlab(debug=False):
|
||||||
|
|
||||||
|
|
||||||
def get_or_create_gitlab_group(gl, group_list):
|
def get_or_create_gitlab_group(gl, group_list):
|
||||||
"""
|
""" """
|
||||||
"""
|
|
||||||
found = False
|
found = False
|
||||||
for keep_groups in reversed(range(len(group_list)+1)):
|
for keep_groups in reversed(range(len(group_list) + 1)):
|
||||||
tmp_repo_path = '/'.join(group_list[0:keep_groups])
|
tmp_repo_path = "/".join(group_list[0:keep_groups])
|
||||||
logging.warning(tmp_repo_path)
|
logging.warning(tmp_repo_path)
|
||||||
gs = gl.groups.list(search=tmp_repo_path)
|
gs = gl.groups.list(search=tmp_repo_path)
|
||||||
for g in gs:
|
for g in gs:
|
||||||
if g.full_path == tmp_repo_path:
|
if g.full_path == tmp_repo_path:
|
||||||
found = True
|
found = True
|
||||||
break
|
break
|
||||||
if found:
|
if found:
|
||||||
break
|
break
|
||||||
for nb_groups in range(keep_groups, len(group_list)):
|
for nb_groups in range(keep_groups, len(group_list)):
|
||||||
|
|
@ -312,34 +308,36 @@ def get_or_create_gitlab_group(gl, group_list):
|
||||||
msg = "Creating group {}".format(group_list[nb_groups])
|
msg = "Creating group {}".format(group_list[nb_groups])
|
||||||
logging.warning(msg)
|
logging.warning(msg)
|
||||||
logging.warning(len(msg) * "=")
|
logging.warning(len(msg) * "=")
|
||||||
g = gl.groups.create({'name': group_list[nb_groups],
|
g = gl.groups.create(
|
||||||
'path': group_list[nb_groups]})
|
{"name": group_list[nb_groups], "path": group_list[nb_groups]}
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
msg = 'Creating group {} from {}'.format(group_list[nb_groups],
|
msg = "Creating group {} from {}".format(group_list[nb_groups], g.name)
|
||||||
g.name)
|
|
||||||
logging.warning(msg)
|
logging.warning(msg)
|
||||||
logging.warning(len(msg) * "=")
|
logging.warning(len(msg) * "=")
|
||||||
g = gl.groups.create({'name': group_list[nb_groups],
|
g = gl.groups.create(
|
||||||
'path': group_list[nb_groups],
|
{
|
||||||
'parent_id': g.id})
|
"name": group_list[nb_groups],
|
||||||
|
"path": group_list[nb_groups],
|
||||||
|
"parent_id": g.id,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return g
|
return g
|
||||||
|
|
||||||
|
|
||||||
def get_or_create_gitlab_project(gl, project_name):
|
def get_or_create_gitlab_project(gl, project_name):
|
||||||
"""
|
""" """
|
||||||
"""
|
|
||||||
if len(project_name) == 1:
|
if len(project_name) == 1:
|
||||||
# Check if exists
|
# Check if exists
|
||||||
p = gl.projects.list(search=project_name[0])
|
p = gl.projects.list(search=project_name[0])
|
||||||
if not p:
|
if not p:
|
||||||
p = gl.projects.create({'name': project_name[0],
|
p = gl.projects.create({"name": project_name[0], "path": project_name[0]})
|
||||||
'path': project_name[0]})
|
|
||||||
return p.id
|
return p.id
|
||||||
else:
|
else:
|
||||||
return p[0].id
|
return p[0].id
|
||||||
|
|
||||||
repo_full_path = '/'.join(project_name)
|
repo_full_path = "/".join(project_name)
|
||||||
|
|
||||||
# Look for exact repo/project:
|
# Look for exact repo/project:
|
||||||
p = gl.projects.list(search=project_name[-1])
|
p = gl.projects.list(search=project_name[-1])
|
||||||
|
|
@ -349,6 +347,5 @@ def get_or_create_gitlab_project(gl, project_name):
|
||||||
return curr_p
|
return curr_p
|
||||||
|
|
||||||
g = get_or_create_gitlab_group(gl, project_name[:-1])
|
g = get_or_create_gitlab_group(gl, project_name[:-1])
|
||||||
p = gl.projects.create({'name': project_name[-1],
|
p = gl.projects.create({"name": project_name[-1], "namespace_id": g.id})
|
||||||
'namespace_id': g.id})
|
|
||||||
return p
|
return p
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue