# containers/docker/dicom_indexer/indexer/index_dicom.py
import os
import argparse
import logging
import pathlib
import shutil
import subprocess
import tempfile
import urllib.parse
from contextlib import contextmanager
from typing import Iterator

import datalad.api as dlad
import gitlab
import pydicom as dicom
import yaml

# only explicit truthy strings enable debug (bool("0") would otherwise be True)
DEBUG = os.environ.get("DEBUG", "").lower() in ("1", "true", "yes")
if DEBUG:
    logging.basicConfig(level=logging.DEBUG)

GITLAB_REMOTE_NAME = os.environ.get("GITLAB_REMOTE_NAME", "origin")
GITLAB_TOKEN = os.environ.get("GITLAB_TOKEN", None)
GITLAB_BOT_USERNAME = os.environ.get("GITLAB_BOT_USERNAME", None)
BIDS_DEV_BRANCH = os.environ.get("BIDS_DEV_BRANCH", "dev")
NI_DATAOPS_GITLAB_ROOT = os.environ.get("NI_DATAOPS_GITLAB_ROOT", "ni-dataops")

S3_REMOTE_DEFAULT_PARAMETERS = [
    "type=S3",
    "encryption=none",
    "autoenable=true",
    "port=443",
    "protocol=https",
    "chunk=1GiB",
    "requeststyle=path",
]
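
# For reference, export_to_s3 below assembles these parameters into the
# equivalent of a `git annex initremote` call (hypothetical host/bucket/prefix):
#   git annex initremote s3.example.org type=S3 encryption=none autoenable=true \
#       port=443 protocol=https chunk=1GiB requeststyle=path \
#       host=s3.example.org bucket=mybucket fileprefix=studies/<uid>/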


def sort_series(path: pathlib.Path) -> None:
    """Sort series into separate folders.

    Parameters
    ----------
    path : pathlib.Path
        path to the dicoms
    """
    for f in path.glob("*"):
        if not f.is_file():
            continue
        dic = dicom.dcmread(f, stop_before_pixels=True)
        # series_number = dic.SeriesNumber
        series_instance_uid = dic.SeriesInstanceUID
        subpath = path / series_instance_uid
        subpath.mkdir(exist_ok=True)
        f.rename(subpath / f.name)
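
# Example (hypothetical layout): sort_series(pathlib.Path("/incoming/sess01"))
# moves /incoming/sess01/IM0001.dcm to
# /incoming/sess01/<SeriesInstanceUID>/IM0001.dcm, one folder per series.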


def _build_arg_parser() -> argparse.ArgumentParser:
    p = argparse.ArgumentParser(
        description="dicom_indexer - indexes dicoms into datalad"
    )
    p.add_argument("input", help="path/url of the dicom.")
    p.add_argument(
        "--gitlab-url",
        type=str,
        default=os.environ.get("GITLAB_SERVER", None),
        help="http(s) url of the gitlab server to push repos to",
    )
    p.add_argument(
        "--gitlab-group-template",
        default="{ReferringPhysicianName}/{StudyDescription}",
        type=str,
        help="string with placeholders for dicom tags",
    )
    p.add_argument(
        "--session-name-tag",
        default="PatientName",
        type=str,
        help="dicom tag that contains the name of the session",
    )
    p.add_argument("--storage-remote", help="url of the datalad storage remote")
    p.add_argument(
        "--sort-series",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="sort dicom series into separate folders",
    )
    p.add_argument(
        "--fake-dates",
        action="store_true",
        help="use fake dates for the datalad dataset",
    )
    p.add_argument(
        "--force-export",
        action="store_true",
        help="force export to the storage remote even for remote inputs",
    )
    p.add_argument(
        "--p7z-opts",
        type=str,
        default="-mx5 -ms=off",
        help="options for 7z-generated archives",
    )
    return p
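
# Example invocation (hypothetical paths/URLs):
#   python index_dicom.py /data/incoming/sess01 \
#       --gitlab-url https://gitlab.example.org \
#       --storage-remote ria+ssh://store.example.org/ria-store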


def main() -> None:
    parser = _build_arg_parser()
    args = parser.parse_args()

    if not GITLAB_REMOTE_NAME:
        raise RuntimeError("missing GITLAB_REMOTE_NAME env var")
    if not GITLAB_TOKEN:
        raise RuntimeError("missing GITLAB_TOKEN env var")
    if not GITLAB_BOT_USERNAME:
        raise RuntimeError("missing GITLAB_BOT_USERNAME env var")

    input = urllib.parse.urlparse(args.input)
    output_remote = (
        urllib.parse.urlparse(args.storage_remote) if args.storage_remote else None
    )
    gitlab_url = urllib.parse.urlparse(args.gitlab_url)

    with index_dicoms(
        input,
        sort_series=args.sort_series,
        fake_dates=args.fake_dates,
        p7z_opts=args.p7z_opts,
    ) as dicom_session_ds:
        session_metas = extract_session_metas(dicom_session_ds)
        # export local (or explicitly forced) inputs to the storage remote
        if (
            not input.scheme
            or input.scheme == "file"
            or args.force_export
        ) and output_remote:
            export_data(
                dicom_session_ds,
                output_remote,
                dicom_session_tag=args.session_name_tag,
                session_metas=session_metas,
            )

        setup_gitlab_repos(
            dicom_session_ds,
            gitlab_url=gitlab_url,
            dicom_session_tag=args.session_name_tag,
            session_metas=session_metas,
            gitlab_group_template=args.gitlab_group_template,
        )


@contextmanager
def index_dicoms(
    input: urllib.parse.ParseResult,
    sort_series: bool,
    fake_dates: bool,
    p7z_opts: str,
) -> Iterator[dlad.Dataset]:
    """Process incoming dicoms into a datalad repo."""
    with tempfile.TemporaryDirectory(delete=not DEBUG) as tmpdirname:
        dicom_session_ds = dlad.create(tmpdirname, fake_dates=fake_dates)
        if not input.scheme or input.scheme == "file":
            archive = import_local_data(
                dicom_session_ds,
                pathlib.Path(input.path),
                sort_series=sort_series,
                p7z_opts=p7z_opts,
            )
        elif input.scheme in ["http", "https", "s3"]:
            archive = import_remote_data(dicom_session_ds, input)
        else:
            raise ValueError(f"unsupported input scheme: {input.scheme}")
        # index the dicom files
        dlad.add_archive_content(
            archive,
            dataset=dicom_session_ds,
            strip_leading_dirs=True,
            commit=False,
        )
        # cannot pass a message above, so commit now
        dicom_session_ds.save(message=f"index dicoms from archive {archive}")
        # optimize the git index after a large import
        dicom_session_ds.repo.gc()  # aggressive by default
        yield dicom_session_ds
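
# Usage sketch: the temporary dataset only lives for the duration of the
# with-block (the clone is kept on exit only when DEBUG is set), e.g.:
#   with index_dicoms(parsed_input, sort_series=True, fake_dates=False,
#                     p7z_opts="-mx5 -ms=off") as ds:
#       metas = extract_session_metas(ds)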


def export_data(
    dicom_session_ds: dlad.Dataset,
    output_remote: urllib.parse.ParseResult,
    dicom_session_tag: str,
    session_metas: dict,
) -> None:
    if "ria" in output_remote.scheme:
        export_to_ria(
            dicom_session_ds,
            output_remote,
            dicom_session_tag=dicom_session_tag,
            session_metas=session_metas,
        )
    elif output_remote.scheme == "s3":
        export_to_s3(dicom_session_ds, output_remote, session_metas)


def set_bot_privileges(
    gitlab_conn: gitlab.Gitlab, gitlab_group_path: pathlib.Path
) -> None:
    # add maintainer permissions for the dicom bot user on the study repos
    study_group = get_or_create_gitlab_group(gitlab_conn, gitlab_group_path)
    bot_user = gitlab_conn.users.list(username=GITLAB_BOT_USERNAME)
    if not bot_user:
        raise RuntimeError(
            f"bot_user: {GITLAB_BOT_USERNAME} does not exist in the gitlab instance"
        )
    bot_user = bot_user[0]
    if not any(m.id == bot_user.id for m in study_group.members.list()):
        study_group.members.create(
            {
                "user_id": bot_user.id,
                "access_level": gitlab.const.AccessLevel.MAINTAINER,
            }
        )


def setup_gitlab_repos(
    dicom_session_ds: dlad.Dataset,
    gitlab_url: urllib.parse.ParseResult,
    session_metas: dict,
    dicom_session_tag: str,
    gitlab_group_template: str,
) -> None:
    gitlab_conn = connect_gitlab(gitlab_url)

    # generate gitlab group/repo paths
    gitlab_group_path = pathlib.Path(gitlab_group_template.format(**session_metas))
    dicom_sourcedata_path = gitlab_group_path / "sourcedata/dicoms"
    dicom_session_path = dicom_sourcedata_path / session_metas["StudyInstanceUID"]
    dicom_study_path = dicom_sourcedata_path / "study"

    # create the session repo (should not exist unless rerun)
    dicom_session_repo = get_or_create_gitlab_project(gitlab_conn, dicom_session_path)
    dicom_session_ds.siblings(
        action="configure",  # allows overwriting an existing config
        name=GITLAB_REMOTE_NAME,
        url=dicom_session_repo._attrs["ssh_url_to_repo"],
    )
    """
    # prevent warnings
    dicom_session_ds.config.add(
        f"remote.{GITLAB_REMOTE_NAME}.annex-ignore",
        value='false',
        scope='local'
    )"""
    set_bot_privileges(gitlab_conn, gitlab_group_path)
    # and push
    dicom_session_ds.push(to=GITLAB_REMOTE_NAME)

    # add the session to the dicom study repo
    dicom_study_repo = get_or_create_gitlab_project(gitlab_conn, dicom_study_path)
    with tempfile.TemporaryDirectory(delete=not DEBUG) as tmpdir:
        dicom_study_ds = dlad.install(
            source=dicom_study_repo._attrs["ssh_url_to_repo"],
            path=tmpdir,
        )
        """
        # prevent warnings when pushing
        dicom_study_ds.config.add(
            f"remote.origin.annex-ignore",
            value='false',
            scope='local'
        )"""
        if dicom_study_ds.repo.get_hexsha() is None or dicom_study_ds.id is None:
            dicom_study_ds.create(force=True)
            # add the default study DS structure.
            init_dicom_study(dicom_study_ds, gitlab_group_path)
            # initialize the BIDS project
            init_bids(gitlab_conn, dicom_study_repo, gitlab_group_path)
            # create subgroups for QC and derivatives repos
            get_or_create_gitlab_group(gitlab_conn, gitlab_group_path / "derivatives")
            get_or_create_gitlab_group(gitlab_conn, gitlab_group_path / "qc")
        dicom_study_ds.install(
            source=dicom_session_repo._attrs["ssh_url_to_repo"],
            path=session_metas.get(dicom_session_tag),
        )
        # push to gitlab
        dicom_study_ds.push(to="origin")
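
# Resulting gitlab layout for the default group template (hypothetical values
# ReferringPhysicianName="lab-a", StudyDescription="study-x"):
#   lab-a/study-x/sourcedata/dicoms/<StudyInstanceUID>  one repo per session
#   lab-a/study-x/sourcedata/dicoms/study               study-level dataset
#   lab-a/study-x/bids                                  BIDS conversion repo
#   lab-a/study-x/qc, lab-a/study-x/derivatives         subgroups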


def init_bids(
    gl: gitlab.Gitlab,
    dicom_study_repo,  # gitlab Project, not a datalad Dataset
    gitlab_group_path: pathlib.Path,
) -> None:
    bids_project_repo = get_or_create_gitlab_project(gl, gitlab_group_path / "bids")
    with tempfile.TemporaryDirectory() as tmpdir:
        bids_project_ds = dlad.install(
            source=bids_project_repo._attrs["ssh_url_to_repo"],
            path=tmpdir,
        )
        bids_project_ds.create(force=True)
        shutil.copytree(
            "repo_templates/bids", bids_project_ds.path, dirs_exist_ok=True
        )
        write_ci_env(bids_project_ds, gitlab_group_path)
        bids_project_ds.save(path=".", message="init structure and pipelines")
        bids_project_ds.install(
            path="sourcedata/dicoms",
            source=dicom_study_repo._attrs["ssh_url_to_repo"],
        )
        # TODO: setup sensitive / non-sensitive S3 buckets
        bids_project_ds.push(to="origin")
        # create the dev branch and push it for merge requests
        bids_project_ds.repo.checkout(BIDS_DEV_BRANCH, ["-b"])
        bids_project_ds.push(to="origin")
    # set protected branches
    bids_project_repo.protectedbranches.create(data={"name": "convert/*"})
    bids_project_repo.protectedbranches.create(data={"name": "dev"})


def init_dicom_study(
    dicom_study_ds: dlad.Dataset,
    gitlab_group_path: pathlib.Path,
) -> None:
    shutil.copytree(
        "repo_templates/dicom_study", dicom_study_ds.path, dirs_exist_ok=True
    )
    write_ci_env(dicom_study_ds, gitlab_group_path)
    dicom_study_ds.save(path=".", message="init structure and pipelines")
    dicom_study_ds.push(to="origin")


def write_ci_env(
    ds: dlad.Dataset,
    gitlab_group_path: pathlib.Path,
) -> None:
    env = {
        "variables": {
            "STUDY_PATH": str(gitlab_group_path),
            "BIDS_PATH": str(gitlab_group_path / "bids"),
            "NI_DATAOPS_GITLAB_ROOT": NI_DATAOPS_GITLAB_ROOT,
        }
    }
    with (pathlib.Path(ds.path) / ".ci-env.yml").open("w") as outfile:
        yaml.dump(env, outfile, default_flow_style=False)
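
# write_ci_env produces a .ci-env.yml like the following (hypothetical study
# path "lab-a/study-x"; yaml.dump sorts the keys):
#   variables:
#     BIDS_PATH: lab-a/study-x/bids
#     NI_DATAOPS_GITLAB_ROOT: ni-dataops
#     STUDY_PATH: lab-a/study-x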


SESSION_META_KEYS = [
    "StudyInstanceUID",
    "PatientID",
    "PatientName",
    "ReferringPhysicianName",
    "StudyDate",
    "StudyDescription",
]


def extract_session_metas(dicom_session_ds: dlad.Dataset) -> dict:
    all_files = dicom_session_ds.repo.get_files()
    for f in all_files:
        try:
            dic = dicom.dcmread(
                dicom_session_ds.pathobj / f, stop_before_pixels=True
            )
        except Exception:  # typically pydicom.errors.InvalidDicomError for non-dicoms
            continue
        # return at the first dicom found
        return {k: str(getattr(dic, k)).replace("^", "/") for k in SESSION_META_KEYS}
    raise RuntimeError("no dicom found")
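
# Example return value (hypothetical values; note "^" separators are
# replaced with "/"):
#   {"StudyInstanceUID": "1.2.840.113619...", "PatientID": "P001",
#    "PatientName": "DOE/JOHN", "ReferringPhysicianName": "lab-a",
#    "StudyDate": "20240115", "StudyDescription": "study-x"}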


def import_local_data(
    dicom_session_ds: dlad.Dataset,
    input_path: pathlib.Path,
    sort_series: bool = True,
    p7z_opts: str = "-mx5 -ms=off",
):
    dest = input_path.name
    if input_path.is_dir():
        dest = dest + ".7z"
        # create a 7z archive with one-block-per-file parameters
        cmd = ["7z", "u", str(dest), str(input_path)] + p7z_opts.split()
        subprocess.run(cmd, cwd=dicom_session_ds.path, check=True)
    elif input_path.is_file():
        dest = dicom_session_ds.pathobj / dest
        try:  # try hard-linking to avoid copying
            os.link(str(input_path), str(dest))
        except OSError:  # fall back if hard-linking is not supported
            shutil.copyfile(str(input_path), str(dest))
    dicom_session_ds.save(dest, message="add dicoms archive")
    return dest


def import_remote_data(
    dicom_session_ds: dlad.Dataset, input_url: urllib.parse.ParseResult
):
    dest = pathlib.Path(input_url.path).name
    try:
        dicom_session_ds.repo.add_url_to_file(dest, input_url.geturl())
    except Exception:
        ...  # TODO: check how things can fail here and deal with it.
    return dest


def export_to_ria(
    ds: dlad.Dataset,
    ria_url: urllib.parse.ParseResult,
    dicom_session_tag: str,
    session_metas: dict,
    export_ria_archive: bool = False,
    ria_archive_7zopts: str = "-mx5 -ms=off",
):
    ria_name = pathlib.Path(ria_url.path).name
    ds.create_sibling_ria(
        ria_url.geturl(),
        name=ria_name,
        alias=session_metas[dicom_session_tag],
        existing="reconfigure",
        new_store_ok=True,
    )
    ds.push(to=ria_name, data="nothing")

    # keep the old ria-archive before add-archive-content, not used for now
    if export_ria_archive:
        ria_sibling_path = pathlib.Path(ds.siblings(name=ria_name)[0]["url"])
        archive_path = ria_sibling_path / "archives" / "archive.7z"
        ds.export_archive_ora(
            archive_path,
            opts=ria_archive_7zopts.split(),
            missing_content="error",
        )
        ds.repo.fsck(remote=f"{ria_name}-storage", fast=True)  # reindex the archive
        ds.push(to=ria_name, data="nothing")


def export_to_s3(
    ds: dlad.Dataset,
    s3_url: urllib.parse.ParseResult,
    session_metas: dict,
):
    # TODO: check if we can reuse a single bucket (or one per study) with fileprefix
    # git-annex initremote remotename ...
    remote_name = s3_url.hostname
    _, bucket_name, *fileprefix = pathlib.Path(s3_url.path).parts
    fileprefix.append(session_metas["StudyInstanceUID"] + "/")
    ds.repo.init_remote(
        remote_name,
        S3_REMOTE_DEFAULT_PARAMETERS
        + [
            f"host={s3_url.hostname}",
            f"bucket={bucket_name}",
            f"fileprefix={'/'.join(fileprefix)}",
        ],
    )
    ds.repo.set_preferred_content(
        "wanted",
        "include=*.7z or include=*.tar.gz or include=*.zip",
        remote=remote_name,
    )
    ds.push(to=remote_name, data="auto")
    # FIXME: this does not push the data to S3 unless data="anything" is set,
    # which pushes everything, including the deflated archive content
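
# Worked example (hypothetical URL): for --storage-remote
#   s3://s3.example.org/mybucket/studies
# remote_name is "s3.example.org", bucket_name is "mybucket", and the
# fileprefix becomes "studies/<StudyInstanceUID>/".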


def connect_gitlab(
    gitlab_url: urllib.parse.ParseResult, debug: bool = False
) -> gitlab.Gitlab:
    """Connect to Gitlab."""
    gl = gitlab.Gitlab(gitlab_url.geturl(), private_token=GITLAB_TOKEN)
    if debug:
        gl.enable_debug()
    gl.auth()
    return gl


def get_or_create_gitlab_group(
    gl: gitlab.Gitlab,
    group_path: pathlib.Path,
):
    """Fetch or create a gitlab group, creating missing parent groups as needed."""
    group_list = group_path.parts
    found = False
    # walk up the path until an existing group is found
    for keep_groups in reversed(range(len(group_list) + 1)):
        tmp_repo_path = "/".join(group_list[0:keep_groups])
        logging.debug(tmp_repo_path)
        gs = gl.groups.list(search=tmp_repo_path)
        for g in gs:
            if g.full_path == tmp_repo_path:
                found = True
                break
        if found:
            break
    # then create the missing subgroups below it
    for nb_groups in range(keep_groups, len(group_list)):
        if nb_groups == 0:
            logging.debug(f"Creating group {group_list[nb_groups]}")
            g = gl.groups.create(
                {"name": group_list[nb_groups], "path": group_list[nb_groups]}
            )
        else:
            logging.debug(f"Creating group {group_list[nb_groups]} from {g.name}")
            g = gl.groups.create(
                {
                    "name": group_list[nb_groups],
                    "path": group_list[nb_groups],
                    "parent_id": g.id,
                }
            )
    return g


def get_or_create_gitlab_project(gl: gitlab.Gitlab, project_path: pathlib.Path):
    """Fetch or create a gitlab repo."""
    project_name = project_path.parts
    # look for the exact repo/project:
    p = gl.projects.list(search=project_name[-1])
    if p:
        for curr_p in p:
            if curr_p.path_with_namespace == str(project_path):
                return curr_p
    g = get_or_create_gitlab_group(gl, project_path.parent)
    logging.debug(f"Creating project {project_name[-1]} from {g.name}")
    p = gl.projects.create({"name": project_name[-1], "namespace_id": g.id})
    return p


if __name__ == "__main__":
    main()