Last active
July 17, 2023 08:13
-
-
Save jlareck/809290575540790712ecb27b71cbb8f7 to your computer and use it in GitHub Desktop.
Constructing databus client deploy command
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import json
import os
import re
import shlex
import urllib.parse
from datetime import datetime
def construct_string_from_json(file_path, data_folder, api_key):
    """Build the ``databusclient deploy`` command line for one dataset version.

    Args:
        file_path: Path to a JSON metadata file. It must provide ``title`` and
            ``license``; ``abstract`` and ``description`` are optional and
            default to an empty string.
        data_folder: Directory holding the ``.ttl.bz2`` files. Its last three
            path components are read as ``<group>/<artifact>/<version>``.
        api_key: Value placed after ``--apikey``.

    Returns:
        The complete command as one string. Every argument is shell-quoted,
        so metadata containing quotes, ``$`` or spaces cannot break the
        command when it is pasted into a shell.

    Raises:
        ValueError: If ``data_folder`` has fewer than three path components.
        KeyError: If the metadata lacks ``title`` or ``license``.
        IndexError: If a ``.ttl.bz2`` file name contains no ``=``.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Only the trailing <group>/<artifact>/<version> matters; normalising
    # first makes trailing slashes and arbitrary path depths work.
    parts = [p for p in os.path.normpath(data_folder).split(os.sep) if p]
    if len(parts) < 3:
        raise ValueError(
            f'expected a path ending in <group>/<artifact>/<version>, '
            f'got {data_folder!r}'
        )
    group, artifact, version = parts[-3:]

    base_url = f'http://downloads.dbpedia.org/repo/dbpedia/{group}/{artifact}/{version}/'
    version_id = f'https://databus.dbpedia.org/dbpedia/{group}/{artifact}/{version}'

    dataset_urls = []
    # Sorted so the generated command is reproducible across runs.
    for filename in sorted(os.listdir(data_folder)):
        if not filename.endswith('.ttl.bz2'):
            continue
        # The language is the text between the first '=' and the next '.'.
        lang = filename.split('=')[1].split('.')[0]
        encoded_filename = urllib.parse.quote_plus(filename)
        # '%7C' is an encoded '|' separating the URL from the lang variant.
        dataset_urls.append(f'{base_url}{encoded_filename}%7Clang={lang}')

    arguments = [
        'databusclient', 'deploy',
        '--versionid', version_id,
        '--title', str(data['title']),
        '--abstract', str(data.get('abstract', '')),
        '--description', str(data.get('description', '')),
        '--license', str(data['license']),
        '--apikey', str(api_key),
        *dataset_urls,
    ]
    return ' '.join(shlex.quote(argument) for argument in arguments)
def process_latest_datasets(group_folder_path, api_key): | |
for artifact in os.listdir(group_folder_path): | |
artifact_path = os.path.join(group_folder_path, artifact) | |
if os.path.isdir(artifact_path): | |
pattern = re.compile(r'^\d{4}.\d{2}.\d{2}$') | |
date_dirs = [dir_name for dir_name in os.listdir(artifact_path) | |
if os.path.isdir(os.path.join(artifact_path, dir_name)) | |
and pattern.match(dir_name)] | |
date_dirs.sort(key=lambda date: datetime.strptime(date, '%Y.%m.%d'), reverse=True) | |
if date_dirs: | |
latest_dataset_path = os.path.join(artifact_path, date_dirs[0]) | |
json_files = [file for file in os.listdir(artifact_path) if file.endswith('.json')] | |
if json_files: | |
json_file_path = os.path.join(artifact_path, json_files[0]) | |
print(construct_string_from_json(json_file_path, latest_dataset_path, api_key)) | |
if __name__ == '__main__':
    # Command-line entry point: two required positional arguments, the group
    # folder to scan and the API key to embed in the generated commands.
    cli = argparse.ArgumentParser()
    for positional in ('group_folder_path', 'apikey'):
        cli.add_argument(positional, type=str)
    options = cli.parse_args()
    process_latest_datasets(options.group_folder_path, options.apikey)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"title": "persondata dataset 2022.12.01", | |
"description": "help needed", | |
"abstract": "help needed", | |
"license": "http://purl.oclc.org/NET/rdflicense/cc-by3.0" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To run the script, use the following command:
python3 construct_command.py /path/generic api_key