Skip to content

Instantly share code, notes, and snippets.

@jlareck
Last active July 17, 2023 08:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jlareck/809290575540790712ecb27b71cbb8f7 to your computer and use it in GitHub Desktop.
Save jlareck/809290575540790712ecb27b71cbb8f7 to your computer and use it in GitHub Desktop.
Constructing databus client deploy command
import os
import json
import urllib.parse
from datetime import datetime
import argparse
import re
def _escape_for_double_quotes(value):
    """Escape *value* so it can sit inside a double-quoted shell word.

    Inside double quotes a POSIX shell still treats the backslash, the
    double quote, the dollar sign and the backtick specially, so each of
    them is prefixed with a backslash. Text without those characters is
    returned unchanged.
    """
    text = str(value)
    # The backslash must be handled first, otherwise the backslashes added
    # for the other characters would themselves be escaped again.
    for special in ('\\', '"', '$', '`'):
        text = text.replace(special, '\\' + special)
    return text


def construct_string_from_json(file_path: str, data_folder: str, api_key: str) -> str:
    """Build the ``databusclient deploy`` command line for one dataset version.

    Args:
        file_path: Path of the JSON metadata file. The keys ``title`` and
            ``license`` are required; ``abstract`` and ``description``
            default to an empty string.
        data_folder: Folder holding the ``*.ttl.bz2`` files. Its last three
            path components are read as ``<group>/<artifact>/<version>``.
        api_key: Value emitted after ``--apikey``.

    Returns:
        The command as a single string. It ends with one double-quoted
        download URL per ``.ttl.bz2`` file, each suffixed with
        ``%7Clang=<lang>``, in sorted file-name order.

    Raises:
        ValueError: If ``data_folder`` has fewer than three path components,
            or a ``.ttl.bz2`` file name contains no ``=`` to take the
            language from.
        KeyError: If ``title`` or ``license`` is missing from the metadata.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Only the trailing <group>/<artifact>/<version> matters, so the folder
    # may live at any depth and may carry a trailing separator.
    parts = [part for part in os.path.normpath(data_folder).split(os.sep) if part]
    if len(parts) < 3:
        raise ValueError(
            f'data_folder must end in <group>/<artifact>/<version>: {data_folder!r}'
        )
    group, artifact, version = parts[-3:]

    base_url = f'http://downloads.dbpedia.org/repo/dbpedia/{group}/{artifact}/{version}/'
    version_id = f'https://databus.dbpedia.org/dbpedia/{group}/{artifact}/{version}'

    dataset_strings = []
    # Sorted so the generated command does not depend on directory order.
    for filename in sorted(os.listdir(data_folder)):
        if not filename.endswith('.ttl.bz2'):
            continue
        pieces = filename.split('=')
        if len(pieces) < 2:
            raise ValueError(
                f'cannot derive a language from {filename!r}: no "=" in the file name'
            )
        # The language is the text between the first "=" and the next ".".
        lang = pieces[1].split('.')[0]
        encoded_filename = urllib.parse.quote_plus(filename)
        dataset_strings.append(f'"{base_url}{encoded_filename}%7Clang={lang}"')
    datasets = ' '.join(dataset_strings)

    title = _escape_for_double_quotes(data['title'])
    abstract = _escape_for_double_quotes(data.get('abstract', ''))
    description = _escape_for_double_quotes(data.get('description', ''))
    license_url = _escape_for_double_quotes(data['license'])

    return (f'databusclient deploy --versionid {version_id} '
            f'--title "{title}" '
            f'--abstract "{abstract}" '
            f'--description "{description}" '
            f'--license "{license_url}" '
            f'--apikey {api_key} '
            f'{datasets}')
def _parse_version_date(dir_name):
    """Return the date encoded in a ``YYYY.MM.DD`` directory name, or None.

    None is returned both when the name does not have that exact shape and
    when it has the shape but is not a real calendar date.
    """
    # The dots are escaped so that names such as "2022-12-01" are rejected
    # here instead of failing later inside strptime.
    if not re.fullmatch(r'\d{4}\.\d{2}\.\d{2}', dir_name):
        return None
    try:
        return datetime.strptime(dir_name, '%Y.%m.%d')
    except ValueError:
        return None


def process_latest_datasets(group_folder_path: str, api_key: str) -> None:
    """Print a deploy command for the newest version of every artifact.

    Each sub-directory of ``group_folder_path`` is treated as an artifact.
    For an artifact that contains at least one ``YYYY.MM.DD`` version
    directory and at least one ``*.json`` entry, the newest version
    directory and the first JSON entry (in sorted name order) are passed to
    ``construct_string_from_json`` and the result is printed. Artifacts
    missing either one are skipped silently.

    Args:
        group_folder_path: Directory whose sub-directories are artifacts.
        api_key: Forwarded unchanged to ``construct_string_from_json``.
    """
    # Sorted so the order of the printed commands is reproducible.
    for artifact in sorted(os.listdir(group_folder_path)):
        artifact_path = os.path.join(group_folder_path, artifact)
        if not os.path.isdir(artifact_path):
            continue

        entries = sorted(os.listdir(artifact_path))

        versions = []
        for name in entries:
            if not os.path.isdir(os.path.join(artifact_path, name)):
                continue
            parsed = _parse_version_date(name)
            if parsed is not None:
                versions.append((parsed, name))

        json_files = [name for name in entries if name.endswith('.json')]
        if not versions or not json_files:
            continue

        _, latest_version = max(versions)
        latest_dataset_path = os.path.join(artifact_path, latest_version)
        json_file_path = os.path.join(artifact_path, json_files[0])
        print(construct_string_from_json(json_file_path, latest_dataset_path, api_key))
if __name__ == '__main__':
    # Command-line entry point: two required positional arguments, the
    # group folder to scan followed by the API key to embed in each command.
    cli = argparse.ArgumentParser()
    for positional in ('group_folder_path', 'apikey'):
        cli.add_argument(positional, type=str)
    parsed = cli.parse_args()
    process_latest_datasets(parsed.group_folder_path, parsed.apikey)
{
"title": "persondata dataset 2022.12.01",
"description": "help needed",
"abstract": "help needed",
"license": "http://purl.oclc.org/NET/rdflicense/cc-by3.0"
}
@jlareck
Copy link
Author

jlareck commented Jul 11, 2023

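To run the script, use the command `python3 construct_command.py /path/generic api_key`, where `/path/generic` is the group folder containing the artifact directories and `api_key` is your Databus API key.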

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment