Skip to content

Instantly share code, notes, and snippets.

@zoharbabin
Last active November 9, 2023 05:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zoharbabin/9fad69a595747037340867d99822c7c7 to your computer and use it in GitHub Desktop.
Save zoharbabin/9fad69a595747037340867d99822c7c7 to your computer and use it in GitHub Desktop.
Utility to manage Kaltura entry metadata, supporting retrieval, creation, and updating of custom metadata profiles and items
"""
Kaltura Metadata Utility Script
This script facilitates the management of custom metadata associated with entries on the Kaltura media platform.
It provides a set of operations that interface with the Kaltura API, enabling users to retrieve, update, and maintain
the consistency of metadata. The script ensures that metadata fields are structured and ordered according to the rules
defined in a Metadata Profile's XSD (XML Schema Definition).
Usage Example:
```bash
python kaltura_metadata_xml_util.py PARTNER_ID "API_ADMIN_SECRET" METADATA_PROFILE_ID "ENTRY_ID"
```
Prerequisites:
- A Kaltura account with administrative privileges.
- Access to the Kaltura admin secret and partner ID for API authentication.
- The Kaltura Python client libraries installed in your Python environment.
- Basic understanding of XML and XSD structures.
Main Features:
- Fetch and parse the XSD of a Metadata Profile to understand the structure of metadata expected.
- Generate a new XML metadata template based on the profile's XSD, which can be used as a starting point for new entries.
- Retrieve existing metadata for an entry and present it in a structured format that aligns with the profile's XSD.
- Validate and add new metadata values to an entry, ensuring that the values meet the constraints set by the profile's XSD.
- Remove empty or redundant metadata elements that do not contain any data.
- Update existing metadata entries with new or modified values, and apply these updates to the Kaltura platform.
Usage and Extension:
- To use the script, one must pass the partner ID, admin secret, metadata profile ID, and the entry ID as arguments.
- The script can be extended to handle bulk updates by looping over multiple entries.
- Additional functions can be implemented to support more complex metadata operations, such as conditional updates, or synchronization with external data sources.
- Users can extend the error handling capabilities to provide more granular feedback.
- To facilitate ease of use, consider adding an interactive command-line interface or integrating with a web-based UI.
"""
import sys
import logging
import argparse
from typing import List, Any, Optional, Tuple

from lxml import etree as ET
from KalturaClient import KalturaClient, KalturaConfiguration
from KalturaClient.Plugins.Core import KalturaSessionType, KalturaFilterPager
from KalturaClient.Plugins.Metadata import (
    KalturaMetadataFilter,
    KalturaMetadataProfile,
    KalturaMetadata,
    KalturaMetadataObjectType,
)
from KalturaClient.exceptions import KalturaException
# Configuration Constants
# Base URL of the Kaltura API; assigned to the client configuration's serviceUrl.
SERVICE_URL = "https://cdnapi-ev.kaltura.com/"
# Session type requested when the API session is generated (admin session).
SESSION_TYPE = KalturaSessionType.ADMIN
# Session expiry passed to generateSessionV2 -- presumably seconds (86400 = 24 hours); confirm against the client docs.
SESSION_DURATION = 86400
# Privileges string passed to generateSessionV2 when the session is generated.
SESSION_PRIVILEGES = '*,disableentitlement'
# User ID the generated session is issued for.
SCRIPT_USER_ID = "metadata-tester"
# XML Schema namespace URI and the prefix map used for every 'xsd:' path lookup in this file.
XSD_NAMESPACE_URL = 'http://www.w3.org/2001/XMLSchema'
XSD_NAMESPACE = {'xsd': XSD_NAMESPACE_URL}
# Configure logging
# NOTE: runs at import time and configures the root logger at INFO level.
logging.basicConfig(level=logging.INFO)
# Helper functions and classes
class MetadataUtils:
    """Stateless helpers for building and editing custom-metadata XML.

    Every method is a static method that works on lxml elements: the parsed
    XSD of a metadata profile (``xsd_root``) and a ``<metadata>`` document
    whose children are the profile's fields.
    """

    @staticmethod
    def _parse_xml(xml_source: Any) -> ET.Element:
        """Parse an XML document with entity resolution disabled.

        Args:
            xml_source: The document as ``str`` or ``bytes``.

        Returns:
            The root element of the parsed document.

        Raises:
            ET.XMLSyntaxError: If the document is not well-formed.
        """
        if isinstance(xml_source, str):
            # lxml rejects str input that carries an encoding declaration;
            # bytes input is accepted with or without one.
            xml_source = xml_source.encode('utf-8')
        parser = ET.XMLParser(resolve_entities=False)
        return ET.fromstring(xml_source, parser=parser)

    @staticmethod
    def parse_xsd(xsd_string: str) -> ET.Element:
        """Parse a metadata profile's XSD and return its root element.

        Raises:
            ET.XMLSyntaxError: If the XSD is not well-formed (logged, then
                re-raised).
        """
        try:
            return MetadataUtils._parse_xml(xsd_string)
        except ET.XMLSyntaxError as e:
            logging.error(f"Error parsing XSD: {e}")
            raise

    @staticmethod
    def build_metadata_template(xsd_root: ET.Element) -> ET.Element:
        """Build an empty ``<metadata>`` document from the XSD.

        One empty child is created for every named ``xsd:element`` found
        anywhere in the schema, except the ``metadata`` root declaration.
        """
        metadata_element = ET.Element('metadata')
        for element in xsd_root.findall(".//xsd:element", XSD_NAMESPACE):
            name = element.get('name')
            # Elements without a name (e.g. ref-only declarations) cannot be
            # turned into a tag, so they are skipped instead of crashing.
            if name and name != 'metadata':
                ET.SubElement(metadata_element, name).text = ''
        return metadata_element

    @staticmethod
    def get_metadata_template_with_values(metadata_item: KalturaMetadata, xsd_root: ET.Element) -> ET.Element:
        """Return a template built from the XSD, filled with the item's values.

        Every value the item holds for a field is carried over: the first
        value fills the template element, further values (repeated fields)
        are inserted as siblings right after it, in document order.

        Args:
            metadata_item: Object whose ``xml`` attribute holds the stored
                metadata document.
            xsd_root: Parsed XSD of the metadata profile.

        Returns:
            The populated ``<metadata>`` element.
        """
        template_tree = MetadataUtils.build_metadata_template(xsd_root)
        item_tree = MetadataUtils._parse_xml(metadata_item.xml)
        expanded_tags = set()
        # Iterate over a snapshot: siblings are inserted during the loop.
        for elem in list(template_tree):
            matches = item_tree.findall(f'.//{elem.tag}')
            if not matches:
                continue
            elem.text = matches[0].text
            if elem.tag in expanded_tags:
                # The schema declared this name more than once; the extra
                # values were already attached to the first occurrence.
                continue
            expanded_tags.add(elem.tag)
            anchor = elem
            for extra in matches[1:]:
                sibling = ET.Element(elem.tag)
                sibling.text = extra.text
                template_tree.insert(template_tree.index(anchor) + 1, sibling)
                anchor = sibling
        return template_tree

    @staticmethod
    def pretty_print_element(element: ET.Element) -> str:
        """Serialize ``element`` to an indented unicode string."""
        return ET.tostring(element, pretty_print=True, encoding='unicode')

    @staticmethod
    def is_field_multi_valued(field_name: str, xsd_root: ET.Element) -> bool:
        """
        Determines whether a field is multi-valued based on the XSD.

        A field counts as multi-valued when its ``maxOccurs`` attribute is
        present and different from ``'1'``. A field the XSD does not define
        is reported with a warning and treated as single-valued.
        """
        xsd_element = xsd_root.find(f".//xsd:element[@name='{field_name}']", namespaces=XSD_NAMESPACE)
        if xsd_element is None:
            logging.warning(f"XSD does not define field '{field_name}'.")
            return False
        return xsd_element.get('maxOccurs') not in (None, '1')

    @staticmethod
    def get_restriction_values(field_name: str, xsd_root: ET.Element) -> List[str]:
        """
        Retrieves a list of allowed values for a field based on the XSD restrictions.

        The ``xsd:simpleType`` nested in the field's element is used; when
        there is none, a ``xsd:simpleType`` that is a sibling of the element
        is tried. Returns an empty list when no enumeration is found.
        """
        field_type_element = xsd_root.find(
            f".//xsd:element[@name='{field_name}']/xsd:simpleType", namespaces=XSD_NAMESPACE
        )
        if field_type_element is None:
            field_type_element = xsd_root.find(
                f".//xsd:element[@name='{field_name}']/../xsd:simpleType", namespaces=XSD_NAMESPACE
            )
        if field_type_element is None:
            return []
        restriction = field_type_element.find('xsd:restriction', namespaces=XSD_NAMESPACE)
        if restriction is None:
            return []
        return [enum.get('value') for enum in restriction.findall('xsd:enumeration', namespaces=XSD_NAMESPACE)]

    @staticmethod
    def find_position_for_new_element(metadata_element: ET.Element, field_name: str, xsd_root: ET.Element) -> Optional[int]:
        """
        Finds the position where the new element should be inserted in the metadata element.

        The position is an index into ``metadata_element``'s children: right
        after the last child whose field comes at or before ``field_name``
        in the XSD sequence. This keeps the document in schema order even
        when earlier fields are repeated or missing, and places a new value
        after the existing values of the same field.

        Returns:
            The insertion index, or ``None`` when the XSD has no sequence or
            the sequence does not list ``field_name``.
        """
        # Assume the first sequence is where the metadata fields should be ordered
        sequence = xsd_root.find('.//xsd:complexType/xsd:sequence', XSD_NAMESPACE)
        if sequence is None:
            return None
        schema_rank = {}
        for index, element in enumerate(sequence.findall('xsd:element', XSD_NAMESPACE)):
            name = element.get('name')
            if name is not None:
                schema_rank.setdefault(name, index)
        target_rank = schema_rank.get(field_name)
        if target_rank is None:
            return None
        position = 0
        for index, child in enumerate(metadata_element):
            # Children the schema does not list (including comments) never
            # move the insertion point.
            child_rank = schema_rank.get(child.tag)
            if child_rank is not None and child_rank <= target_rank:
                position = index + 1
        return position

    @staticmethod
    def remove_empty_elements(parent: ET.Element, field_name: str) -> None:
        """
        Removes all empty elements with the given field name from the parent element.

        An element is empty when it has no child elements and its text is
        missing or whitespace-only. Matches are searched at any depth below
        ``parent`` and removed from their actual parent.
        """
        for element in parent.findall(f".//{field_name}"):
            has_text = element.text is not None and element.text.strip()
            if has_text or len(element) > 0:
                continue
            owner = element.getparent()
            if owner is not None:
                # Remove from the real parent: the match may be nested deeper
                # than a direct child of ``parent``.
                owner.remove(element)

    @staticmethod
    def _insert_in_schema_order(metadata_element: ET.Element, field_name: str, text: str, xsd_root: ET.Element) -> None:
        """Create a ``field_name`` element holding ``text`` at its schema position."""
        new_value_element = ET.Element(field_name)
        new_value_element.text = text
        position = MetadataUtils.find_position_for_new_element(metadata_element, field_name, xsd_root)
        if position is None:
            # The schema gives no ordering for this field: append at the end.
            metadata_element.append(new_value_element)
        else:
            metadata_element.insert(position, new_value_element)

    @staticmethod
    def add_value_to_metadata(metadata_element: ET.Element, field_name: str, value: Any, xsd_root: ET.Element) -> ET.Element:
        """
        Adds or updates a value for a specific field within the metadata structure.

        Multi-valued fields get one element per distinct value: empty
        placeholders are dropped, and a value that is already present is not
        added again. Single-valued fields keep exactly one element, whose
        text is replaced.

        Args:
            metadata_element: The ``<metadata>`` element to modify in place.
            field_name: Name of the field to set.
            value: New value; stored as ``str(value)``.
            xsd_root: Parsed XSD of the metadata profile.

        Returns:
            ``metadata_element`` (the same object), for call chaining.

        Raises:
            ValueError: If the XSD restricts the field to an enumeration and
                the value is not one of the allowed values.
        """
        text_value = str(value)
        multi_valued = MetadataUtils.is_field_multi_valued(field_name, xsd_root)
        restriction_values = MetadataUtils.get_restriction_values(field_name, xsd_root)
        # Enumeration values are strings, so compare the string form.
        if restriction_values and text_value not in restriction_values:
            raise ValueError(f"Value '{value}' is not allowed for field '{field_name}' based on the XSD restrictions.")

        if multi_valued:
            # Drop empty placeholders first so the insertion point is
            # computed against the elements that will actually remain.
            MetadataUtils.remove_empty_elements(metadata_element, field_name)
            if not text_value.strip():
                # An empty value would itself be an empty element; skip it.
                return metadata_element
            current_values = [elem.text for elem in metadata_element.findall(f".//{field_name}")]
            if text_value not in current_values:
                MetadataUtils._insert_in_schema_order(metadata_element, field_name, text_value, xsd_root)
            return metadata_element

        existing_elements = metadata_element.findall(f".//{field_name}")
        if not existing_elements:
            MetadataUtils._insert_in_schema_order(metadata_element, field_name, text_value, xsd_root)
            return metadata_element

        # Single-valued: overwrite the first element and drop any extras.
        existing_elements[0].text = text_value
        for surplus in existing_elements[1:]:
            owner = surplus.getparent()
            if owner is not None:
                owner.remove(surplus)
        return metadata_element
class KalturaMetadataManager:
    """Thin wrapper around the Kaltura client for entry metadata operations.

    Creates an authenticated client on construction and exposes helpers to
    fetch a metadata profile's XSD and to look up, add or update the
    metadata item attached to an entry.
    """

    def __init__(self, partner_id: int, admin_secret: str):
        """Create the client and start a session for ``partner_id``."""
        self.client = self._create_client(partner_id, admin_secret)

    def _create_client(self, partner_id: int, admin_secret: str) -> KalturaClient:
        """Build a client pointed at SERVICE_URL with a freshly generated session."""
        config = KalturaConfiguration(partner_id)
        config.serviceUrl = SERVICE_URL
        client = KalturaClient(config)
        ks = client.generateSessionV2(
            admin_secret, SCRIPT_USER_ID, SESSION_TYPE,
            partner_id, SESSION_DURATION, SESSION_PRIVILEGES)
        client.setKs(ks)
        return client

    def fetch_metadata_profile(self, profile_id: int) -> str:
        """Return the XSD of the metadata profile ``profile_id``.

        Raises:
            KalturaException: If the API call fails (logged, then re-raised).
        """
        try:
            metadata_profile: KalturaMetadataProfile = self.client.metadata.metadataProfile.get(profile_id)
            return metadata_profile.xsd
        except KalturaException as e:
            logging.error(f"Error fetching metadata profile: {e}")
            raise

    def check_metadata_exists(self, entry_id: str, profile_id: int) -> Tuple[bool, Optional[KalturaMetadata]]:
        """Look up the metadata item of an entry for a given profile.

        Returns:
            A ``(exists, item)`` pair: ``(True, first_match)`` when the list
            call returns at least one item, otherwise ``(False, None)``.

        Raises:
            KalturaException: If the API call fails (logged, then re-raised).
        """
        metadata_filter = KalturaMetadataFilter()
        metadata_filter.metadataProfileIdEqual = profile_id
        metadata_filter.metadataObjectTypeEqual = KalturaMetadataObjectType.ENTRY
        metadata_filter.objectIdEqual = entry_id
        pager = KalturaFilterPager()
        try:
            response = self.client.metadata.metadata.list(metadata_filter, pager)
        except KalturaException as e:
            logging.error(f"Error listing metadata: {e}")
            raise
        # Treat a missing objects list the same as an empty one.
        items = response.objects or []
        return bool(items), (items[0] if items else None)

    def create_or_get_metadata(self, entry_id: str, profile_id: int, xsd_root: ET.Element) -> ET.Element:
        """Return the entry's metadata as a full template, or a new template.

        When the entry already has metadata for the profile, its values are
        merged into a template built from the XSD; otherwise an empty
        template is built.
        """
        metadata_exists, metadata_item = self.check_metadata_exists(entry_id, profile_id)
        if metadata_exists and metadata_item:
            metadata_xml = MetadataUtils.get_metadata_template_with_values(metadata_item, xsd_root)
        else:
            metadata_xml = MetadataUtils.build_metadata_template(xsd_root)
        # Defaults are applied on both paths: a template rebuilt from an
        # existing item also contains empty elements for fields the item
        # never set.
        # NOTE(review): confirm that defaulting unset enumerated fields of
        # existing items is the intended behaviour.
        self.populate_default_values(metadata_xml, xsd_root)
        return metadata_xml

    def populate_default_values(self, metadata_xml: ET.Element, xsd_root: ET.Element) -> None:
        """
        Populates default values for list types based on XSD enumeration restrictions.
        If an empty value is not allowed, the first value in the enumeration is selected.

        Only single-valued fields that have an enumeration and whose element
        in ``metadata_xml`` is empty or whitespace-only are changed.
        """
        for element in xsd_root.findall(".//xsd:element", XSD_NAMESPACE):
            name = element.get('name')
            if not name or name == 'metadata':
                continue
            restriction_values = MetadataUtils.get_restriction_values(name, xsd_root)
            if not restriction_values or MetadataUtils.is_field_multi_valued(name, xsd_root):
                continue
            metadata_element = metadata_xml.find(f".//{name}")
            if metadata_element is None:
                continue
            if not metadata_element.text or not metadata_element.text.strip():
                metadata_element.text = restriction_values[0]

    def update_metadata(self, metadata_id: int, xml: str) -> KalturaMetadata:
        """Replace the XML of the metadata item ``metadata_id``.

        Raises:
            KalturaException: If the API call fails (logged, then re-raised).
        """
        try:
            return self.client.metadata.metadata.update(metadata_id, xml)
        except KalturaException as e:
            logging.error(f"Error updating metadata: {e}")
            raise

    def add_metadata(self, profile_id: int, object_type: KalturaMetadataObjectType, object_id: str, xml: str) -> KalturaMetadata:
        """Create a new metadata item for ``object_id`` under ``profile_id``.

        Raises:
            KalturaException: If the API call fails (logged, then re-raised).
        """
        try:
            return self.client.metadata.metadata.add(profile_id, object_type, object_id, xml)
        except KalturaException as e:
            logging.error(f"Error adding metadata: {e}")
            raise

    def apply_metadata_to_entry(self, entry_id: str, profile_id: int, xml: str) -> KalturaMetadata:
        """Upsert ``xml`` as the entry's metadata for the given profile.

        Updates the existing metadata item when one is found, otherwise adds
        a new one.
        """
        metadata_exists, metadata_item = self.check_metadata_exists(entry_id, profile_id)
        if metadata_exists and metadata_item is not None:
            return self.update_metadata(metadata_item.id, xml)
        return self.add_metadata(profile_id, KalturaMetadataObjectType.ENTRY, entry_id, xml)
def parse_arguments(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the script's command-line arguments.

    Args:
        argv: Argument list to parse, without the program name. When
            ``None`` (the default) the process arguments are used, as
            before.

    Returns:
        A namespace with ``partner_id`` (int), ``admin_secret`` (str),
        ``profile_id`` (int) and ``entry_id`` (str).
    """
    parser = argparse.ArgumentParser(description='Kaltura Metadata Utility Script')
    parser.add_argument('partner_id', type=int, help='Kaltura partner ID')
    parser.add_argument('admin_secret', help='Kaltura admin secret')
    parser.add_argument('profile_id', type=int, help='Metadata profile ID')
    parser.add_argument('entry_id', help='Entry ID to update metadata for')
    return parser.parse_args(argv)
def main() -> int:
    """Run the sample flow: load the profile, edit fields, upsert, print.

    The entry's metadata (or a fresh template) is loaded, a fixed set of
    sample field values is applied, the result is upserted to the entry
    and the final XML is printed.

    Returns:
        ``0`` when every step succeeded, ``1`` when a field value was
        rejected or the upsert failed.
    """
    args = parse_arguments()
    # instantiate
    kaltura_manager = KalturaMetadataManager(args.partner_id, args.admin_secret)
    # parse the schema
    xsd_string = kaltura_manager.fetch_metadata_profile(args.profile_id)
    xsd_root = MetadataUtils.parse_xsd(xsd_string)
    # create a metadata template or fetch an existing metadata item xml from the API
    metadata_xml = kaltura_manager.create_or_get_metadata(args.entry_id, args.profile_id, xsd_root)

    exit_code = 0
    # Sample updates, applied in order.
    sample_updates = (
        ('Email', 'someone@test.com'),
        ('Email', 'someone@example.com'),  # will override the previous value
        ('Format', 'Go-Pro camera'),
        ('Categories', 'Testimonials'),
        ('Categories', 'Nature party'),
    )
    try:
        # make updates to specific fields
        for field_name, value in sample_updates:
            metadata_xml = MetadataUtils.add_value_to_metadata(metadata_xml, field_name, value, xsd_root)
        print("Metadata updated successfully.")
    except ValueError as e:
        # The first rejected value stops the remaining updates; the values
        # applied so far are still upserted below.
        print(f"Error while updating metadata: {e}")
        exit_code = 1

    try:
        # add or update the metadata item to the entry
        kaltura_manager.apply_metadata_to_entry(args.entry_id, args.profile_id, ET.tostring(metadata_xml, encoding='unicode'))
        print(f"Metadata for entry {args.entry_id} has been upsert.")
    except KalturaException as e:
        print(f"Error while applying metadata to entry: {e}")
        exit_code = 1

    # pretty print the xml
    pretty_xml = MetadataUtils.pretty_print_element(metadata_xml)
    print(pretty_xml)
    return exit_code


if __name__ == '__main__':
    # Propagate the result as the process exit status.
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment