pcattori/continuous-operation-of-categorization-project.py

## continuous-operation-of-categorization-project.py
import unifyapi.v1 as api

# Continuous operation of a categorization project: update the source datasets,
# then refresh the unified dataset, the model, the categorizations and the export.
unify = api.Client() # can specify host/port if needed, but by default localhost/9100

project = unify.projects.named('Automotive parts classification 2018') # some collection models offer alternative ways to lookup instances, e.g. by name

source_datasets = [
  'car_parts_2018.csv',
  'SteeringWheelSupplierINC_parts.csv',
]
# 1. update dataset(s)
for dataset_name in source_datasets:
  # entries of `source_datasets` are plain strings, so the name is passed as-is
  # (a `str` has no `.name` attribute)
  dataset = unify.datasets(dataset_name) # providing an ID to a collection returns the model identified by that ID; in this case dataset IDs are just their names
  dataset.update_records() # ignore returned `Operation` model; methods that trigger Unify tasks/jobs are synchronous by default

# DEV NOTE: we could implement project.source_datasets to make this even easier
# for dataset in project.source_datasets():
#   dataset.update_records()

# 2. update unified dataset
# `async` is a reserved keyword since Python 3.7, so `refresh(async=True)` does not
# parse; unpacking a dict passes the same option name with valid syntax.
# NOTE(review): consider renaming the option in the API design (e.g. `asynchronous`).
operation = project.unified_dataset().refresh(**{'async': True}) # we can opt-in to asynchronous calls if we want to
print(f'Started operation with ID: {operation.id}')
# 3. wait for operation
operation.wait()

categorizations = project.categorizations
# DEV NOTE: which is best? `project.model.refresh()` vs `project.categorizations.model.refresh()` or `project.categorization_model.refresh()`
# DEV NOTE (continued): in other words, is `categorizations` a meaningful resource?

# 4. refresh model
categorizations.model.refresh()

# 5. wait for operation
# didn't opt-in to async for model refresh, so don't need to wait for operation since categorizations.model.refresh ran synchronously

# 6. refresh categorizations
categorizations.refresh()

# 7. wait for operation
# again, we ran refresh in synchronous mode (by default) so don't need to wait

# 8. refresh dataset export
# DEV NOTE: how do we currently support finding out the dataset ID for the export dataset on a categorization project?
project.export.refresh() # ???


## description.md

      
    Raw
  

              description.md
            
          
    Goal is to support the "Continuous operation of a categorization project" workflow.
In this design, the client interface is model-centric, meaning that the client's methods return models (as opposed to POJOs or API request builders). Those models' methods also return models.
Models are just objects with properties to access data owned by that model (e.g. the Project model has a name property) and methods to get other models (e.g. the Project model has a unified_dataset() method that returns a Dataset model).
Models can also have methods for triggering tasks/jobs in Unify. These methods return Operation models. By default, these methods are synchronous, so you might not need access to the returned Operation model.

  
## uncommented.py
import unifyapi.v1 as api

# Continuous operation of a categorization project, without the design notes.
unify = api.Client()
project = unify.projects.named('Automotive parts classification 2018')

source_datasets = [
  'car_parts_2018.csv',
  'SteeringWheelSupplierINC_parts.csv',
]

# 1. update dataset(s)
for dataset_name in source_datasets:
  # entries are plain strings, so pass the name itself (a `str` has no `.name`)
  dataset = unify.datasets(dataset_name)
  dataset.update_records()
# 2. update unified dataset
# `async` is a reserved keyword since Python 3.7; dict unpacking keeps the same
# option name while remaining valid syntax.
operation = project.unified_dataset().refresh(**{'async': True})
# 3. wait for operation
operation.wait()
categorizations = project.categorizations
# 4. refresh model
categorizations.model.refresh()
# 5. wait for operation (N/A)
# 6. refresh categorizations
categorizations.refresh()
# 7. wait for operation (N/A)
# 8. refresh dataset export
project.export.refresh()
	import unifyapi.v1 as api

	# Continuous operation of a categorization project: update the source datasets,
	# then refresh the unified dataset, the model, the categorizations and the export.
	unify = api.Client() # can specify host/port if needed, but by default localhost/9100

	project = unify.projects.named('Automotive parts classification 2018') # some collection models offer alternative ways to lookup instances, e.g. by name

	source_datasets = [
		'car_parts_2018.csv',
		'SteeringWheelSupplierINC_parts.csv',
	]
	# 1. update dataset(s)
	for dataset_name in source_datasets:
		# entries of `source_datasets` are plain strings, so the name is passed as-is
		# (a `str` has no `.name` attribute)
		dataset = unify.datasets(dataset_name) # providing an ID to a collection returns the model identified by that ID; in this case dataset IDs are just their names
		dataset.update_records() # ignore returned `Operation` model; methods that trigger Unify tasks/jobs are synchronous by default

	# DEV NOTE: we could implement project.source_datasets to make this even easier
	# for dataset in project.source_datasets():
	#   dataset.update_records()

	# 2. update unified dataset
	# `async` is a reserved keyword since Python 3.7, so `refresh(async=True)` does not
	# parse; unpacking a dict passes the same option name with valid syntax.
	# NOTE(review): consider renaming the option in the API design (e.g. `asynchronous`).
	operation = project.unified_dataset().refresh(**{'async': True}) # we can opt-in to asynchronous calls if we want to
	print(f'Started operation with ID: {operation.id}')
	# 3. wait for operation
	operation.wait()

	categorizations = project.categorizations
	# DEV NOTE: which is best? `project.model.refresh()` vs `project.categorizations.model.refresh()` or `project.categorization_model.refresh()`
	# DEV NOTE (continued): in other words, is `categorizations` a meaningful resource?

	# 4. refresh model
	categorizations.model.refresh()

	# 5. wait for operation
	# didn't opt-in to async for model refresh, so don't need to wait for operation since categorizations.model.refresh ran synchronously

	# 6. refresh categorizations
	categorizations.refresh()

	# 7. wait for operation
	# again, we ran refresh in synchronous mode (by default) so don't need to wait

	# 8. refresh dataset export
	# DEV NOTE: how do we currently support finding out the dataset ID for the export dataset on a categorization project?
	project.export.refresh() # ???
	import unifyapi.v1 as api

	# Continuous operation of a categorization project, without the design notes.
	unify = api.Client()
	project = unify.projects.named('Automotive parts classification 2018')

	source_datasets = [
		'car_parts_2018.csv',
		'SteeringWheelSupplierINC_parts.csv',
	]

	# 1. update dataset(s)
	for dataset_name in source_datasets:
		# entries are plain strings, so pass the name itself (a `str` has no `.name`)
		dataset = unify.datasets(dataset_name)
		dataset.update_records()
	# 2. update unified dataset
	# `async` is a reserved keyword since Python 3.7; dict unpacking keeps the same
	# option name while remaining valid syntax.
	operation = project.unified_dataset().refresh(**{'async': True})
	# 3. wait for operation
	operation.wait()
	categorizations = project.categorizations
	# 4. refresh model
	categorizations.model.refresh()
	# 5. wait for operation (N/A)
	# 6. refresh categorizations
	categorizations.refresh()
	# 7. wait for operation (N/A)
	# 8. refresh dataset export
	project.export.refresh()