Skip to content

Instantly share code, notes, and snippets.

@ZiiSolutions
Created July 6, 2017 16:08
Show Gist options
  • Save ZiiSolutions/42ab2e8aa6bf92d004cb1fb3d4263b58 to your computer and use it in GitHub Desktop.
Save ZiiSolutions/42ab2e8aa6bf92d004cb1fb3d4263b58 to your computer and use it in GitHub Desktop.
Old getCollectionFromSubject() CAPI
'use strict';
const config = require('konfig')({ path: 'config' });
const util = require('util');
const _ = require('lodash');
const RequestServiceDiscovery = require('request-service-discovery');
const QueryBuilder = require('elastibuild');
const PubStatuses = require('../statics/pubstatuses');
const Sorts = require('../statics/sorts');
// Custom Errors
const ContentRepository = require('../errors/content-repository-error');
const BadRequestError = require('../errors/bad-request-error');
const NoContentError = require('../errors/no-content-error');
// Statics
// Index and type handed to the more-like-this query as `_index` / `_type`.
const ES_INDEX = "content-repository";
const ES_TYPE = "items";
// HTTP status codes inspected on errors returned by the repository client.
const BAD_REQUEST_STATUS_CODE = 400;
const NOT_FOUND_STATUS_CODE = 404;
// Profile value matched when listing or fetching services.
const SERVICE_PROFILE = 'paservice';
// Fallbacks used when the corresponding `config.app.contentRepository.*` value is falsy.
const DEFAULT_REQ_TIMEOUT_MS = 10000;
const DEFAULT_REQ_RETRIES = 3;
const DEFAULT_VERBOSE_MODE = false;
// Minimum score applied to the experimental `q` search when the caller supplies none.
const DEFAULT_MIN_SCORE = 0.20;
const DEFAULT_MIN_RETRY_TIMEOUT_MS = 25;
const DEFAULT_MAX_RETRY_TIMEOUT_MS = 500;
// Geo search defaults used when the caller gives no `geoField` / `geoDistanceKm`.
const DEFAULT_GEO_SEARCH_FIELDS = [ "place.geometry_location", "person.place.geometry_location" ];
const DEFAULT_GEO_DISTANCE_KM = 30;
// Second argument handed to every `client.post(...)` search request.
const DEFAULT_REQ_HEADERS = { headers: { "Content-Type": "application/json" } };
// Functionality Flags
// Gates the boosted multi-field search driven by `options.q` in getItems().
const ENABLE_EXPERIMENTAL_SEARCH = true;
// When true, entitled item queries are additionally filtered to PubStatuses.USABLE.
const ENABLE_ENTITLE_USABLE_ITEMS_ONLY = true;
// NOTE(review): not referenced anywhere in the visible part of this file.
const ITEM = 'item';
// Singleton Instance
// Holds the one ContentService instance; assigned by the class constructor.
let INSTANCE = null;
/**
 * Callback-style client for the content repository search endpoints
 * (collections, subjects, services, events and items).
 *
 * Queries are assembled with the `elastibuild` QueryBuilder and sent through a
 * `request-service-discovery` client. The class is a singleton: every
 * `new ContentService()` yields the same instance.
 */
class ContentService {

  /**
   * Constructor.
   *
   * Builds the service-discovery client once. Later constructions return the
   * existing instance without building another client.
   */
  constructor() {
    if (INSTANCE) {
      return INSTANCE;
    }

    const repository = config.app.contentRepository;

    this.client = new RequestServiceDiscovery({
      connectionString: config.app.zookeeper.connectionString,
      basePath: repository.basePath,
      serviceName: repository.server.name,
      timeout: repository.timeout || DEFAULT_REQ_TIMEOUT_MS,
      retries: repository.retries || DEFAULT_REQ_RETRIES,
      verbose: repository.verbose || DEFAULT_VERBOSE_MODE,
      minTimeout: repository.minRetryTimeout || DEFAULT_MIN_RETRY_TIMEOUT_MS,
      maxTimeout: repository.maxRetryTimeout || DEFAULT_MAX_RETRY_TIMEOUT_MS
    });

    // Only publish the singleton once the client was built successfully.
    INSTANCE = this;
  }

  /**
   * List collections.
   *
   * @param options Optional filters: offset, limit, query, uri, code, sort, entitlement.
   * @param callback Invoked as (err, body).
   */
  getCollections(options, callback) {
    options = options || {};

    const builder = QueryBuilder.buildQuery();
    this._applyPaging(builder, options);

    if (options.query) {
      builder.withQueryString(["_all"], options.query);
    }
    if (options.uri) {
      builder.withMatch("_id", options.uri);
    }
    if (options.code) {
      builder.withMatch("subject", options.code);
    }

    this._applySort(builder, options.sort, [Sorts.DESCRIPTION_TEXT_ASC, Sorts.VERSIONCREATED_DESC]);

    if (options.entitlement) {
      this._decorateEntitlementsToCollectionQuery(builder, options.entitlement);
    }

    this._search('collection/search', builder.build(), callback);
  }

  /**
   * Get a specific Collection (delegates to getCollections with the same options).
   *
   * @param options
   * @param callback Invoked as (err, body).
   */
  getCollection(options, callback) {
    this.getCollections(options || {}, (err, items) => {
      callback(err, items);
    });
  }

  /**
   * Retrieve the items of a collection, with the collection's "see also"
   * associations merged onto them.
   *
   * @param params Collection lookup options (e.g. the collection uri); only
   *               `entitlement.exclude` is forwarded to the item search.
   * @param callback Invoked as (err, items).
   */
  getCollectionItems(params, callback) {
    // A missing params object must not crash the entitlement lookup below.
    params = params || {};

    this.getCollection(params, (err, collection) => {
      if (err) {
        return callback(err);
      }

      const entitlement = params.entitlement;
      const hasExclude = Boolean(entitlement) && Object.prototype.hasOwnProperty.call(entitlement, 'exclude');

      const options = {
        uri: this._getItemUris(collection),
        entitlement: { exclude: hasExclude ? entitlement.exclude : null }
      };

      this.getItems(options, (itemsErr, items) => {
        callback(itemsErr, this._appendSeeAlsoAssociations(items, collection));
      });
    });
  }

  /**
   * Gets collections from subject search endpoint.
   *
   * @param options Optional filters: offset, limit, query, profile, sort, entitlement.
   * @param callback Invoked as (err, body).
   */
  getCollectionsFromSubjectSearch(options, callback) {
    options = options || {};

    const builder = QueryBuilder.buildQuery();
    this._applyPaging(builder, options);

    if (options.query) {
      builder.withQueryString(["_all"], options.query);
    }
    if (options.profile) {
      builder.withMatch("profile", options.profile);
    }

    this._applySort(builder, options.sort, Sorts.DESCRIPTION_TEXT_ASC);

    if (options.entitlement) {
      this._decorateEntitlementsToCollectionQuery(builder, options.entitlement);
    }

    this._search('subject/search', builder.build(), callback);
  }

  /**
   * List Services (subjects whose profile is SERVICE_PROFILE).
   *
   * @param options Optional filters: offset, limit, query, uri, parentSubject, sort, entitlement.
   * @param callback Invoked as (err, body).
   */
  getServices(options, callback) {
    options = options || {};

    const builder = QueryBuilder.buildQuery();
    builder.withMatch("profile", SERVICE_PROFILE);
    this._applyPaging(builder, options);

    if (options.query) {
      builder.withQueryString([ "_all" ], options.query);
    }
    if (options.uri) {
      builder.withMatch("_id", options.uri);
    }
    if (options.parentSubject) {
      builder.withMatch("subject.code", options.parentSubject);
      builder.withMatch("subject.rel", 'childOf');
    }

    this._applySort(builder, options.sort, Sorts.DESCRIPTION_TEXT_ASC);

    if (options.entitlement) {
      this._decorateEntitlementsToServiceQuery(builder, options.entitlement);
    }

    this._search('subject/search', builder.build(), callback);
  }

  /**
   * Get a specific Service.
   *
   * @param options
   * @param callback Invoked as (err, body).
   */
  getService(options, callback) {
    // Work on a copy so the caller's options object is not mutated.
    const serviceOptions = Object.assign({}, options, { profile: SERVICE_PROFILE });

    this.getServices(serviceOptions, (err, items) => {
      callback(err, items);
    });
  }

  /**
   * List Events.
   *
   * @param options Optional filters: offset, limit, uri, profile, start, end, subject, query, sort.
   * @param callback Invoked as (err, body).
   */
  getEvents(options, callback) {
    options = options || {};

    const builder = QueryBuilder.buildQuery();
    this._applyPaging(builder, options);

    if (options.uri) {
      builder.withMatch("_id", options.uri);
    }
    if (options.profile) {
      builder.withMatch("profile", options.profile);
    }
    if (options.start) {
      builder.withRange("start", { gte: options.start });
    }
    if (options.end) {
      builder.withRange("end", { lte: options.end });
    }
    if (options.subject) {
      builder.withMatch("subject.code", options.subject);
    }
    if (options.query) {
      builder.withQueryString([ "_all" ], options.query);
    }

    this._applySort(builder, options.sort, Sorts.DESCRIPTION_TEXT_ASC);

    this._search('event/search', builder.build(), callback);
  }

  /**
   * Get a specific Event (delegates to getEvents with the same options).
   *
   * @param options
   * @param callback Invoked as (err, body).
   */
  getEvent(options, callback) {
    this.getEvents(options || {}, (err, items) => {
      callback(err, items);
    });
  }

  /**
   * List Subjects.
   *
   * @param options Optional filters: offset, limit, profile, uri, parentSubject, query, sort.
   * @param callback Invoked as (err, body).
   */
  getSubjects(options, callback) {
    options = options || {};

    const builder = QueryBuilder.buildQuery();
    this._applyPaging(builder, options);

    if (options.profile) {
      builder.withMatch("profile", options.profile);
    }
    if (options.uri) {
      builder.withMatch("uri", options.uri);
    }
    if (options.parentSubject) {
      builder.withMatch("subject.code", options.parentSubject);
      builder.withMatch("subject.rel", 'childOf');
    }
    if (options.query) {
      builder.withQueryString([ "_all" ], options.query);
    }

    this._applySort(builder, options.sort, Sorts.DESCRIPTION_TEXT_ASC);

    this._search('subject/search', builder.build(), callback);
  }

  /**
   * Get a specific Subject (delegates to getSubjects with the same options).
   *
   * @param options
   * @param callback Invoked as (err, body).
   */
  getSubject(options, callback) {
    this.getSubjects(options || {}, (err, items) => {
      callback(err, items);
    });
  }

  /**
   * List Items.
   *
   * @param options Optional filters (uri, pubstatus, type, event, subject,
   *                notSubject, andSubject, service, object, notObject, product,
   *                profile, start, end, query, q, minScore, geoLat, geoLong,
   *                geoField, geoDistanceKm, fieldExist, entitlement, sort,
   *                offset, limit).
   * @param callback Invoked as (err, body).
   */
  getItems(options, callback) {
    options = options || {};

    const builder = QueryBuilder.buildQuery();
    this._applyPaging(builder, options);

    if (options.uri) {
      builder.withMatch("_id", options.uri);
    }
    if (options.pubstatus) {
      builder.withMatch("pubstatus", options.pubstatus);
    }
    if (options.type) {
      builder.withMatch("type", options.type);
    }
    if (options.event) {
      builder.withMatch("event.code", options.event);
    }
    if (options.subject) {
      builder.withMatch("subject.code", options.subject);
    }
    if (options.notSubject) {
      builder.withNotMatch("subject.code", options.notSubject);
    }
    if (options.andSubject) {
      builder.withMustMatch("subject.code", options.andSubject);
    }
    if (options.service) {
      builder.withMatch("subject.code", options.service);
    }
    if (options.object) {
      builder.withMatch("object.code", options.object);
    }
    if (options.notObject) {
      builder.withNotMatch("object.code", options.notObject);
    }

    this._applyProductFilter(builder, options);

    if (options.profile) {
      builder.withMatch("profile", options.profile);
    }
    if (options.start || options.end) {
      builder.withRange("versioncreated", { gte: options.start, lte: options.end });
    }
    if (options.query) {
      builder.withQueryString([ "_all" ], options.query, { use_dis_max: true, auto_generate_phrase_queries: true });
    }

    if (ENABLE_EXPERIMENTAL_SEARCH && options.q) {
      this._applyExperimentalSearch(builder, options);
    }
    if (options.minScore) {
      builder.withMinScore(options.minScore);
    }

    // A coordinate of 0 (equator / Greenwich meridian) is a valid position, so
    // it must not be treated as "not provided".
    if (this._hasCoordinate(options.geoLat) && this._hasCoordinate(options.geoLong)) {
      builder.withGeoDistance(
        options.geoField || DEFAULT_GEO_SEARCH_FIELDS,
        options.geoLat,
        options.geoLong,
        options.geoDistanceKm || DEFAULT_GEO_DISTANCE_KM
      );
    }

    if (options.fieldExist) {
      builder.withFieldExist(options.fieldExist, null);
    }
    if (options.entitlement) {
      this._decorateEntitlementsToItemQuery(builder, options.entitlement);
    }

    this._applySort(builder, options.sort, Sorts.VERSIONCREATED_DESC);

    this._search('item/search', builder.build(), callback);
  }

  /**
   * Get a specific Item.
   *
   * First checks that the item exists (GET item/<uri>), then runs an entitled
   * search for it so that entitlement filters are honoured.
   *
   * @param options uri, plus optional product and entitlement.
   * @param callback Invoked as (err, item); item is undefined when the entitled
   *                 search returns nothing.
   */
  getItem(options, callback) {
    options = options || {};

    const builder = QueryBuilder.buildQuery();

    if (options.uri) {
      builder.withMatch("_id", options.uri);
    }

    this._applyProductFilter(builder, options);

    if (options.entitlement) {
      this._decorateEntitlementsToItemQuery(builder, options.entitlement);
    }

    const query = builder.build();

    // Check if the item exists
    this.client.get('item/' + options.uri, null, (err) => {
      // Both a bad request and a not-found answer are reported as "no content".
      if (this._errorHasStatus(err, BAD_REQUEST_STATUS_CODE) || this._errorHasStatus(err, NOT_FOUND_STATUS_CODE)) {
        return callback(new NoContentError('Content item not found: ' + options.uri));
      }
      if (err) {
        return callback(err);
      }

      this.client.post('item/search', DEFAULT_REQ_HEADERS, query, (searchErr, res) => {
        if (searchErr) {
          return this._handleRequestError(searchErr, res, query, callback);
        }
        // Tolerate a response without an `item` array instead of throwing.
        callback(searchErr, _.get(res, ['body', 'item', 0]));
      });
    });
  }

  /**
   * Retrieve Items that are "like" the one provided.
   *
   * @param options uri of the reference item, plus optional offset, limit,
   *                min_term_freq, minimum_should_match, profile, subject,
   *                notSubject, product, start, end, type, entitlement.
   * @param callback Invoked as (err, body).
   */
  getMoreLikeThisItem(options, callback) {
    options = options || {};

    const builder = QueryBuilder.buildQuery();
    this._applyPaging(builder, options);

    const likeOptions = {
      min_term_freq: options.min_term_freq,
      minimum_should_match: options.minimum_should_match,
      _index: ES_INDEX,
      _type: ES_TYPE
    };

    builder.withMoreLikeThis([ "subject.code", "headline", "event.code" ], options.uri, likeOptions);

    if (options.profile) {
      builder.withMustFilter("profile", [options.profile]);
    }
    if (options.subject) {
      builder.withMatch("subject.code", options.subject);
    }
    if (options.notSubject) {
      builder.withNotMatch("subject.code", options.notSubject);
    }

    this._applyProductFilter(builder, options);

    if (options.start || options.end) {
      // NOTE(review): getItems() expresses the same date window with withRange();
      // confirm that withMustFilter() accepts a { gte, lte } object here.
      builder.withMustFilter("versioncreated", { gte: options.start, lte: options.end });
    }
    if (options.type) {
      builder.withMustFilter("type", [options.type]);
    }
    if (options.entitlement) {
      this._decorateEntitlementsToItemQuery(builder, options.entitlement);
    }

    this._search('item/search', builder.build(), callback);
  }

  /**
   * Apply offset / limit paging to a query when provided.
   *
   * @param builder
   * @param options
   * @private
   */
  _applyPaging(builder, options) {
    if (options.offset) {
      builder.withFrom(options.offset);
    }
    if (options.limit) {
      builder.withSize(options.limit);
    }
  }

  /**
   * Apply the requested sort (normalised), or the given default sort.
   *
   * @param builder
   * @param sort Caller supplied sort, may be empty.
   * @param defaultSort Sort used when no sort was supplied.
   * @private
   */
  _applySort(builder, sort, defaultSort) {
    if (sort) {
      builder.withSortUri(Sorts.normaliseSort(sort));
    } else {
      builder.withSortUri(defaultSort);
    }
  }

  /**
   * Restrict a query to the content classifications of the selected product.
   * Only applies when both a product and an entitlement are present.
   *
   * @param builder
   * @param options
   * @private
   */
  _applyProductFilter(builder, options) {
    if (!(options.product && options.entitlement)) {
      return;
    }

    const contentClassifications = this._convertProductToContentClassifications(options.product, options.entitlement);

    // NOTE(review): the converter always returns an array, so an unknown product
    // yields a filter on an empty list - confirm that this is intended.
    if (contentClassifications) {
      builder.withMustFilter("object.code", contentClassifications);
    }
  }

  /**
   * Add the boosted "should" clauses of the experimental `q` search.
   *
   * @param builder
   * @param options Must carry `q`; `minScore` is optional.
   * @private
   */
  _applyExperimentalSearch(builder, options) {
    const textFields = [ "headline", "description_text", "body_text" ];

    // If the Journo has added a tag, then this is very important
    builder.withShouldMatch("subject.name", `(\"${options.q}\")`, { boost: 200 });
    // If the exact phrase is mentioned in the article, then it is also very important
    builder.withShouldMatchQueryString(textFields, `(\"${options.q}\")`, { boost: 100, use_dis_max: true });
    // The Story should get a boost if the Person / Place has been linked by the Entity Extractor
    builder.withShouldMatch("person.name", options.q, { boost: 50, operator: "AND" });
    builder.withShouldMatch("place.name", options.q, { boost: 50, operator: "AND" });
    // The normal phrase query - i.e. this will match stories where just "boris" or "johnson" are in the text
    builder.withShouldMatchQueryString(textFields, `(${options.q})`, { boost: 20, use_dis_max: true, auto_generate_phrase_queries: true });
    // 2nd degree Entity links come last, providing a slight boost
    builder.withShouldMatch("person.person.name", options.q, { boost: 10, operator: "AND" });
    builder.withShouldMatch("person.place.name", options.q, { boost: 10, operator: "AND" });

    builder.withMinScore(options.minScore || DEFAULT_MIN_SCORE);
  }

  /**
   * Was a geo coordinate supplied? Numeric zero counts as supplied.
   *
   * @param value
   * @returns {boolean}
   * @private
   */
  _hasCoordinate(value) {
    return value === 0 || Boolean(value);
  }

  /**
   * POST a built query to a search endpoint and hand back the response body.
   *
   * @param endpoint Relative endpoint, e.g. 'item/search'.
   * @param query Built query.
   * @param callback Invoked as (err, body).
   * @private
   */
  _search(endpoint, query, callback) {
    this.client.post(endpoint, DEFAULT_REQ_HEADERS, query, (err, res) => {
      if (err) {
        return this._handleRequestError(err, res, query, callback);
      }
      callback(err, res.body);
    });
  }

  /**
   * Does the error (or the error that caused it) carry the given HTTP status?
   *
   * @param err
   * @param status
   * @returns {boolean}
   * @private
   */
  _errorHasStatus(err, status) {
    if (!err) {
      return false;
    }
    return err.status === status || Boolean(err.causedBy && err.causedBy.status === status);
  }

  /**
   * Call `builder[method](field, values)` only when values is a non-empty list.
   *
   * @param builder
   * @param method Name of the builder method, e.g. 'withMustFilter'.
   * @param field
   * @param values
   * @private
   */
  _filterWhenPresent(builder, method, field, values) {
    if (values && values.length > 0) {
      builder[method](field, values);
    }
  }

  /**
   * Decorate a Collection Query with entitlement filters.
   *
   * @param builder
   * @param entitlement
   * @private
   */
  _decorateEntitlementsToCollectionQuery(builder, entitlement) {
    const collections = _.get(entitlement, '_decorated.collection', null);

    if (!collections) {
      return;
    }

    builder.withMustFilter("object.code", collections.map((c) => `pacontent:${c}`));
  }

  /**
   * Decorate an Item Query with entitlement filters.
   *
   * @param builder
   * @param entitlement
   * @private
   */
  _decorateEntitlementsToItemQuery(builder, entitlement) {
    const content = (entitlement._decorated && entitlement._decorated.content) ? entitlement._decorated.content : null;
    const include = entitlement.include;
    const exclude = entitlement.exclude;

    // Apply Content Filters
    this._filterWhenPresent(builder, 'withMustFilter', "object.code", content ? content.include : null);
    this._filterWhenPresent(builder, 'withMustNotFilter', "object.code", content ? content.exclude : null);

    // Apply Include Filters
    if (include && include.item) {
      this._filterWhenPresent(builder, 'withMustFilter', "type", include.item.type);
      this._filterWhenPresent(builder, 'withMustFilter', "profile", include.item.profile);
      this._filterWhenPresent(builder, 'withMustFilter', "subject.code", include.item.subject);
      this._filterWhenPresent(builder, 'withMustFilter', "object.code", include.item.object);
    }
    if (include) {
      this._filterWhenPresent(builder, 'withMustFilter', "subject.code", include.service);
    }

    // Apply Exclude Filters
    if (exclude && exclude.item) {
      this._filterWhenPresent(builder, 'withMustNotFilter', "type", exclude.item.type);
      this._filterWhenPresent(builder, 'withMustNotFilter', "profile", exclude.item.profile);
      this._filterWhenPresent(builder, 'withMustNotFilter', "subject.code", exclude.item.subject);
      this._filterWhenPresent(builder, 'withMustNotFilter', "object.code", exclude.item.object);
    }
    if (exclude) {
      this._filterWhenPresent(builder, 'withMustNotFilter', "subject.code", exclude.service);
    }

    if (ENABLE_ENTITLE_USABLE_ITEMS_ONLY) {
      builder.withMustFilter("pubstatus", [ PubStatuses.USABLE ]);
    }
  }

  /**
   * Decorate a Service Query with entitlement filters.
   *
   * @param builder
   * @param entitlement
   * @private
   */
  _decorateEntitlementsToServiceQuery(builder, entitlement) {
    const service = (entitlement._decorated) ? entitlement._decorated.service : null;
    const include = entitlement.include;
    const exclude = entitlement.exclude;

    // Apply Product Service Filters
    this._includeServices(builder, service);

    if (include) {
      this._includeServices(builder, include.service);
    }
    if (exclude) {
      this._filterWhenPresent(builder, 'withMustNotFilter', "uri", exclude.service);
    }
  }

  /**
   * Add "include" filters for a list of entitled services.
   *
   * @param builder
   * @param services
   * @private
   */
  _includeServices(builder, services) {
    if (!services || !(services.length > 0)) {
      return;
    }

    // If we have a parent service, include its children also
    if (this._containsParentService(services)) {
      // Include all expanded services
      builder.withShouldFilter(["uri"], this._expandServiceEntitlements(services));
      // Include all children of parent services
      builder.withShouldFilter(["subject.code"], this._getParentServices(services));
    } else {
      builder.withMustFilter("uri", this._expandServiceEntitlements(services));
    }
  }

  /**
   * Expand service entitlements to include any parent services (service listing only).
   *
   * Expansion: 'paservice:news:uk' has a parent of 'paservice:news'.
   *
   * @param services
   * @returns {Array} Unique list holding each service preceded by its parent (if any).
   * @private
   */
  _expandServiceEntitlements(services) {
    const expandedServices = [];

    services.forEach((service) => {
      const matcher = service.match(/(.*?\:.*?)(\:.*?)/);
      if (matcher && matcher[1]) {
        expandedServices.push(matcher[1]);
      }
      expandedServices.push(service);
    });

    // Ensure we only have unique services
    return _.uniq(expandedServices);
  }

  /**
   * Get Parent Services Only (services with exactly one ':' separator).
   *
   * @param services
   * @returns {Array}
   * @private
   */
  _getParentServices(services) {
    const parentServices = services.filter((service) => {
      const separators = service.match(/:/g);
      // Empty strings are dropped as well, as they carry no service code.
      return Boolean(service) && Boolean(separators) && separators.length === 1;
    });

    // Ensure we only have unique services... (ES6 spread currently not supported)
    return _.uniq(parentServices);
  }

  /**
   * Does the service array contain a parent service?
   *
   * Expansion: e.g. 'paservice:sport' is a parent service (with a 'paservice:sport:rugby' being one of its children)
   *
   * @param services
   * @returns {boolean}
   * @private
   */
  _containsParentService(services) {
    // A service counts as a parent when it has no second ':' separated level.
    return services.some((service) => !service.match(/(.*?\:.*?)(\:.*?)/));
  }

  /**
   * Request Error Handler.
   *
   * @param err
   * @param res
   * @param query
   * @param callback
   * @returns {*}
   * @private
   */
  _handleRequestError(err, res, query, callback) {
    if (!callback) {
      return;
    }

    if (err.causedBy && (err.causedBy.status === BAD_REQUEST_STATUS_CODE)) {
      return callback(new BadRequestError('Bad Query', query, res));
    }

    return callback(new ContentRepository(err));
  }

  /**
   * Convert a provided product to an entitled classifications array.
   *
   * @param product Selected product code.
   * @param entitlement Current API key entitlement.
   * @returns {Array} Classifications of the product, or an empty array.
   * @private
   */
  _convertProductToContentClassifications(product, entitlement) {
    const products = (entitlement._decorated) ? entitlement._decorated.product : null;

    if (products && products[product] && products[product].length > 0) {
      return products[product];
    }

    return [];
  }

  /**
   * Unpack and map a wrapped ES search response.
   *
   * Each hit's `_source` is returned with a `searchmeta.score` added to it.
   *
   * @param res
   * @returns {{total: *, item: Array}}
   * @private
   */
  _mapElasticsearchListResponse(res) {
    return {
      total: res.hits.total,
      item: _.map(res.hits.hits, (hit) => {
        const item = hit._source;
        item.searchmeta = { score: hit._score };
        return item;
      })
    };
  }

  /**
   * Append see also associations to items.
   *
   * The n-th collection association that carries nested associations is merged
   * onto the n-th item, provided that item already has an `associations` object.
   *
   * @param items item ninjs
   * @param collection collection ninjs
   * @return The (mutated) items object.
   */
  _appendSeeAlsoAssociations(items, collection) {
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    if (!collection || !hasOwn(collection, 'item') || collection.item.length !== 1) {
      return items;
    }
    if (!items || !hasOwn(items, 'item')) {
      return items;
    }

    const associations = collection.item[0].associations;

    // A collection without associations has nothing to append.
    if (!associations) {
      return items;
    }

    // NOTE(review): items are paired with associations by position - confirm
    // that the item search returns them in association order.
    let index = 0;

    Object.keys(associations).forEach((key) => {
      const seeAlso = associations[key] ? associations[key].associations : null;

      if (!seeAlso) {
        return;
      }

      // Fewer items than associations may come back; skip the missing ones.
      const target = items.item[index];

      if (target && hasOwn(target, 'associations') && target.associations) {
        Object.assign(target.associations, seeAlso);
      }

      index++;
    });

    return items;
  }

  /**
   * Gets item uris provided a valid collection ninjs.
   *
   * @param collection collection ninjs
   * @return [] Array containing item uris
   */
  _getItemUris(collection) {
    if (!collection || !Object.prototype.hasOwnProperty.call(collection, 'item') || collection.item.length !== 1) {
      return [];
    }

    const associations = collection.item[0].associations;

    if (!associations) {
      return [];
    }

    return Object.keys(associations).map((key) => associations[key].uri);
  }
}
// Export a ready-made instance: requiring this module constructs the service
// (and its repository client) as a side effect of the first load.
module.exports = new ContentService();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment