# bluemont/alchemy_api.rb

## alchemy_api.rb
# Currently used in a Rails 3.2.6 application
#
# Notes:
# * add 'faraday' to your Gemfile
# * put this file in '#{Rails.root}/lib/alchemy_api.rb'
# * `export ALCHEMY_API_KEY="..."` in your shell
#
# Minimal client for the AlchemyAPI "ranked named entities" text call.
#
# The API key is taken from the :api_key option or, failing that, from the
# ALCHEMY_API_KEY environment variable. Requests are sent with Faraday and
# responses are parsed with JSON; both must already be loaded by the host
# application.
class AlchemyAPI

  # Boolean option => API parameter name. Sent as 1 (enabled) or 0 (disabled).
  RANKED_ENTITY_FLAG_PARAMS = {
    :disambiguate     => 'disambiguate',
    :linked_data      => 'linkedData',
    :coreference      => 'coreference',
    :quotations       => 'quotations',
    :sentiment        => 'sentiment',
    :show_source_text => 'showSourceText'
  }.freeze

  # Pass-through option => API parameter name. Sent unchanged.
  RANKED_ENTITY_VALUE_PARAMS = {
    :max_retrieve => 'maxRetrieve',
    :base_url     => 'baseUrl'
  }.freeze

  attr_accessor :api_key, :base_url

  # options - Hash of settings:
  #           :api_key  - API key (default: ENV['ALCHEMY_API_KEY'])
  #           :base_url - service root (default: "http://access.alchemyapi.com")
  def initialize(options = {})
    self.api_key = options[:api_key] || ENV['ALCHEMY_API_KEY']
    self.base_url = options[:base_url] || "http://access.alchemyapi.com"
  end

  # Entity Extraction: Text API
  #
  # Documentation from:
  # http://www.alchemyapi.com/api/entity/textc.html
  #
  # AlchemyAPI provides easy-to-use facilities for extracting the semantic
  # richness from any text content: Post (upload) any content directly to our
  # service for analysis.
  #
  # Posted content is analyzed to detect the primary document language, and
  # named entities are automatically extracted.
  #
  # These API calls may be utilized to process posted (uploaded) text content.
  # If you are processing content hosted on a publicly accessible website,
  # consider using our URL processing calls instead. Or if you are processing
  # uploaded HTML content, use our HTML API calls.
  #
  # TextGetRankedNamedEntities
  # Extract a grouped, relevancy-ranked list of named entities from some text.
  #
  # API Call: TextGetRankedNamedEntities
  # Description: The TextGetRankedNamedEntities call is utilized to extract a
  # grouped, ranked list of named entities (people, companies, organizations,
  # etc.) from within a posted text document.
  #
  # Endpoint:
  # http://access.alchemyapi.com/calls/text/TextGetRankedNamedEntities
  #
  # Parameters:
  #
  # apikey
  # your private api key
  # (required parameter)
  #
  # text
  # Text document content (must be uri-argument encoded)
  # (required parameter)
  #
  # url
  # Text document URL
  # (optional parameter. must be uri-argument encoded)
  #
  # outputMode
  # desired API output format
  # Possible values:
  # xml (default)
  # json
  # rdf
  # rel-tag
  # rel-tag-raw
  # (optional parameter)
  #
  # jsonp
  # desired JSONP callback
  # (optional parameter, requires "outputMode" to be set to json)
  #
  # disambiguate
  # whether to disambiguate detected entities.
  # Possible values:
  # 1 - enabled (default)
  # 0 - disabled
  # (optional parameter)
  #
  # linkedData
  # whether to include Linked Data content links with disambiguated entities.
  # Possible values:
  # 1 - enabled (default)
  # 0 - disabled
  # (optional parameter. disambiguation must be enabled to utilize the
  # linkedData feature.)
  #
  # coreference
  # whether to resolve he/she/etc coreferences into detected entities.
  # Possible values:
  # 1 - enabled (default)
  # 0 - disabled
  # (optional parameter)
  #
  # quotations
  # whether to enable quotations extraction.
  # Possible values:
  # 1 - enabled
  # 0 - disabled (default)
  # (optional parameter)
  #
  # sentiment
  # whether to enable entity-level sentiment analysis.
  # Possible values:
  # 1 - enabled
  # 0 - disabled (default)
  # (optional parameter - Note that enabling this option will incur usage of
  # one (1) additional AlchemyAPI transaction)
  #
  # showSourceText
  # whether to include the original 'source text' the entities were extracted
  # from within the API response.
  # Possible values:
  # 1 - enabled
  # 0 - disabled (default)
  # (optional parameter)
  #
  # maxRetrieve
  # maximum number of named entities to extract (default: 50)
  # (optional parameter)
  #
  # baseUrl
  # rel-tag output base http url
  # (optional parameter, used with rel-tag or rel-tag-raw outputMode. must be
  # uri-argument encoded)
  #
  # API Notes:
  # 1. Calls to TextGetRankedNamedEntities should be made using HTTP POST.
  # 2. HTTP POST calls should include the Content-Type header:
  #    application/x-www-form-urlencoded
  # 3. Posted text documents can be a maximum of 150 kilobytes. Larger
  #    documents will result in a "content-exceeds-size-limit" error response.
  # 4. Language detection is performed on the retrieved document before
  #    attempting named entity extraction. A minimum of 15 characters of text
  #    must exist within the requested HTTP document to perform language
  #    detection.
  # 5. Documents containing less than 15 characters of text are assumed to be
  #    English-language content.
  # 6. Disambiguation of detected entities is enabled by default.
  #    Disambiguation information will be included for each entity that is
  #    successfully resolved.
  # 7. Entity extraction is currently supported for all languages listed on
  #    the language support page. Other non-supported language submissions
  #    will be rejected and an error response returned.
  # 8. Enabling entity-level sentiment analysis results in one additional
  #    transaction utilized against your daily API limit.
  # 9. Disambiguation, sentiment analysis, and quotations extraction are
  #    currently available for English-language content only. Support for
  #    other languages is in development.
  #
  # Ruby usage:
  #
  # text    - String document content to analyse.
  # options - Hash with any of the Symbol keys :disambiguate, :linked_data,
  #           :coreference, :quotations, :sentiment, :show_source_text
  #           (booleans), :max_retrieve and :base_url. The hash is not
  #           modified.
  #
  # The request always sets outputMode to 'json'.
  #
  # Returns the parsed JSON response body.
  # Raises RuntimeError if the API key is missing, if an unknown option is
  # given, or if the response content type is not application/json.
  def ranked_named_entities_from_text(text, options = {})
    require_api_key!
    params = params_for_ranked_named_entities(options)
    params['apikey'] = api_key
    params['text'] = text
    params['outputMode'] = 'json'
    endpoint = "#{base_url}/calls/text/TextGetRankedNamedEntities"
    response = Faraday.post(endpoint, params)
    require_json_content_type!(response)
    JSON.parse(response.body)
  end

  protected

  # Raises RuntimeError unless an API key is configured. A nil, empty or
  # whitespace-only key counts as missing. Plain Ruby is used instead of
  # ActiveSupport's `blank?` so the check also works outside Rails.
  def require_api_key!
    raise "Need API key" if api_key.to_s.strip.empty?
  end

  # Raises RuntimeError unless the response declares a JSON body.
  #
  # response - object answering ['content-type'] with the header value.
  #
  # Only the media type is compared, case-insensitively, so a header that
  # carries parameters such as "application/json; charset=utf-8" is accepted.
  def require_json_content_type!(response)
    content_type = response['content-type']
    media_type = content_type.to_s.split(';').first.to_s.strip.downcase
    unless media_type == 'application/json'
      raise "Expected application/json content-type but got: #{content_type}"
    end
  end

  # Translates Ruby-style options into AlchemyAPI request parameters.
  #
  # options - Hash as described on #ranked_named_entities_from_text.
  #
  # Boolean flags use Ruby truthiness: false becomes 0 and any other non-nil
  # value (including the Integer 0) becomes 1. A flag whose value is nil is
  # left out so the API default applies. :max_retrieve and :base_url are
  # passed through unchanged when set.
  #
  # Returns a new Hash of String parameter names to values.
  # Raises RuntimeError listing any keys that are not recognised.
  def params_for_ranked_named_entities(options)
    # Work on a copy so the caller's hash is left untouched.
    remaining = (options || {}).dup
    params = {}
    RANKED_ENTITY_FLAG_PARAMS.each do |key, name|
      value = remaining.delete(key)
      # Test for nil, not truthiness: an explicit false must be sent as 0.
      params[name] = (value ? 1 : 0) unless value.nil?
    end
    RANKED_ENTITY_VALUE_PARAMS.each do |key, name|
      value = remaining.delete(key)
      params[name] = value if value
    end
    unless remaining.empty?
      raise "Unexpected options: #{remaining.keys}"
    end
    params
  end

end
	# Currently used in a Rails 3.2.6 application
	#
	# Notes:
	# * add 'faraday' to your Gemfile
	# * put this file in '#{Rails.root}/lib/alchemy_api.rb'
	# * `export ALCHEMY_API_KEY="..."` in your shell
	#
	# Minimal client for the AlchemyAPI "ranked named entities" text call.
	#
	# The API key is taken from the :api_key option or, failing that, from the
	# ALCHEMY_API_KEY environment variable. Requests are sent with Faraday and
	# responses are parsed with JSON; both must already be loaded by the host
	# application.
	class AlchemyAPI

	  # Boolean option => API parameter name. Sent as 1 (enabled) or 0 (disabled).
	  RANKED_ENTITY_FLAG_PARAMS = {
	    :disambiguate     => 'disambiguate',
	    :linked_data      => 'linkedData',
	    :coreference      => 'coreference',
	    :quotations       => 'quotations',
	    :sentiment        => 'sentiment',
	    :show_source_text => 'showSourceText'
	  }.freeze

	  # Pass-through option => API parameter name. Sent unchanged.
	  RANKED_ENTITY_VALUE_PARAMS = {
	    :max_retrieve => 'maxRetrieve',
	    :base_url     => 'baseUrl'
	  }.freeze

	  attr_accessor :api_key, :base_url

	  # options - Hash of settings:
	  #           :api_key  - API key (default: ENV['ALCHEMY_API_KEY'])
	  #           :base_url - service root (default: "http://access.alchemyapi.com")
	  def initialize(options = {})
	    self.api_key = options[:api_key] || ENV['ALCHEMY_API_KEY']
	    self.base_url = options[:base_url] || "http://access.alchemyapi.com"
	  end

	  # Entity Extraction: Text API
	  #
	  # Documentation from:
	  # http://www.alchemyapi.com/api/entity/textc.html
	  #
	  # AlchemyAPI provides easy-to-use facilities for extracting the semantic
	  # richness from any text content: Post (upload) any content directly to our
	  # service for analysis.
	  #
	  # Posted content is analyzed to detect the primary document language, and
	  # named entities are automatically extracted.
	  #
	  # These API calls may be utilized to process posted (uploaded) text content.
	  # If you are processing content hosted on a publicly accessible website,
	  # consider using our URL processing calls instead. Or if you are processing
	  # uploaded HTML content, use our HTML API calls.
	  #
	  # TextGetRankedNamedEntities
	  # Extract a grouped, relevancy-ranked list of named entities from some text.
	  #
	  # API Call: TextGetRankedNamedEntities
	  # Description: The TextGetRankedNamedEntities call is utilized to extract a
	  # grouped, ranked list of named entities (people, companies, organizations,
	  # etc.) from within a posted text document.
	  #
	  # Endpoint:
	  # http://access.alchemyapi.com/calls/text/TextGetRankedNamedEntities
	  #
	  # Parameters:
	  #
	  # apikey
	  # your private api key
	  # (required parameter)
	  #
	  # text
	  # Text document content (must be uri-argument encoded)
	  # (required parameter)
	  #
	  # url
	  # Text document URL
	  # (optional parameter. must be uri-argument encoded)
	  #
	  # outputMode
	  # desired API output format
	  # Possible values:
	  # xml (default)
	  # json
	  # rdf
	  # rel-tag
	  # rel-tag-raw
	  # (optional parameter)
	  #
	  # jsonp
	  # desired JSONP callback
	  # (optional parameter, requires "outputMode" to be set to json)
	  #
	  # disambiguate
	  # whether to disambiguate detected entities.
	  # Possible values:
	  # 1 - enabled (default)
	  # 0 - disabled
	  # (optional parameter)
	  #
	  # linkedData
	  # whether to include Linked Data content links with disambiguated entities.
	  # Possible values:
	  # 1 - enabled (default)
	  # 0 - disabled
	  # (optional parameter. disambiguation must be enabled to utilize the
	  # linkedData feature.)
	  #
	  # coreference
	  # whether to resolve he/she/etc coreferences into detected entities.
	  # Possible values:
	  # 1 - enabled (default)
	  # 0 - disabled
	  # (optional parameter)
	  #
	  # quotations
	  # whether to enable quotations extraction.
	  # Possible values:
	  # 1 - enabled
	  # 0 - disabled (default)
	  # (optional parameter)
	  #
	  # sentiment
	  # whether to enable entity-level sentiment analysis.
	  # Possible values:
	  # 1 - enabled
	  # 0 - disabled (default)
	  # (optional parameter - Note that enabling this option will incur usage of
	  # one (1) additional AlchemyAPI transaction)
	  #
	  # showSourceText
	  # whether to include the original 'source text' the entities were extracted
	  # from within the API response.
	  # Possible values:
	  # 1 - enabled
	  # 0 - disabled (default)
	  # (optional parameter)
	  #
	  # maxRetrieve
	  # maximum number of named entities to extract (default: 50)
	  # (optional parameter)
	  #
	  # baseUrl
	  # rel-tag output base http url
	  # (optional parameter, used with rel-tag or rel-tag-raw outputMode. must be
	  # uri-argument encoded)
	  #
	  # API Notes:
	  # 1. Calls to TextGetRankedNamedEntities should be made using HTTP POST.
	  # 2. HTTP POST calls should include the Content-Type header:
	  #    application/x-www-form-urlencoded
	  # 3. Posted text documents can be a maximum of 150 kilobytes. Larger
	  #    documents will result in a "content-exceeds-size-limit" error response.
	  # 4. Language detection is performed on the retrieved document before
	  #    attempting named entity extraction. A minimum of 15 characters of text
	  #    must exist within the requested HTTP document to perform language
	  #    detection.
	  # 5. Documents containing less than 15 characters of text are assumed to be
	  #    English-language content.
	  # 6. Disambiguation of detected entities is enabled by default.
	  #    Disambiguation information will be included for each entity that is
	  #    successfully resolved.
	  # 7. Entity extraction is currently supported for all languages listed on
	  #    the language support page. Other non-supported language submissions
	  #    will be rejected and an error response returned.
	  # 8. Enabling entity-level sentiment analysis results in one additional
	  #    transaction utilized against your daily API limit.
	  # 9. Disambiguation, sentiment analysis, and quotations extraction are
	  #    currently available for English-language content only. Support for
	  #    other languages is in development.
	  #
	  # Ruby usage:
	  #
	  # text    - String document content to analyse.
	  # options - Hash with any of the Symbol keys :disambiguate, :linked_data,
	  #           :coreference, :quotations, :sentiment, :show_source_text
	  #           (booleans), :max_retrieve and :base_url. The hash is not
	  #           modified.
	  #
	  # The request always sets outputMode to 'json'.
	  #
	  # Returns the parsed JSON response body.
	  # Raises RuntimeError if the API key is missing, if an unknown option is
	  # given, or if the response content type is not application/json.
	  def ranked_named_entities_from_text(text, options = {})
	    require_api_key!
	    params = params_for_ranked_named_entities(options)
	    params['apikey'] = api_key
	    params['text'] = text
	    params['outputMode'] = 'json'
	    endpoint = "#{base_url}/calls/text/TextGetRankedNamedEntities"
	    response = Faraday.post(endpoint, params)
	    require_json_content_type!(response)
	    JSON.parse(response.body)
	  end

	  protected

	  # Raises RuntimeError unless an API key is configured. A nil, empty or
	  # whitespace-only key counts as missing. Plain Ruby is used instead of
	  # ActiveSupport's `blank?` so the check also works outside Rails.
	  def require_api_key!
	    raise "Need API key" if api_key.to_s.strip.empty?
	  end

	  # Raises RuntimeError unless the response declares a JSON body.
	  #
	  # response - object answering ['content-type'] with the header value.
	  #
	  # Only the media type is compared, case-insensitively, so a header that
	  # carries parameters such as "application/json; charset=utf-8" is accepted.
	  def require_json_content_type!(response)
	    content_type = response['content-type']
	    media_type = content_type.to_s.split(';').first.to_s.strip.downcase
	    unless media_type == 'application/json'
	      raise "Expected application/json content-type but got: #{content_type}"
	    end
	  end

	  # Translates Ruby-style options into AlchemyAPI request parameters.
	  #
	  # options - Hash as described on #ranked_named_entities_from_text.
	  #
	  # Boolean flags use Ruby truthiness: false becomes 0 and any other non-nil
	  # value (including the Integer 0) becomes 1. A flag whose value is nil is
	  # left out so the API default applies. :max_retrieve and :base_url are
	  # passed through unchanged when set.
	  #
	  # Returns a new Hash of String parameter names to values.
	  # Raises RuntimeError listing any keys that are not recognised.
	  def params_for_ranked_named_entities(options)
	    # Work on a copy so the caller's hash is left untouched.
	    remaining = (options || {}).dup
	    params = {}
	    RANKED_ENTITY_FLAG_PARAMS.each do |key, name|
	      value = remaining.delete(key)
	      # Test for nil, not truthiness: an explicit false must be sent as 0.
	      params[name] = (value ? 1 : 0) unless value.nil?
	    end
	    RANKED_ENTITY_VALUE_PARAMS.each do |key, name|
	      value = remaining.delete(key)
	      params[name] = value if value
	    end
	    unless remaining.empty?
	      raise "Unexpected options: #{remaining.keys}"
	    end
	    params
	  end

	end