# pietrop/spoken_data_api_library.rb

## spoken_data_api_library.rb
=begin
`spoken_data_api_library.rb`
an attempt to make a SDK for the spoken data API.
spokenData API ruby unofficial SDK v1
@author : Pietro Passarelli
@date: 22/07/2015
@url:  http://spokendata.com/api

##  About

It provides an SDK for the spokendata API.
It does not implement all of the RESTful methods;
it implements
	- retrieving the srt file of a recording: **`retrieve_subtitles_srt(recording_id)`** returns the srt file as a string if the recording is ready, or boolean `false` if it is not.
	- sending a video by URL: **`send_by_video_url(video_url)`** returns the video uid, which you should save for later retrieval.
	- getting the **list of recordings**: `get_recordings_list` (not used in the example).
	- **`get_recording_by_recording_id`**, which returns a recording object (also used as a helper method).

You can test this with the rails runner: save this file in the `lib` folder
and run it from the terminal, from the root of the project:
		rails r lib/spoken_data_api_library.rb

## Usage

to use this API in rails you need to set
 - ENV['SPOKEN_DATA_USER_ID']
 - ENV['SPOKEN_DATA_API_TOKEN']


then you can add this file `spoken_data_api_library.rb` to `/lib/` folder.

and [load it in the application through the config file](https://hackhands.com/rails-nameerror-uninitialized-constant-class-solution/)
by adding  in `application.rb`

```ruby
config.autoload_paths += %W(#{config.root}/lib)
```

then in one of your controllers where you want to use it, you can do

```ruby
spokenDataAPIObject = SpokenDataAPI.new(ENV['SPOKEN_DATA_USER_ID'],ENV['SPOKEN_DATA_API_TOKEN'])


uid = spokenDataAPIObject.send_by_video_url(a_string_containing_link_to_your_video_ie_on_amazon_s3)
# here is good to do something like saving the `uid`
	@video.uid = uid
	@video.save
```

then when you want to retrieve the srt file or check if it's ready

```
response = spokenDataAPIObject.retrieve_subtitles_srt(uid)
```

it returns a string containing the srt if ready, or it returns false boolean if not ready.

for more details see http://spokendata.com
=end

require 'net/http'
require 'open-uri'
require 'json'
require 'nokogiri'
## if you want to test it outside of rails uncomment the line below, is required to parse the xml to ruby hash
# require 'active_support/core_ext/hash'

####################
# Unofficial client for the SpokenData REST API (http://spokendata.com/api).
#
# Each instance carries its own credentials and builds request URLs of the
# form `<base url><user id>/<api token><endpoint>`. XML responses are
# converted to plain Ruby hashes by `parse_xml`.
class SpokenDataAPI

  # Root of every request URL.
  @@BASE_URL = "https://spokendata.com/api/"

  # Endpoint paths, keyed by API method name.
  @@ENDPOINTS = {}

  # recordingList returns all user recordings; each recording carries a
  # `status`, either 'processing' or 'done'.
  @@ENDPOINTS['recordingList'] = "/recordingList"

  ##################### initializer
  # Stores the credentials on the instance (not in class variables), so that
  # clients created with different credentials do not overwrite each other.
  #
  # @param user_id   [#to_s] SpokenData user id
  # @param api_token [#to_s] SpokenData API token
  def initialize(user_id, api_token)
    @user_id = user_id.to_s
    @api_token = api_token.to_s
  end

  ##################### helper methods
  # Performs an HTTP GET request.
  #
  # NOTE: inside this class this method shadows `Kernel#open`.
  #
  # @param url [String] absolute URL to fetch
  # @return [String] the response body
  def open(url)
    Net::HTTP.get(URI.parse(url))
  end

  # Fetches an API URL and converts the XML response into a ruby hash.
  # Relies on Nokogiri and on ActiveSupport's `Hash.from_xml`.
  #
  # @param url [String] API URL that answers with XML
  # @return [Hash, nil] the content of the top-level `data` element
  def parse_xml(url)
    xml = normalize_xml_encoding(open(url))
    # Parse the xml with nokogiri, then let ActiveSupport turn the
    # re-serialised document into a ruby hash.
    document = Nokogiri::XML(xml)
    parsed = Hash.from_xml(document.to_s)
    # the `data` tag encapsulates the rest of the keys / tags
    parsed['data']
  end

  ##################### API Query builder helper methods
  # @return [String] root URL of the API
  def get_base_url
    @@BASE_URL
  end

  # @return [String] the API token given to the constructor
  def get_api_key
    @api_token
  end

  # @return [String] the user id given to the constructor
  def get_user_id
    @user_id
  end

  # @return [String] base URL + user id + "/" + API token; endpoint paths are
  #   appended to this string
  def get_base_api_request
    get_base_url + get_user_id + "/" + get_api_key
  end

  ##################### getter methods recordings list
  # def get_user
  #   url = get_base_api_request + @@ENDPOINTS['user']
  #   xml_Hash = parse_xml(open(url))
  #   return xml_Hash #['user'] #TODO to check if it's actual combination or not some nested ['var']['user']
  # end

  # Lists the recordings of the user.
  #
  # @return [Array<Hash>] one hash per recording; empty when the response
  #   holds no recordings. A single recording is still returned inside an
  #   array.
  def get_recordings_list
    fetch_recordings
  end

  # Gets the recording based on id; the id is the one defined by spokendata.
  #
  # @param recording_i [#to_i] recording id
  # @return [Hash, nil] the whole recording object, or nil when no recording
  #   has that id
  def get_recording_by_recording_id(recording_i)
    wanted_id = recording_i.to_i
    # `last` keeps the previous behaviour should an id ever appear twice.
    fetch_recordings.select { |recording| recording['id'].to_i == wanted_id }.last
  end

  ##################### getter method recording status
  # Helper method for recording_processed?.
  #
  # @param recording [Hash, nil] a recording object
  # @return [Boolean, String] true if the status is "done", false if it is
  #   "processing", otherwise (including a nil recording) an error message
  def get_recording_status(recording)
    status = recording && recording['status']
    case status
    when "done" then true
    when "processing" then false
    else "There was an error assessing the status of the recording"
    end
  end

  # From a recording `id`, tells whether the recording has been processed.
  #
  # @param recording_id [#to_i] recording id
  # @return [Boolean] true only if the status is "done"; false if it is
  #   "processing", unknown, or the recording cannot be found
  def recording_processed?(recording_id)
    recording = get_recording_by_recording_id(recording_id)
    # Compare against `true` so the error message string returned for an
    # unknown status is never mistaken for "processed".
    get_recording_status(recording) == true
  end

  ##################### getter methods srt
  # Helper method for retrieve_subtitles_srt: downloads the srt without
  # checking the recording status first.
  #
  # @param recording_id [#to_s] recording id
  # @return [String] body of the subtitles.srt response
  def get_srt(recording_id)
    open("#{get_base_api_request}/recording/#{recording_id}/subtitles.srt")
  end

  # Retrieves subtitles srt.
  #
  # @param recording_id [#to_i] recording id
  # @return [String, false] string containing the srt file if the status is
  #   "done", false otherwise
  def retrieve_subtitles_srt(recording_id)
    return false unless recording_processed?(recording_id)

    get_srt(recording_id)
  end

  ##################### send video for captioning
  # Sends a video for captioning.
  #
  # From the API documentation:
  #   RECORDING-URL - YouTube or any direct URL of a media file
  #   LANGUAGE - english | english-broadcastnews | english-test | russian | chinese-ma | spanish-us | czech | czech-medicine | czech-broadcastnews | slovak
  #   ANNOTATOR-ID = id of assigned annotator (leave empty if no annotator)
  #
  # Both parameters are form-encoded, so a media URL that itself contains
  # `?`, `&` or `=` cannot corrupt the query string of the API request.
  #
  # @param url [#to_s] location of the video: the full path if you are using
  #   amazon S3, the normal URL for youtube; also works with Vimeo
  # @param language [#to_s] one of the LANGUAGE values above
  # @return [String] the recording id, needed to check the status and to
  #   retrieve the captions later on; best to save it in the db
  # @raise [NoMethodError] if the response has no `recording` element
  def send_by_video_url(url, language = "english")
    query = URI.encode_www_form([["url", url.to_s], ["language", language.to_s]])
    response = parse_xml("#{get_base_api_request}/recording/add?#{query}")
    # example response from api #{"message"=>"This media URL and language have already been entered.", "recording"=>{"id"=>"5747"}}
    response['recording']['id']
  end

  private

  # IMPORTANT: the API declares its encoding as `utf8` instead of `utf-8`,
  # and that trips up the parser, hence the substitution. Only the encoding
  # declaration is rewritten; any other "utf8" in the document is left alone.
  def normalize_xml_encoding(xml)
    xml.sub(/(encoding\s*=\s*["'])utf8(["'])/i, '\1utf-8\2')
  end

  # Requests the recordingList endpoint and returns its recordings as an
  # array of hashes.
  def fetch_recordings
    recordings_from(parse_xml(get_base_api_request + @@ENDPOINTS['recordingList']))
  end

  # Extracts data['recordings']['recording'] and normalises it to an array:
  # the value is a single hash when there is only one recording, and may be
  # missing altogether when there are none.
  def recordings_from(data)
    recordings = data.is_a?(Hash) ? data['recordings'] : nil
    entries = recordings.is_a?(Hash) ? recordings['recording'] : nil
    entries = [entries] if entries.is_a?(Hash)
    Array(entries).select { |entry| entry.is_a?(Hash) }
  end

end # end of class SpokenDataAPI
####################


#################### How to use, example in 3 steps
# for this example you need to set the user id and data api environment variables.
# 1. create a spoken data api instance, initialising it with user_id and API key.
# you'll find these at http://spokendata.com/api once you have logged in.
# use environment variables to store user id and api key, and remember to put the file that defines them in `.gitignore`
# spokenDataAPIObject = SpokenDataAPI.new(ENV['SPOKEN_DATA_USER_ID'],ENV['SPOKEN_DATA_API_TOKEN'])

# Run the example only when this file is the program being executed, not when
# it is loaded as a library (e.g. from the `lib` folder of an application):
# otherwise every load would send a video to the API and print to stdout.
if File.expand_path(__FILE__) == File.expand_path($PROGRAM_NAME)
  spokenDataAPIObject = SpokenDataAPI.new(ENV['SPOKEN_DATA_USER_ID'], ENV['SPOKEN_DATA_API_TOKEN'])

  # 2. send video by URL
  # sending a video by url will return you the spoken data api assigned video ID
  uid = spokenDataAPIObject.send_by_video_url("https://www.youtube.com/watch?v=u5CVsCnxyXg")
  # interpolation (unlike `+`) does not raise when uid is not a String
  puts "UID: #{uid}"

  # 3. retrieve captions
  # use recording id / uid to retrieve subtitles, when ready. (will return false if they are not ready)
  # (good idea to save uid in db for checking on processing of video captions)
  # returns srt file if ready, boolean false if not => you can `if response != false` etc..
  response = spokenDataAPIObject.retrieve_subtitles_srt(uid)
  puts "Response: #{response}"
end
####################
=begin
	`spoken_data_api_library.rb`
	an attempt to make a SDK for the spoken data API.
	spokenData API ruby unofficial SDK v1
	@author : Pietro Passarelli
	@date: 22/07/2015
	@url: http://spokendata.com/api

	## About

	It provides an SDK for the spokendata API.
	It does not implement all of the RESTful methods;
	it implements
	- retrieving the srt file of a recording: `retrieve_subtitles_srt(recording_id)` returns the srt file as a string if the recording is ready, or boolean `false` if it is not.
	- sending a video by URL: `send_by_video_url(video_url)` returns the video uid, which you should save for later retrieval.
	- getting the list of recordings: `get_recordings_list` (not used in the example).
	- `get_recording_by_recording_id`, which returns a recording object (also used as a helper method).

	You can test this with the rails runner: save this file in the `lib` folder
	and run it from the terminal, from the root of the project:
	rails r lib/spoken_data_api_library.rb

	## Usage

	to use this API in rails you need to set
	- ENV['SPOKEN_DATA_USER_ID']
	- ENV['SPOKEN_DATA_API_TOKEN']


	then you can add this file `spoken_data_api_library.rb` to `/lib/` folder.

	and [load it in the application through the config file](https://hackhands.com/rails-nameerror-uninitialized-constant-class-solution/)
	by adding in `application.rb`

	```ruby
	config.autoload_paths += %W(#{config.root}/lib)
	```

	then in one of your controllers where you want to use it, you can do

	```ruby
	spokenDataAPIObject = SpokenDataAPI.new(ENV['SPOKEN_DATA_USER_ID'],ENV['SPOKEN_DATA_API_TOKEN'])


	uid = spokenDataAPIObject.send_by_video_url(a_string_containing_link_to_your_video_ie_on_amazon_s3)
	# here is good to do something like saving the `uid`
	@video.uid = uid
	@video.save
	```

	then when you want to retrieve the srt file or check if it's ready

	```
	response = spokenDataAPIObject.retrieve_subtitles_srt(uid)
	```

	it returns a string containing the srt if ready, or it returns false boolean if not ready.

	for more details see http://spokendata.com
=end

	require 'net/http'
	require 'open-uri'
	require 'json'
	require 'nokogiri'
	## if you want to test it outside of rails uncomment the line below, is required to parse the xml to ruby hash
	# require 'active_support/core_ext/hash'

	####################
	# Unofficial client for the SpokenData REST API (http://spokendata.com/api).
	#
	# Each instance carries its own credentials and builds request URLs of the
	# form `<base url><user id>/<api token><endpoint>`. XML responses are
	# converted to plain Ruby hashes by `parse_xml`.
	class SpokenDataAPI

	  # Root of every request URL.
	  @@BASE_URL = "https://spokendata.com/api/"

	  # Endpoint paths, keyed by API method name.
	  @@ENDPOINTS = {}

	  # recordingList returns all user recordings; each recording carries a
	  # `status`, either 'processing' or 'done'.
	  @@ENDPOINTS['recordingList'] = "/recordingList"

	  ##################### initializer
	  # Stores the credentials on the instance (not in class variables), so that
	  # clients created with different credentials do not overwrite each other.
	  #
	  # @param user_id   [#to_s] SpokenData user id
	  # @param api_token [#to_s] SpokenData API token
	  def initialize(user_id, api_token)
	    @user_id = user_id.to_s
	    @api_token = api_token.to_s
	  end

	  ##################### helper methods
	  # Performs an HTTP GET request.
	  #
	  # NOTE: inside this class this method shadows `Kernel#open`.
	  #
	  # @param url [String] absolute URL to fetch
	  # @return [String] the response body
	  def open(url)
	    Net::HTTP.get(URI.parse(url))
	  end

	  # Fetches an API URL and converts the XML response into a ruby hash.
	  # Relies on Nokogiri and on ActiveSupport's `Hash.from_xml`.
	  #
	  # @param url [String] API URL that answers with XML
	  # @return [Hash, nil] the content of the top-level `data` element
	  def parse_xml(url)
	    xml = normalize_xml_encoding(open(url))
	    # Parse the xml with nokogiri, then let ActiveSupport turn the
	    # re-serialised document into a ruby hash.
	    document = Nokogiri::XML(xml)
	    parsed = Hash.from_xml(document.to_s)
	    # the `data` tag encapsulates the rest of the keys / tags
	    parsed['data']
	  end

	  ##################### API Query builder helper methods
	  # @return [String] root URL of the API
	  def get_base_url
	    @@BASE_URL
	  end

	  # @return [String] the API token given to the constructor
	  def get_api_key
	    @api_token
	  end

	  # @return [String] the user id given to the constructor
	  def get_user_id
	    @user_id
	  end

	  # @return [String] base URL + user id + "/" + API token; endpoint paths are
	  #   appended to this string
	  def get_base_api_request
	    get_base_url + get_user_id + "/" + get_api_key
	  end

	  ##################### getter methods recordings list
	  # def get_user
	  #   url = get_base_api_request + @@ENDPOINTS['user']
	  #   xml_Hash = parse_xml(open(url))
	  #   return xml_Hash #['user'] #TODO to check if it's actual combination or not some nested ['var']['user']
	  # end

	  # Lists the recordings of the user.
	  #
	  # @return [Array<Hash>] one hash per recording; empty when the response
	  #   holds no recordings. A single recording is still returned inside an
	  #   array.
	  def get_recordings_list
	    fetch_recordings
	  end

	  # Gets the recording based on id; the id is the one defined by spokendata.
	  #
	  # @param recording_i [#to_i] recording id
	  # @return [Hash, nil] the whole recording object, or nil when no recording
	  #   has that id
	  def get_recording_by_recording_id(recording_i)
	    wanted_id = recording_i.to_i
	    # `last` keeps the previous behaviour should an id ever appear twice.
	    fetch_recordings.select { |recording| recording['id'].to_i == wanted_id }.last
	  end

	  ##################### getter method recording status
	  # Helper method for recording_processed?.
	  #
	  # @param recording [Hash, nil] a recording object
	  # @return [Boolean, String] true if the status is "done", false if it is
	  #   "processing", otherwise (including a nil recording) an error message
	  def get_recording_status(recording)
	    status = recording && recording['status']
	    case status
	    when "done" then true
	    when "processing" then false
	    else "There was an error assessing the status of the recording"
	    end
	  end

	  # From a recording `id`, tells whether the recording has been processed.
	  #
	  # @param recording_id [#to_i] recording id
	  # @return [Boolean] true only if the status is "done"; false if it is
	  #   "processing", unknown, or the recording cannot be found
	  def recording_processed?(recording_id)
	    recording = get_recording_by_recording_id(recording_id)
	    # Compare against `true` so the error message string returned for an
	    # unknown status is never mistaken for "processed".
	    get_recording_status(recording) == true
	  end

	  ##################### getter methods srt
	  # Helper method for retrieve_subtitles_srt: downloads the srt without
	  # checking the recording status first.
	  #
	  # @param recording_id [#to_s] recording id
	  # @return [String] body of the subtitles.srt response
	  def get_srt(recording_id)
	    open("#{get_base_api_request}/recording/#{recording_id}/subtitles.srt")
	  end

	  # Retrieves subtitles srt.
	  #
	  # @param recording_id [#to_i] recording id
	  # @return [String, false] string containing the srt file if the status is
	  #   "done", false otherwise
	  def retrieve_subtitles_srt(recording_id)
	    return false unless recording_processed?(recording_id)

	    get_srt(recording_id)
	  end

	  ##################### send video for captioning
	  # Sends a video for captioning.
	  #
	  # From the API documentation:
	  #   RECORDING-URL - YouTube or any direct URL of a media file
	  #   LANGUAGE - english | english-broadcastnews | english-test | russian | chinese-ma | spanish-us | czech | czech-medicine | czech-broadcastnews | slovak
	  #   ANNOTATOR-ID = id of assigned annotator (leave empty if no annotator)
	  #
	  # Both parameters are form-encoded, so a media URL that itself contains
	  # `?`, `&` or `=` cannot corrupt the query string of the API request.
	  #
	  # @param url [#to_s] location of the video: the full path if you are using
	  #   amazon S3, the normal URL for youtube; also works with Vimeo
	  # @param language [#to_s] one of the LANGUAGE values above
	  # @return [String] the recording id, needed to check the status and to
	  #   retrieve the captions later on; best to save it in the db
	  # @raise [NoMethodError] if the response has no `recording` element
	  def send_by_video_url(url, language = "english")
	    query = URI.encode_www_form([["url", url.to_s], ["language", language.to_s]])
	    response = parse_xml("#{get_base_api_request}/recording/add?#{query}")
	    # example response from api #{"message"=>"This media URL and language have already been entered.", "recording"=>{"id"=>"5747"}}
	    response['recording']['id']
	  end

	  private

	  # IMPORTANT: the API declares its encoding as `utf8` instead of `utf-8`,
	  # and that trips up the parser, hence the substitution. Only the encoding
	  # declaration is rewritten; any other "utf8" in the document is left alone.
	  def normalize_xml_encoding(xml)
	    xml.sub(/(encoding\s*=\s*["'])utf8(["'])/i, '\1utf-8\2')
	  end

	  # Requests the recordingList endpoint and returns its recordings as an
	  # array of hashes.
	  def fetch_recordings
	    recordings_from(parse_xml(get_base_api_request + @@ENDPOINTS['recordingList']))
	  end

	  # Extracts data['recordings']['recording'] and normalises it to an array:
	  # the value is a single hash when there is only one recording, and may be
	  # missing altogether when there are none.
	  def recordings_from(data)
	    recordings = data.is_a?(Hash) ? data['recordings'] : nil
	    entries = recordings.is_a?(Hash) ? recordings['recording'] : nil
	    entries = [entries] if entries.is_a?(Hash)
	    Array(entries).select { |entry| entry.is_a?(Hash) }
	  end

	end # end of class SpokenDataAPI
	####################


	#################### How to use, example in 3 steps
	# for this example you need to set the user id and data api environment variables.
	# 1. create a spoken data api instance, initialising it with user_id and API key.
	# you'll find these at http://spokendata.com/api once you have logged in.
	# use environment variables to store user id and api key, and remember to put the file that defines them in `.gitignore`
	# spokenDataAPIObject = SpokenDataAPI.new(ENV['SPOKEN_DATA_USER_ID'],ENV['SPOKEN_DATA_API_TOKEN'])

	# Run the example only when this file is the program being executed, not when
	# it is loaded as a library (e.g. from the `lib` folder of an application):
	# otherwise every load would send a video to the API and print to stdout.
	if File.expand_path(__FILE__) == File.expand_path($PROGRAM_NAME)
	  spokenDataAPIObject = SpokenDataAPI.new(ENV['SPOKEN_DATA_USER_ID'], ENV['SPOKEN_DATA_API_TOKEN'])

	  # 2. send video by URL
	  # sending a video by url will return you the spoken data api assigned video ID
	  uid = spokenDataAPIObject.send_by_video_url("https://www.youtube.com/watch?v=u5CVsCnxyXg")
	  # interpolation (unlike `+`) does not raise when uid is not a String
	  puts "UID: #{uid}"

	  # 3. retrieve captions
	  # use recording id / uid to retrieve subtitles, when ready. (will return false if they are not ready)
	  # (good idea to save uid in db for checking on processing of video captions)
	  # returns srt file if ready, boolean false if not => you can `if response != false` etc..
	  response = spokenDataAPIObject.retrieve_subtitles_srt(uid)
	  puts "Response: #{response}"
	end
	####################