fractaledmind/pythonista_searchlink

## pythonista_searchlink
# Original SearchLink by Brett Terpstra (@ttscoff)
#
# Project page:
# http://brettterpstra.com/projects/searchlink/
#
# Port to Python/Editorial by Ole Zorn (@olemoritz)
# Based on SearchLink 2.0

# Configuration:

# Set to True to always emit inline links ([text](url)) instead of
# reference-style links. Even when False, inline output is switched on
# automatically if the input contains exactly one "](" sequence.
inline = False

# Set to True to add a title to each link, taken from the title that
# the search returned for the linked page.
include_titles = False

# Country code for iTunes searches (default US).
country_code = 'US'

# Set to True to start every reference marker with a random four-digit
# string followed by a dash. This allows running SearchLink multiple
# times on the same text without marker conflicts.
prefix_random = False

# Affiliate parameters appended to iTunes URLs (developer links are left
# untouched). Use empty quotes for none.
# Example:
itunes_affiliate = '&at=10l4tL&ct=searchlink'

# To create Amazon affiliate links, set amazon_partner to a list of
# [tag, camp, creative]
# Use the Amazon link tool to create any affiliate link and examine it
# to find the needed parts. Set to False to return regular Amazon links.
# Example:
amazon_partner = ["brettterpcom-20","1789","390957"]

# To create custom abbreviations for Google site searches,
# add to (or replace) the entries of the dict below:
# 'abbreviation': 'site.url',
# This allows you, for example, to use [search term](!bt)
# as a shortcut to search brettterpstra.com. Keys in this dict are
# checked before the built-in search triggers, so they can override them.
custom_site_searches = {
  'bt': 'brettterpstra.com',
  'md': 'www.macdrifter.com'
}

import requests
import json
import re
import urllib
import random
import workflow
import editor

# Values supplied by the surrounding workflow: the action's parameters
# and the text to process.
params = workflow.get_parameters()
input_ = workflow.get_input()

# Unless inline links were forced in the configuration, enable them only
# when the input contains exactly one "](" sequence.
if not inline:
	inline = len(re.findall(r'\]\(', input_)) == 1

def clean_string(s):
	"""Make a title safe to embed in a double-quoted Markdown link title.

	Runs of newlines collapse to a single space, double quotes become the
	HTML entity ``&quot;`` and pipes become hyphens. Leading and trailing
	whitespace is stripped. An empty or missing title yields ''.
	"""
	if not s:
		return ''
	s = re.sub(r'\n+', ' ', s)
	# The trailing semicolon is required for a well-formed entity; without
	# it a Markdown processor escapes the ampersand and shows "&quot".
	s = s.replace('"', '&quot;')
	s = s.replace('|', '-')
	return s.strip()

# TODO: Read custom config...

def wiki(terms):
	"""Look up an English Wikipedia page by title.

	Returns a (url, title) tuple for the first page in the API response,
	or (None, None) when the response contains no existing page. Always
	returning a pair keeps ``url, title = wiki(...)`` safe for callers.
	"""
	uri = 'http://en.wikipedia.org/w/api.php?action=query&format=json&prop=info&inprop=url&titles=' + urllib.quote(terms)
	json_string = requests.get(uri, headers={'Referer': 'http://bretterpstra.com', 'User-Agent': 'SearchLink (http://brettterpstra.com)'}).text
	result = json.loads(json_string)
	pages = (result or {}).get('query', {}).get('pages')
	if not pages:
		return (None, None)
	# "pages" is keyed by page id; only the first entry is used
	first_page = next(iter(pages.values()))
	# The API flags titles without an article with a "missing" key
	if 'missing' in first_page or 'fullurl' not in first_page:
		return (None, None)
	return (first_page['fullurl'], first_page.get('title'))

def zero_click(terms):
	"""Query the DuckDuckGo API for `terms`.

	Returns a (title, definition, definition_link, wiki_link) tuple taken
	from the Heading, Definition, DefinitionURL and AbstractURL fields of
	the response. Fields that are absent come back as None; an empty
	response yields four Nones.
	"""
	query = urllib.quote(terms)
	endpoint = 'http://api.duckduckgo.com/?q=%s&format=json&no_redirect=1&no_html=1&skip_disambig=1' % query
	answer = json.loads(requests.get(endpoint).text)
	if not answer:
		return (None, None, None, None)
	wanted = ('Heading', 'Definition', 'DefinitionURL', 'AbstractURL')
	return tuple(answer.get(field, None) for field in wanted)

def itunes(entity, terms, dev, aff='', country_code='US'):
	"""Search the iTunes store and describe the first result.

	entity       -- iTunes entity to search ('macSoftware', 'iPadSoftware',
	                'song', 'album' or 'musicArtist')
	terms        -- search terms
	dev          -- for software, link to the seller's site instead of the
	                store page
	aff          -- affiliate parameters appended to store URLs (never to
	                developer links)
	country_code -- store country to search

	Returns a (url, title) tuple, or (None, None) when nothing usable was
	found.
	"""
	url = 'http://itunes.apple.com/search?term=%s&country=%s&entity=%s&attribute=allTrackTerm' % (urllib.quote(terms), country_code, entity)
	json_dict = json.loads(requests.get(url).text)
	results = json_dict.get('results')
	if not json_dict.get('resultCount') or not results:
		return (None, None)
	result = results[0]
	output_url = None
	output_title = None
	try:
		# The patterns are matched against the requested entity
		if re.match(r'(mac|iPad)Software', entity):
			output_url = result['sellerUrl'] if dev else result['trackViewUrl']
			output_title = result['trackName']
		elif re.match(r'(musicArtist|song|album)', entity):
			wrapper_type = result['wrapperType']
			if wrapper_type == 'track':
				output_url = result['trackViewUrl']
				output_title = result['trackName'] + ' by ' + result['artistName']
			elif wrapper_type == 'collection':
				output_url = result['collectionViewUrl']
				output_title = result['collectionName'] + ' by ' + result['artistName']
			elif wrapper_type == 'artist':
				output_url = result['artistLinkUrl']
				output_title = result['artistName']
	except KeyError:
		# The result lacks a field we need (e.g. no seller URL)
		return (None, None)
	if not output_url:
		# Unsupported entity or wrapper type
		return (None, None)
	if dev:
		return (output_url, output_title)
	return (output_url + aff, output_title)

def lastfm(entity, terms):
	"""Search Last.fm for a track or an artist.

	entity -- 'track' or 'artist'
	terms  -- search terms

	Returns a (url, title) tuple for the first match, or (None, None) when
	there is no match, the response has an unexpected shape, or `entity`
	is not supported.
	"""
	url = 'http://ws.audioscrobbler.com/2.0/?method=%s.search&%s=%s&api_key=2f3407ec29601f97ca8a18ff580477de&format=json' % (entity, entity, urllib.quote(terms))
	json_dict = json.loads(requests.get(url).text)
	results = json_dict.get('results', None)
	if not results:
		return (None, None)
	try:
		if entity == 'track':
			hit = results['trackmatches']['track'][0]
			return (hit['url'], hit['name'] + ' by ' + hit['artist'])
		if entity == 'artist':
			hit = results['artistmatches']['artist'][0]
			return (hit['url'], hit['name'])
	except (KeyError, IndexError, TypeError):
		# No matches, or the match list is not shaped as expected
		return (None, None)
	# Unsupported entity: never hand back the request URL as a result
	return (None, None)

def google(terms, define=False):
	"""Return the first Google web search result for `terms`.

	terms  -- search terms
	define -- when True and the result URL contains 'dictionary', use the
	          result's content (with HTML tags removed) as the title

	Returns a two-item list [url, title]; both items are None when the
	response carries no results, so callers can always unpack it.
	"""
	uri = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&filter=1&rsz=small&q=' + urllib.quote(terms)
	json_string = requests.get(uri, headers={'Referer': 'http://bretterpstra.com', 'User-Agent': 'SearchLink (http://brettterpstra.com)'}).text
	json_dict = json.loads(json_string)
	response_data = json_dict.get('responseData', None)
	if not response_data:
		return [None, None]
	results = response_data.get('results', None)
	if not results:
		return [None, None]
	result = results[0]
	output_url = result['unescapedUrl']
	# re.search, not re.match: the word appears inside the URL, never at
	# its very start
	if define and re.search(r'dictionary', output_url):
		output_title = re.sub(r'<\/?.*?>', '', result['content'])
	else:
		output_title = result['titleNoFormatting']
	return [output_url, output_title]

def amazon_affiliatize(url, amazon_partner):
	"""Turn an Amazon product URL into an affiliate link.

	url            -- product URL (www.amazon.com/<slug>/dp/<id>)
	amazon_partner -- [tag, camp, creative] list, or a false value to
	                  leave the URL alone

	Always returns a (url, title) tuple so callers can unpack the result.
	The title is the URL's slug when an affiliate link was built and ''
	otherwise (no partner configured, no URL, or not a product URL).
	"""
	if not url or not amazon_partner:
		return (url, '')
	url_match = re.match(r'https?:\/\/www\.amazon\.com\/(?:(.*?)\/)?dp\/([^\?]+)', url)
	if not url_match:
		return (url, '')
	# Group 1 (the slug) is optional and may be None
	title = url_match.group(1) or ''
	id_ = url_match.group(2)
	az_url = 'http://www.amazon.com/gp/product/%s/ref=as_li_ss_tl?ie=UTF8&camp=%s&creative=%s&creativeASIN=%s&linkCode=as2&tag=%s' % (id_, amazon_partner[1], amazon_partner[2], id_, amazon_partner[0])
	return (az_url, title)

# Bookkeeping for reference-style links: url -> marker, plus the
# accumulated footer text holding the link definitions.
links = {}
footer = ''
# Optional random four-digit prefix (with trailing dash) for new markers
prefix = (('%04d' % random.randint(0, 9999)) + '-') if prefix_random else ''
highest_marker = 0

# Find the highest numeric reference marker already defined in the input
# so that new markers continue after it. The prefix already ends with a
# dash, so none is added here, and it is escaped before being used in
# the pattern.
marker_prefix = ('(?:%s)?' % re.escape(prefix)) if prefix else ''
for match in re.finditer(r'\[%s(\d+)\]: ' % marker_prefix, input_):
	highest_marker = max(int(match.group(1)), highest_marker)

def _as_pair(found):
	"""Coerce a search helper's return value into a (url, title) pair."""
	if not found:
		return (None, None)
	return (found[0], found[1])

def _amazon(search_terms):
	"""Google amazon.com for the terms and affiliatize the first hit."""
	az_url, az_title = _as_pair(google('site:amazon.com %s' % search_terms, False))
	if not az_url or not amazon_partner:
		return (az_url, az_title)
	url, title = amazon_affiliatize(az_url, amazon_partner)
	return (url, title or az_title)

def _lookup(search_type, search_terms):
	"""Run the search selected by search_type and return its raw result."""
	if search_type == 'a': # Amazon
		return _amazon(search_terms)
	if search_type == 'g': # google lucky search
		return google(search_terms)
	if search_type == 'wiki':
		return wiki(search_terms)
	if search_type == 'def':
		return google("define " + search_terms, True)
	if search_type in ('mas', 'masd'): # Mac App Store search (mas = itunes link, masd = developer link)
		return itunes('macSoftware', search_terms, search_type == 'masd', itunes_affiliate, country_code)
	if search_type in ('itu', 'itud'): # iTunes app search
		return itunes('iPadSoftware', search_terms, search_type == 'itud', itunes_affiliate, country_code)
	if search_type == 's': # software search (google)
		return google('(software OR app OR mac) %s' % search_terms)
	# The music searches honour the configured country and affiliate
	# settings, like the app searches above.
	if search_type == 'isong': # iTunes Song Search
		return itunes('song', search_terms, False, itunes_affiliate, country_code)
	if search_type == 'iart': # iTunes Artist Search
		return itunes('musicArtist', search_terms, False, itunes_affiliate, country_code)
	if search_type == 'ialb': # iTunes Album Search
		return itunes('album', search_terms, False, itunes_affiliate, country_code)
	if search_type == 'lsong': # Last.fm Song Search
		return lastfm('track', search_terms)
	if search_type == 'lart': # Last.fm Artist Search
		return lastfm('artist', search_terms)
	if search_terms:
		# A search type that looks like a domain means a site search
		if re.match(r'.+?\.\w{2,4}$', search_type):
			return google('site:%s %s' % (search_type, search_terms))
		return google(search_terms)
	return (None, None)

def repl(match):
	"""Replace one [text](info) match with a resolved Markdown link.

	Recognised forms of the parenthesised part:
	  !type "terms"  -- search `terms` with the given search type
	  "terms"        -- Google search for `terms`
	  !type          -- search the link text with the given search type
	  (empty)        -- Google search for the link text
	Anything else (for example an existing URL) is returned unchanged, as
	is any link whose search produced no result.
	"""
	global footer
	link_text = match.group(1)
	link_info = match.group(2)
	search_type = ''
	search_terms = ''

	quoted = re.match(r'^(?:\!(.+) )?"(.*?)"$', link_info)
	if quoted:
		search_type = quoted.group(1) or 'g'
		search_terms = quoted.group(2)
	elif link_info.startswith('!'):
		search_type = link_info[1:]
		search_terms = link_text
	elif link_text and not link_info:
		search_type = 'g'
		search_terms = link_text

	# Custom abbreviations become Google site searches; they are checked
	# first so they can override the built-in triggers.
	if search_type and search_terms and search_type in custom_site_searches:
		search_terms = 'site:%s %s' % (custom_site_searches[search_type], search_terms)
		search_type = 'g'

	url, title = _as_pair(_lookup(search_type, search_terms))
	if not url:
		return match.group(0)

	if link_text == '' and title:
		link_text = title
	if inline:
		if title and include_titles:
			return '[%s](%s "%s")' % (link_text, url, clean_string(title))
		return '[%s](%s)' % (link_text, url)

	if url not in links:
		# First use of this URL: assign the next marker and define it
		links[url] = prefix + ('%d' % (len(links) + 1 + highest_marker))
		footer += '\n[%s]: %s' % (links[url], url)
		if title and include_titles:
			# A space must separate the URL from its quoted title
			footer += ' "%s"' % clean_string(title)

	# NOTE(review): a result without a title is emitted inline although
	# its definition was added to the footer above -- kept as in the
	# original; confirm this is intended.
	if title:
		return '[%s][%s]' % (link_text, links[url])
	return '[%s](%s)' % (link_text, url)

link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')
if link_pattern.search(input_):
	result = link_pattern.sub(repl, input_)
else:
	# No link syntax at all: treat the whole input as one Google search
	url, title = _as_pair(google(input_))
	if not url:
		# Nothing found: hand the input back untouched
		result = input_
	elif title and include_titles:
		result = '[%s](%s "%s")' % (input_.strip(), url, clean_string(title))
	else:
		result = '[%s](%s)' % (input_.strip(), url)

if not inline and footer:
	result += '\n' + footer
workflow.set_output(result)
	# Original SearchLink by Brett Terpstra (@ttscoff)
	#
	# Project page:
	# http://brettterpstra.com/projects/searchlink/
	#
	# Port to Python/Editorial by Ole Zorn (@olemoritz)
	# Based on SearchLink 2.0

	# Configuration:

	# Set to True to always emit inline links ([text](url)) instead of
	# reference-style links.
	inline = False

	# Set to True to add a title to each link, taken from the title that
	# the search returned for the linked page.
	include_titles = False

	# Country code for iTunes searches (default US).
	country_code = 'US'

	# Set to True to start every reference marker with a random four-digit
	# string followed by a dash. This allows running SearchLink multiple
	# times on the same text without marker conflicts.
	prefix_random = False

	# Affiliate parameters appended to iTunes URLs; use empty quotes for
	# none.
	# Example:
	itunes_affiliate = '&at=10l4tL&ct=searchlink'

	# To create Amazon affiliate links, set amazon_partner to a list of
	# [tag, camp, creative]
	# Use the Amazon link tool to create any affiliate link and examine it
	# to find the needed parts. Set to False to return regular Amazon links.
	# Example:
	amazon_partner = ["brettterpcom-20","1789","390957"]

	# To create custom abbreviations for Google site searches,
	# add to (or replace) the entries of the dict below:
	# 'abbreviation': 'site.url',
	# This allows you, for example, to use [search term](!bt)
	# as a shortcut to search brettterpstra.com. Keys in this
	# dict can override existing search triggers.
	custom_site_searches = {
	'bt': 'brettterpstra.com',
	'md': 'www.macdrifter.com'
	}

	import requests
	import json
	import re
	import urllib
	import random
	import workflow
	import editor

	# Values supplied by the surrounding workflow: the action's parameters
	# and the text to process.
	params = workflow.get_parameters()
	input_ = workflow.get_input()

	# Unless inline links were forced in the configuration, enable them
	# only when the input contains exactly one "](" sequence.
	if not inline and len(re.findall(r'\]\(', input_)) == 1:
		inline = True

	def clean_string(s):
		"""Make a title safe to embed in a double-quoted Markdown link title.

		Runs of newlines collapse to a single space, double quotes become
		the HTML entity ``&quot;`` and pipes become hyphens. Leading and
		trailing whitespace is stripped. An empty or missing title yields ''.
		"""
		if not s:
			return ''
		s = re.sub(r'\n+', ' ', s)
		# The trailing semicolon is required for a well-formed entity
		s = s.replace('"', '&quot;')
		# Literal pipe: the doubled backslash here matched "backslash or
		# nothing" and put a hyphen between every character
		s = s.replace('|', '-')
		return s.strip()

	# TODO: Read custom config...

	def wiki(terms):
		"""Look up an English Wikipedia page by title.

		Returns a (url, title) tuple for the first page in the API
		response, or (None, None) when the response contains no existing
		page. Always returning a pair keeps ``url, title = wiki(...)`` safe.
		"""
		uri = 'http://en.wikipedia.org/w/api.php?action=query&format=json&prop=info&inprop=url&titles=' + urllib.quote(terms)
		json_string = requests.get(uri, headers={'Referer': 'http://bretterpstra.com', 'User-Agent': 'SearchLink (http://brettterpstra.com)'}).text
		result = json.loads(json_string)
		pages = (result or {}).get('query', {}).get('pages')
		if not pages:
			return (None, None)
		# "pages" is keyed by page id; only the first entry is used
		first_page = next(iter(pages.values()))
		# The API flags titles without an article with a "missing" key
		if 'missing' in first_page or 'fullurl' not in first_page:
			return (None, None)
		return (first_page['fullurl'], first_page.get('title'))

	def zero_click(terms):
		"""Query the DuckDuckGo API for `terms`.

		Returns a (title, definition, definition_link, wiki_link) tuple
		taken from the Heading, Definition, DefinitionURL and AbstractURL
		fields of the response. Absent fields come back as None; an empty
		response yields four Nones.
		"""
		url = 'http://api.duckduckgo.com/?q=%s&format=json&no_redirect=1&no_html=1&skip_disambig=1' % urllib.quote(terms)
		json_string = requests.get(url).text
		result = json.loads(json_string)
		if result:
			definition = result.get('Definition', None)
			definition_link = result.get('DefinitionURL', None)
			wiki_link = result.get('AbstractURL', None)
			title = result.get('Heading', None)
			return (title, definition, definition_link, wiki_link)
		else:
			return (None, None, None, None)

	def itunes(entity, terms, dev, aff='', country_code='US'):
		"""Search the iTunes store and describe the first result.

		entity       -- iTunes entity to search ('macSoftware',
		                'iPadSoftware', 'song', 'album' or 'musicArtist')
		terms        -- search terms
		dev          -- for software, link to the seller's site instead of
		                the store page
		aff          -- affiliate parameters appended to store URLs (never
		                to developer links)
		country_code -- store country to search

		Returns a (url, title) tuple, or (None, None) when nothing usable
		was found.
		"""
		url = 'http://itunes.apple.com/search?term=%s&country=%s&entity=%s&attribute=allTrackTerm' % (urllib.quote(terms), country_code, entity)
		json_dict = json.loads(requests.get(url).text)
		results = json_dict.get('results')
		if not json_dict.get('resultCount') or not results:
			return (None, None)
		result = results[0]
		output_url = None
		output_title = None
		try:
			# The patterns are matched against the requested entity; the
			# alternation bars must not be escaped
			if re.match(r'(mac|iPad)Software', entity):
				output_url = result['sellerUrl'] if dev else result['trackViewUrl']
				output_title = result['trackName']
			elif re.match(r'(musicArtist|song|album)', entity):
				wrapper_type = result['wrapperType']
				if wrapper_type == 'track':
					output_url = result['trackViewUrl']
					output_title = result['trackName'] + ' by ' + result['artistName']
				elif wrapper_type == 'collection':
					output_url = result['collectionViewUrl']
					output_title = result['collectionName'] + ' by ' + result['artistName']
				elif wrapper_type == 'artist':
					output_url = result['artistLinkUrl']
					output_title = result['artistName']
		except KeyError:
			# The result lacks a field we need (e.g. no seller URL)
			return (None, None)
		if not output_url:
			# Unsupported entity or wrapper type
			return (None, None)
		if dev:
			return (output_url, output_title)
		return (output_url + aff, output_title)

	def lastfm(entity, terms):
		"""Search Last.fm for a track or an artist.

		entity -- 'track' or 'artist'
		terms  -- search terms

		Returns a (url, title) tuple for the first match, or (None, None)
		when there is no match, the response has an unexpected shape, or
		`entity` is not supported.
		"""
		url = 'http://ws.audioscrobbler.com/2.0/?method=%s.search&%s=%s&api_key=2f3407ec29601f97ca8a18ff580477de&format=json' % (entity, entity, urllib.quote(terms))
		json_dict = json.loads(requests.get(url).text)
		results = json_dict.get('results', None)
		if not results:
			return (None, None)
		try:
			if entity == 'track':
				hit = results['trackmatches']['track'][0]
				return (hit['url'], hit['name'] + ' by ' + hit['artist'])
			if entity == 'artist':
				hit = results['artistmatches']['artist'][0]
				return (hit['url'], hit['name'])
		except (KeyError, IndexError, TypeError):
			# No matches, or the match list is not shaped as expected
			return (None, None)
		# Unsupported entity: never hand back the request URL as a result
		return (None, None)

	def google(terms, define=False):
		"""Return the first Google web search result for `terms`.

		terms  -- search terms
		define -- when True and the result URL contains 'dictionary', use
		          the result's content (with HTML tags removed) as the title

		Returns a two-item list [url, title]; both items are None when the
		response carries no results, so callers can always unpack it.
		"""
		uri = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&filter=1&rsz=small&q=' + urllib.quote(terms)
		json_string = requests.get(uri, headers={'Referer': 'http://bretterpstra.com', 'User-Agent': 'SearchLink (http://brettterpstra.com)'}).text
		json_dict = json.loads(json_string)
		response_data = json_dict.get('responseData', None)
		if not response_data:
			return [None, None]
		results = response_data.get('results', None)
		if not results:
			return [None, None]
		result = results[0]
		output_url = result['unescapedUrl']
		# re.search, not re.match: the word appears inside the URL, never
		# at its very start
		if define and re.search(r'dictionary', output_url):
			output_title = re.sub(r'<\/?.*?>', '', result['content'])
		else:
			output_title = result['titleNoFormatting']
		return [output_url, output_title]

	def amazon_affiliatize(url, amazon_partner):
		"""Turn an Amazon product URL into an affiliate link.

		url            -- product URL (www.amazon.com/<slug>/dp/<id>)
		amazon_partner -- [tag, camp, creative] list, or a false value to
		                  leave the URL alone

		Always returns a (url, title) tuple so callers can unpack the
		result. The title is the URL's slug when an affiliate link was
		built and '' otherwise (no partner configured, no URL, or not a
		product URL).
		"""
		if not url or not amazon_partner:
			return (url, '')
		url_match = re.match(r'https?:\/\/www\.amazon\.com\/(?:(.*?)\/)?dp\/([^\?]+)', url)
		if not url_match:
			return (url, '')
		# Group 1 (the slug) is optional and may be None
		title = url_match.group(1) or ''
		id_ = url_match.group(2)
		az_url = 'http://www.amazon.com/gp/product/%s/ref=as_li_ss_tl?ie=UTF8&camp=%s&creative=%s&creativeASIN=%s&linkCode=as2&tag=%s' % (id_, amazon_partner[1], amazon_partner[2], id_, amazon_partner[0])
		return (az_url, title)

	# Bookkeeping for reference-style links: url -> marker, plus the
	# accumulated footer text holding the link definitions.
	links = {}
	footer = ''
	# Optional random four-digit prefix (with trailing dash) for new markers
	prefix = (('%04d' % random.randint(0, 9999)) + '-') if prefix_random else ''
	highest_marker = 0

	# Find the highest numeric reference marker already defined in the
	# input so that new markers continue after it. The prefix already ends
	# with a dash, so none is added here, and it is escaped before being
	# used in the pattern.
	marker_prefix = ('(?:%s)?' % re.escape(prefix)) if prefix else ''
	for match in re.finditer(r'\[%s(\d+)\]: ' % marker_prefix, input_):
		highest_marker = max(int(match.group(1)), highest_marker)

	def _as_pair(found):
		"""Coerce a search helper's return value into a (url, title) pair."""
		if not found:
			return (None, None)
		return (found[0], found[1])

	def _amazon(search_terms):
		"""Google amazon.com for the terms and affiliatize the first hit."""
		az_url, az_title = _as_pair(google('site:amazon.com %s' % search_terms, False))
		if not az_url or not amazon_partner:
			return (az_url, az_title)
		url, title = amazon_affiliatize(az_url, amazon_partner)
		return (url, title or az_title)

	def _lookup(search_type, search_terms):
		"""Run the search selected by search_type and return its raw result."""
		if search_type == 'a': # Amazon
			return _amazon(search_terms)
		if search_type == 'g': # google lucky search
			return google(search_terms)
		if search_type == 'wiki':
			return wiki(search_terms)
		if search_type == 'def':
			return google("define " + search_terms, True)
		if search_type in ('mas', 'masd'): # Mac App Store search (mas = itunes link, masd = developer link)
			return itunes('macSoftware', search_terms, search_type == 'masd', itunes_affiliate, country_code)
		if search_type in ('itu', 'itud'): # iTunes app search
			return itunes('iPadSoftware', search_terms, search_type == 'itud', itunes_affiliate, country_code)
		if search_type == 's': # software search (google)
			return google('(software OR app OR mac) %s' % search_terms)
		# The music searches honour the configured country and affiliate
		# settings, like the app searches above.
		if search_type == 'isong': # iTunes Song Search
			return itunes('song', search_terms, False, itunes_affiliate, country_code)
		if search_type == 'iart': # iTunes Artist Search
			return itunes('musicArtist', search_terms, False, itunes_affiliate, country_code)
		if search_type == 'ialb': # iTunes Album Search
			return itunes('album', search_terms, False, itunes_affiliate, country_code)
		if search_type == 'lsong': # Last.fm Song Search
			return lastfm('track', search_terms)
		if search_type == 'lart': # Last.fm Artist Search
			return lastfm('artist', search_terms)
		if search_terms:
			# A search type that looks like a domain means a site search
			if re.match(r'.+?\.\w{2,4}$', search_type):
				return google('site:%s %s' % (search_type, search_terms))
			return google(search_terms)
		return (None, None)

	def repl(match):
		"""Replace one [text](info) match with a resolved Markdown link.

		Recognised forms of the parenthesised part:
		  !type "terms"  -- search `terms` with the given search type
		  "terms"        -- Google search for `terms`
		  !type          -- search the link text with the given search type
		  (empty)        -- Google search for the link text
		Anything else (for example an existing URL) is returned unchanged,
		as is any link whose search produced no result.
		"""
		global footer
		link_text = match.group(1)
		link_info = match.group(2)
		search_type = ''
		search_terms = ''

		quoted = re.match(r'^(?:\!(.+) )?"(.*?)"$', link_info)
		if quoted:
			search_type = quoted.group(1) or 'g'
			search_terms = quoted.group(2)
		elif link_info.startswith('!'):
			search_type = link_info[1:]
			search_terms = link_text
		elif link_text and not link_info:
			search_type = 'g'
			search_terms = link_text

		# Custom abbreviations become Google site searches; they are
		# checked first so they can override the built-in triggers.
		if search_type and search_terms and search_type in custom_site_searches:
			search_terms = 'site:%s %s' % (custom_site_searches[search_type], search_terms)
			search_type = 'g'

		url, title = _as_pair(_lookup(search_type, search_terms))
		if not url:
			return match.group(0)

		if link_text == '' and title:
			link_text = title
		if inline:
			if title and include_titles:
				return '[%s](%s "%s")' % (link_text, url, clean_string(title))
			return '[%s](%s)' % (link_text, url)

		if url not in links:
			# First use of this URL: assign the next marker and define it
			links[url] = prefix + ('%d' % (len(links) + 1 + highest_marker))
			footer += '\n[%s]: %s' % (links[url], url)
			if title and include_titles:
				# A space must separate the URL from its quoted title
				footer += ' "%s"' % clean_string(title)

		# NOTE(review): a result without a title is emitted inline although
		# its definition was added to the footer above -- kept as in the
		# original; confirm this is intended.
		if title:
			return '[%s][%s]' % (link_text, links[url])
		return '[%s](%s)' % (link_text, url)

	# Lazy ".*?" groups: the "(.?)" form found here only matched link
	# texts and targets of at most one character.
	link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')
	if link_pattern.search(input_):
		result = link_pattern.sub(repl, input_)
	else:
		# No link syntax at all: treat the whole input as one Google search
		url, title = _as_pair(google(input_))
		if not url:
			# Nothing found: hand the input back untouched
			result = input_
		elif title and include_titles:
			result = '[%s](%s "%s")' % (input_.strip(), url, clean_string(title))
		else:
			result = '[%s](%s)' % (input_.strip(), url)

	if not inline and footer:
		result += '\n' + footer
	workflow.set_output(result)