Created
April 13, 2014 19:20
-
-
Save fractaledmind/10598311 to your computer and use it in GitHub Desktop.
Pythonista SearchLink
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Original SearchLink by Brett Terpstra (@ttscoff) | |
# | |
# Project page: | |
# http://brettterpstra.com/projects/searchlink/ | |
# | |
# Port to Python/Editorial by Ole Zorn (@olemoritz) | |
# Based on SearchLink 2.0 | |
# Configuration:

# Set to True to force inline links.
inline = False

# Set to True to add titles to links, based on the site title.
include_titles = False

# Change this to set a specific country for searches (default: US).
country_code = 'US'

# Set to True to include a random string in reference titles.
# This allows running SearchLink multiple times without conflicts.
prefix_random = False

# Affiliate info appended to iTunes URLs; use empty quotes for none.
# Example (note: the value below is a live assignment, not commented out):
itunes_affiliate = '&at=10l4tL&ct=searchlink'

# To create Amazon affiliate links, set amazon_partner to:
#     [tag, camp, creative]
# Use the Amazon link tool to create any affiliate link and examine it
# to find the needed parts. Set to False to return regular Amazon links.
# Example (note: the value below is a live assignment, not commented out):
amazon_partner = ["brettterpcom-20","1789","390957"]

# To create custom abbreviations for Google site searches,
# add to (or replace) the dict below:
#     'abbreviation': 'site.url',
# This allows you, for example, to use [search term](!bt)
# as a shortcut to search brettterpstra.com. Keys in this
# dict can override existing search triggers.
custom_site_searches = {
    'bt': 'brettterpstra.com',
    'md': 'www.macdrifter.com'
}
import requests | |
import json | |
import re | |
import urllib | |
import random | |
import workflow | |
import editor | |
# Parameters and input text supplied by the surrounding workflow.
params = workflow.get_parameters()
input_ = workflow.get_input()

# Unless inline links are already forced, switch to inline mode when the
# input contains exactly one "](" sequence (i.e. a single inline link).
inline = inline or len(re.findall(r'\]\(', input_)) == 1
def clean_string(s):
    """Normalize a title for use inside a quoted Markdown link title.

    Runs of newlines collapse to a single space, double quotes become the
    ``&quot;`` entity so they cannot terminate the surrounding ``"..."``
    title, pipes become hyphens, and leading/trailing whitespace is stripped.
    """
    s = re.sub(r'\n+', ' ', s)
    # Escape rather than keep the quote: callers wrap the result in "...".
    s = s.replace('"', '&quot;')
    s = s.replace('|', '-')
    return s.strip()
# TODO: Read custom config... | |
def wiki(terms):
    """Look up ``terms`` as a page title through the English Wikipedia API.

    Returns a ``(url, title)`` tuple for the first page in the response.
    Returns ``(None, None)`` when the response contains no page, the page is
    flagged as missing, or the expected fields are absent, so that callers
    can always unpack two values.
    """
    uri = 'http://en.wikipedia.org/w/api.php?action=query&format=json&prop=info&inprop=url&titles=' + urllib.quote(terms)
    # NOTE(review): the Referer host is spelled differently from the URL in
    # the User-Agent; kept verbatim because it is sent over the wire.
    headers = {'Referer': 'http://bretterpstra.com', 'User-Agent': 'SearchLink (http://brettterpstra.com)'}
    result = json.loads(requests.get(uri, headers=headers).text)

    pages = ((result or {}).get('query') or {}).get('pages') or {}
    first_page = next(iter(pages.values()), None)
    if not first_page or 'missing' in first_page:
        # The API reports non-existent titles with a "missing" flag; linking
        # to them would produce a dead link.
        return (None, None)
    if 'fullurl' not in first_page or 'title' not in first_page:
        return (None, None)
    return (first_page['fullurl'], first_page['title'])
def zero_click(terms):
    """Query the DuckDuckGo JSON API for ``terms``.

    Returns ``(title, definition, definition_link, wiki_link)``, read from
    the ``Heading``, ``Definition``, ``DefinitionURL`` and ``AbstractURL``
    fields of the response. A field that is absent yields ``None`` in its
    slot; an empty response yields four ``None`` values.
    """
    query = urllib.quote(terms)
    endpoint = 'http://api.duckduckgo.com/?q=%s&format=json&no_redirect=1&no_html=1&skip_disambig=1' % query
    payload = json.loads(requests.get(endpoint).text)
    if not payload:
        return (None, None, None, None)
    wanted = ('Heading', 'Definition', 'DefinitionURL', 'AbstractURL')
    return tuple(payload.get(field, None) for field in wanted)
def itunes(entity, terms, dev, aff='', country_code='US'):
    """Search the iTunes Store and return ``(url, title)`` for the first hit.

    Args:
        entity: Search entity sent to the API. Names matching
            ``(mac|iPad)Software`` are handled as software results; names
            matching ``musicArtist``, ``song`` or ``album`` as music results.
        terms: Search text.
        dev: For software results, link to the seller's URL instead of the
            store page. When true, ``aff`` is never appended.
        aff: Suffix appended to the resulting URL when ``dev`` is false.
        country_code: Country sent to the API.

    Returns:
        ``(url, title)``, or ``(None, None)`` when there is no result, the
        entity/wrapper type is not recognised, or the result lacks an
        expected field.
    """
    url = 'http://itunes.apple.com/search?term=%s&country=%s&entity=%s&attribute=allTrackTerm' % (urllib.quote(terms), country_code, entity)
    json_dict = json.loads(requests.get(url).text)
    if not (json_dict.get('resultCount') and json_dict.get('resultCount') > 0):
        return (None, None)
    if not json_dict.get('results'):
        return (None, None)

    result = json_dict['results'][0]
    output_url = None
    output_title = None
    try:
        # The pattern must be matched against `entity`; without a subject
        # string re.match raises TypeError.
        if re.match(r'(mac|iPad)Software', entity):
            output_url = result['sellerUrl'] if dev else result['trackViewUrl']
            output_title = result['trackName']
        elif re.match(r'(musicArtist|song|album)', entity):
            wrapper_type = result['wrapperType']
            if wrapper_type == 'track':
                output_url = result['trackViewUrl']
                output_title = result['trackName'] + ' by ' + result['artistName']
            elif wrapper_type == 'collection':
                output_url = result['collectionViewUrl']
                output_title = result['collectionName'] + ' by ' + result['artistName']
            elif wrapper_type == 'artist':
                output_url = result['artistLinkUrl']
                output_title = result['artistName']
    except KeyError:
        # The result lacks a field this link type needs: report "not found".
        return (None, None)

    if not output_url:
        return (None, None)
    if dev:
        return (output_url, output_title)
    return (output_url + aff, output_title)
def lastfm(entity, terms):
    """Search Last.fm and return ``(url, title)`` for the first match.

    Args:
        entity: ``'track'`` or ``'artist'``; used both as the API method
            prefix and as the name of the query parameter.
        terms: Search text.

    Returns:
        ``(url, title)``. Track titles are formatted as
        ``"<name> by <artist>"``. ``(None, None)`` is returned when the
        response has no results, has an unexpected shape, or ``entity`` is
        not one of the supported values.
    """
    url = 'http://ws.audioscrobbler.com/2.0/?method=%s.search&%s=%s&api_key=2f3407ec29601f97ca8a18ff580477de&format=json' % (entity, entity, urllib.quote(terms))
    json_dict = json.loads(requests.get(url).text)
    results = json_dict.get('results', None)
    if not results:
        return (None, None)
    try:
        if entity == 'track':
            result = results['trackmatches']['track'][0]
            return (result['url'], result['name'] + ' by ' + result['artist'])
        if entity == 'artist':
            result = results['artistmatches']['artist'][0]
            return (result['url'], result['name'])
    except (KeyError, IndexError, TypeError):
        # The match list is absent, empty, or not shaped as a list of dicts.
        return (None, None)
    # Unsupported entity: nothing was looked up.
    return (None, None)
def google(terms, define=False):
    """Run a Google web search and return ``[url, title]`` for the first hit.

    Args:
        terms: Search text.
        define: When true and the first hit's URL contains ``dictionary``,
            the title is taken from the result's ``content`` snippet with
            markup tags stripped, instead of from ``titleNoFormatting``.

    Returns:
        A two-item list ``[url, title]``, or ``[None, None]`` when the
        response carries no results, so callers can always unpack two values.
    """
    uri = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&filter=1&rsz=small&q=' + urllib.quote(terms)
    headers = {'Referer': 'http://bretterpstra.com', 'User-Agent': 'SearchLink (http://brettterpstra.com)'}
    json_dict = json.loads(requests.get(uri, headers=headers).text)

    response_data = (json_dict or {}).get('responseData', None)
    results = response_data.get('results', None) if response_data else None
    if not results:
        return [None, None]

    result = results[0]
    output_url = result['unescapedUrl']
    # re.search, not re.match: the word appears inside the URL, never at its
    # very start, so an anchored match could not succeed.
    if define and re.search(r'dictionary', output_url):
        output_title = re.sub(r'<\/?.*?>', '', result['content'])
    else:
        output_title = result['titleNoFormatting']
    return [output_url, output_title]
def amazon_affiliatize(url, amazon_partner):
    """Rewrite an Amazon product URL into an affiliate link.

    Args:
        url: URL to rewrite; may be ``None`` or a non-Amazon URL.
        amazon_partner: ``[tag, camp, creative]``, or a false value to
            disable affiliate rewriting.

    Returns:
        Always a ``(url, title)`` tuple. For a ``www.amazon.com`` URL of the
        form ``/<slug>/dp/<id>`` (slug optional) the URL is the affiliate
        product link and the title is the slug (``''`` when there is none).
        In every other case the input URL is returned unchanged with an
        empty title.
    """
    if not amazon_partner or not url:
        return (url, '')
    # The id stops at the next "/" or "?" so trailing path segments such as
    # "/ref=..." are not carried into the product id.
    url_match = re.match(r'https?:\/\/www\.amazon\.com\/(?:(.*?)\/)?dp\/([^\/\?]+)', url)
    if not url_match:
        return (url, '')
    title = url_match.group(1) or ''
    id_ = url_match.group(2)
    az_url = 'http://www.amazon.com/gp/product/%s/ref=as_li_ss_tl?ie=UTF8&camp=%s&creative=%s&creativeASIN=%s&linkCode=as2&tag=%s' % (id_, amazon_partner[1], amazon_partner[2], id_, amazon_partner[0])
    return (az_url, title)
# ---------------------------------------------------------------------------
# Main workflow: resolve every [text](query) link in the input, or treat the
# whole input as a single Google query when it contains no link at all.
# ---------------------------------------------------------------------------

links = {}   # url -> reference marker already assigned to that url
footer = ''  # accumulated "[marker]: url" reference definitions
prefix = ('%04d' % random.randint(0, 9999)) + '-' if prefix_random else ''

# Continue numbering after the highest reference marker already in the input.
highest_marker = 0
for match in re.finditer(r'\[(?:%s-)?(\d+)\]: ' % (prefix), input_):
    highest_marker = max(int(match.group(1)), highest_marker)

_LINK_PATTERN = r'\[(.*?)\]\((.*?)\)'


def _parse_link_info(link_text, link_info):
    """Return ``(search_type, search_terms)`` for one ``[text](info)`` link.

    Either value is an empty string when no search should be performed.
    """
    # (!type "terms") or ("terms"): explicit quoted terms, optional type.
    quoted = re.match(r'^(?:\!(.+) )?"(.*?)"$', link_info)
    if quoted:
        return (quoted.group(1) or 'g', quoted.group(2))
    # (!type): the link text itself is the query. A bare "!" yields an empty
    # type, which means "no search".
    if link_info.startswith('!'):
        return (link_info[1:], link_text)
    # Anything else with non-empty link text falls back to a Google search
    # for the link text.
    # NOTE(review): this also applies when link_info is already a URL, so an
    # existing link is re-resolved -- confirm that this is intended.
    if link_text:
        return ('g', link_text)
    return ('', '')


def _run_search(search_type, search_terms):
    """Dispatch one search to its backend and return that backend's result."""
    # Custom abbreviations become Google site searches and may override the
    # built-in triggers below.
    site = custom_site_searches.get(search_type)
    if site is not None:
        search_type = 'g'
        search_terms = 'site:%s %s' % (site, search_terms)

    if search_type == 'a':
        found = google('site:amazon.com %s' % search_terms, False)
        url, title = found if found else (None, None)
        if url and amazon_partner:
            url, slug = amazon_affiliatize(url, amazon_partner)
            # Prefer the URL slug as title, else keep the search title.
            title = slug or title
        return (url, title)
    if search_type == 'g':  # google lucky search
        return google(search_terms)
    if search_type == 'wiki':
        return wiki(search_terms)
    if search_type == 'def':
        return google("define " + search_terms, True)
    if search_type in ('mas', 'masd'):  # Mac App Store (masd = developer link)
        return itunes('macSoftware', search_terms, search_type == 'masd', itunes_affiliate, country_code)
    if search_type in ('itu', 'itud'):  # iTunes app search (itud = developer link)
        return itunes('iPadSoftware', search_terms, search_type == 'itud', itunes_affiliate, country_code)
    if search_type == 's':  # software search (google)
        return google('(software OR app OR mac) %s' % search_terms)
    if search_type == 'isong':  # iTunes song search
        return itunes('song', search_terms, False)
    if search_type == 'iart':  # iTunes artist search
        return itunes('musicArtist', search_terms, False)
    if search_type == 'ialb':  # iTunes album search
        return itunes('album', search_terms, False)
    if search_type == 'lsong':  # Last.fm song search
        return lastfm('track', search_terms)
    if search_type == 'lart':  # Last.fm artist search
        return lastfm('artist', search_terms)
    # Unknown trigger: one that looks like a domain name is a site search,
    # anything else is a plain Google search.
    if re.match(r'.+?\.\w{2,4}$', search_type):
        return google('site:%s %s' % (search_type, search_terms))
    return google(search_terms)


def _format_link(link_text, url, title):
    """Render the replacement link and register its footer entry if needed."""
    global footer
    if inline:
        if title and include_titles:
            return '[%s](%s "%s")' % (link_text, url, clean_string(title))
        return '[%s](%s)' % (link_text, url)

    if url not in links:
        # First occurrence of this URL: assign the next marker and emit its
        # reference definition once; later occurrences reuse the marker.
        links[url] = prefix + ('%d' % (len(links) + 1 + highest_marker))
        footer += '\n[%s]: %s' % (links[url], url)
        if title and include_titles:
            # The space keeps the quoted title separate from the URL.
            footer += ' "%s"' % clean_string(title)
    if title:
        return '[%s][%s]' % (link_text, links[url])
    # NOTE(review): without a title the link is emitted inline even though a
    # footer entry was registered above -- confirm that this is intended.
    return '[%s](%s)' % (link_text, url)


def repl(match):
    """``re.sub`` callback: resolve one link, or return it unchanged."""
    link_text = match.group(1)
    search_type, search_terms = _parse_link_info(link_text, match.group(2))
    if not (search_type and search_terms):
        return match.group(0)

    found = _run_search(search_type, search_terms)
    # Tolerate backends that return None instead of a (url, title) pair.
    url, title = found if found else (None, None)
    if not url:
        return match.group(0)
    if link_text == '' and title:
        link_text = title
    return _format_link(link_text, url, title)


if re.search(_LINK_PATTERN, input_):
    result = re.sub(_LINK_PATTERN, repl, input_)
else:
    # No Markdown link at all: treat the entire input as one Google query.
    found = google(input_)
    url, title = found if found else (None, None)
    if not url:
        # Nothing found: hand the input back untouched instead of emitting a
        # link to "None".
        result = input_
    elif include_titles and title:
        result = '[%s](%s "%s")' % (input_.strip(), url, clean_string(title))
    else:
        result = '[%s](%s)' % (input_.strip(), url)

if not inline and footer:
    result += '\n' + footer
workflow.set_output(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment