Skip to content

Instantly share code, notes, and snippets.

ruby|ruby ⇒ ruby sample.rb lookup --business-id=tropisueño-san-francisco-3
Found business with id yelp-san-francisco:
{
"categories": [
{
"alias": "localflavor",
"title": "Local Flavor"
},
{
"alias": "massmedia",
{
"location": {
"city": "some_city",
"state": "ABC"
}
}
require 'date'
require 'nokogiri'
require 'rest-client'
require 'reverse_markdown'
# Match [caption <stuff>]...[/caption] tags
# example: http://rubular.com/r/r2FH3QSOpL
CAPTION_REGEX = /\[caption.*\](?=.*\[)|\[\/caption\]/
# clf = sklearn.linear_model.LogisticRegression
# significant_terms = set of terms appearing more than n times in training
def classify(left_name, right_name):
"""
Classifies names using delta term analysis.
:return:
A tuple (p_is_duplicate, exact_match_rare_terms, one_side_rare_terms).
* p_is_duplicate is the score from the log-linear classifier. It's
probably the most relevant signal.
# -*- coding: utf-8 -*-
import itertools
import re
import urlparse
import boto
import warc
from boto.s3.key import Key
from gzipstream import GzipStreamFile
require 'aws-sdk'
require 'json'
REGION = 'us-west-2'
QUEUE_NAME = 'c0wl-cloudtrail'
s3 = Aws::S3::Client.new(region: REGION)
sqs = Aws::SQS::Client.new(region: REGION)
queue_url = sqs.get_queue_url(queue_name: QUEUE_NAME).queue_url
{
"query" : "cheap restaurants",
"language" : "en",
"queryAnnotations" : [
{
"type" : "RawTokenQueryAnnotation",
"offset" : 0,
"length" : 5,
"queryText" : "cheap"
},
with mock.patch("%s.restart_server" % module_under_test, autospec=True) as mock_restart:
restart_servers_in_datacenter(servers, "sfo")
mock_restart.assert_called_once()
SELECT
r.id as restaurant_id,
rez_sheet.shift as shift,
start AT TIME ZONE r.timezone as start_time,
"end" AT TIME ZONE r.timezone as end_time
FROM
rez_sheet,rez_schedule rs,rez_restaurant r
WHERE
r.id = rez_sheet.restaurant_id AND
rs.sheet_id = rez_sheet.id;
{
"visit_time": "2014-10-16 09:44:57",
"title": "Pizza New York, NY",
"url": "http://www.yelp.com/search?find_desc=pizza&find_loc=NYC"
}