Skip to content

Instantly share code, notes, and snippets.

@risavkarna
Created September 30, 2012 08:23
Show Gist options
  • Save risavkarna/3806242 to your computer and use it in GitHub Desktop.
Save risavkarna/3806242 to your computer and use it in GitHub Desktop.
we are not changing this any more i guess
SET SCHEMA CHALLENGE3;
DROP TYPE UNKNOWN_TWEET_T;
CREATE TYPE UNKNOWN_TWEET_T AS TABLE
(
"ID" BIGINT NOT NULL, -- Unique identifier for this Tweet
"TEXT" nvarchar(800) NOT NULL, -- The actual text of the status update
"TRUNCATED" TINYINT NOT NULL default 0, -- (1 = True, 0 = False) Indicates whether the text was truncated, for example, as a result of a retweet exceeding the 140 character Tweet length. Truncated text will end in ellipsis, like this ...
"SOURCE" nvarchar(255), -- Utility used to post the Tweet, as an HTML-formatted string. (e.g. 'web')
"CREATED_AT" DATETIME not null, -- UTC time when this Tweet was created.
"IN_REPLY_TO_STATUS_ID" BIGINT, -- If the represented Tweet is a reply, ID of the original Tweet
"IN_REPLY_TO_USER_ID" BIGINT, -- If the represented Tweet is a reply, ID of the original Tweet's author.
--"GEO_LONGITUDE" FLOAT, -- Longitude, Geographic Location
--"GEO_LATITUDE" FLOAT, -- Latitude, Geographics Location
--"ORIGINAL_TWEET_ID" BIGINT, -- if this retweet contains the ID of the original tweet, which is also stored...
-- tweet stats
--"EMBEDDED" TINYINT, -- (1 = True, 0 = False) Indicates whether the tweet was embedded in a retweet
"RETWEET_COUNT" Integer, -- Number of times this Tweet has been retweeted.
-- user
"USER_ID" BIGINT NOT NULL, -- Unique identifier for this User
"USER_NAME" nvarchar(40), -- The name of the user, as they've defined it. Not necessarily a person's name.
"USER_SCREEN_NAME" nvarchar(20), -- The screen name, handle, or alias that this user identifies themselves with. screen_names are unique but subject to change.
"USER_CREATED_AT" DATETIME not null, -- The UTC datetime that the user account was created on Twitter.
"USER_LANG" nvarchar(8), -- The ISO 639-1 two-letter character code for the user's self-declared user interface language.
"USER_LOCATION" nvarchar(250), -- The user-defined location for this account's profile.
"USER_DESCRIPTION" nvarchar(500), -- The user-defined string describing their account.
"USER_PROFILE_IMAGE" nvarchar(500), -- A HTTP-based URL pointing to the user's avatar image.
"USER_TIME_ZONE" nvarchar(80), -- A string describing the Time Zone this user declares themselves within, e.g. "Pacific Time (US & Canada)"
"USER_UTC_OFFSET" integer, -- The offset from GMT/UTC in seconds.
"USER_URL" nvarchar(200), -- A URL provided by the user in association with their profile.
-- user stats
"USER_STATUSES_COUNT" Integer, -- The number of tweets (including retweets) issued by the user.
"USER_FRIENDS_COUNT" Integer, -- The number of users this account is following (AKA their "followings")
"USER_FOLLOWERS_COUNT" Integer, -- The number of followers this account currently has.
"USER_FAVOURITES_COUNT" Integer, -- The number of tweets this user has favorited in the account's lifetime.
"USER_LISTED_COUNT" Integer -- The number of public lists that this user is a member of.
-- place
--"PLACE_ID" nvarchar(255), -- ID representing this place. Note that this is represented as a string, not an integer.
--"PLACE_NAME" nvarchar(255), -- Short human-readable representation of the place's name.
--"PLACE_FULL_NAME" nvarchar(255), -- Full human-readable representation of the place's name.
--"PLACE_TYPE" nvarchar(20), -- The type of location represented by this place. (e.g. 'city')
--"PLACE_COUNTRY" nvarchar(50), -- Name of the country containing this place.
--"PLACE_COUNTRY_CODE" nvarchar(8), -- Shortened country code representing the country containing this place.
--"PLACE_URL" nvarchar(500) -- e.g. "http://api.twitter.com/1/geo/id/6565298bcadb82a1.json"
);
--GIVEN A TWITTER RETURN ITS LATITUDE AND LONGITUDE BASED ON UTC-TIME ZONE ONLY FOR NOW
DROP TYPE GEO_COORDINATES_T;
CREATE TYPE GEO_COORDINATES_T AS TABLE
(
"TWEET_ID" BIGINT NOT NULL, -- Tweet ID (ForeinKey)
"GEO_LONGITUDE" FLOAT, -- Longitude, Geographic Location
"GEO_LATITUDE" FLOAT -- Latitude, Geographics Location
);
DROP PROCEDURE EXISTING_USER;
CREATE PROCEDURE EXISTING_USER(
IN userID BIGINT,
OUT maxlat DOUBLE,
OUT minlat DOUBLE,
OUT maxlong DOUBLE,
OUT minlong DOUBLE
)
AS
RESULTCOUNT INTEGER;
BEGIN
topUsedLoc = SELECT TOP 1
USER_ID,
MAX_LAT,
MIN_LAT,
MAX_LONG,
MIN_LONG
FROM TOP_USR_LOC
WHERE USER_ID = :userID;
SELECT COUNT(*) INTO RESULTCOUNT FROM :topUsedLoc;
IF RESULTCOUNT > 0 THEN
SELECT MAX_LAT INTO maxlat FROM :topUsedLoc;
SELECT MAX_LONG INTO maxlong FROM :topUsedLoc;
SELECT MIN_LONG INTO minlong FROM :topUsedLoc;
SELECT MIN_LAT INTO minlat FROM :topUsedLoc;
ELSE
maxlat := 90;
minlat := -90;
maxlong := 180;
minlong := -180;
END IF;
END;
DROP PROCEDURE COORDINATE_RESOLVER;
CREATE PROCEDURE COORDINATE_RESOLVER(
IN minlat FLOAT,
IN maxlat FLOAT,
IN minlon FLOAT,
IN maxlon FLOAT,
OUT lat FLOAT,
OUT lon FLOAT
)
AS BEGIN
lat := (maxlat+minlat)/2;
lon := (maxlon+minlon)/2;
IF :lat = 0.0 AND :lon = 0.0 THEN
lat := 40.0;
lon := -75.0;
END IF;
END;
DROP PROCEDURE UTC_OFFSET;
CREATE PROCEDURE UTC_OFFSET(
IN utcoffset INTEGER,
OUT max_long FLOAT,
OUT min_long FLOAT
)
AS
RESULTCOUNT INTEGER;
BEGIN
mean_long = (SELECT TOP 1 MIN_LONG
FROM UTCOFFSET_MINLONG
WHERE UTCOFFSET*3600 = COALESCE(utcoffset,1));
SELECT COUNT(*) INTO RESULTCOUNT FROM :mean_long;
IF RESULTCOUNT > 0 THEN
SELECT MIN_LONG+7.5 INTO max_long FROM :mean_long;
SELECT MIN_LONG-7.5 INTO min_long FROM :mean_long;
ELSE
max_long := 180.0;
min_long := -180.0;
END IF;
END;
DROP PROCEDURE ESTIMATE_COORDINATES;
CREATE PROCEDURE ESTIMATE_COORDINATES(
IN source UNKNOWN_TWEET_T,
OUT result GEO_COORDINATES_T
)
AS
v_max_lat FLOAT;
v_max_long FLOAT;
v_min_lat FLOAT;
v_min_long FLOAT;
v_lat FLOAT;
v_long FLOAT;
CURSOR c_tweets FOR SELECT * FROM :source;
BEGIN
--call something for every element in the cursor
FOR tweet as c_tweets DO
--initialize
v_max_lat := 90;
v_min_lat := -90;
v_max_long := 180;
v_min_long := -180;
--start from the worst guess
IF tweet.USER_UTC_OFFSET IS NOT NULL THEN
UTC_OFFSET(tweet.USER_UTC_OFFSET, v_max_long, v_min_long);
ELSE
v_max_long := 180;
v_min_long := -180;
END IF;
EXISTING_USER(tweet.USER_ID, v_max_lat, v_min_lat, v_max_long, v_min_long);
COORDINATE_RESOLVER(v_min_lat, v_max_lat, v_min_long, v_max_long, v_lat, v_long);
new_tweet = SELECT tweet.ID AS TWEET_ID, v_long AS GEO_LONGITUDE, v_lat AS GEO_LATITUDE FROM dummy;
result = (SELECT * FROM :result UNION SELECT * FROM :new_tweet);
--INSERT INTO :result VALUES (tweet.ID, :v_long, :v_lat);
END FOR;
END;
--DROP TABLE TEST1;
--CREATE TABLE TEST3 LIKE GEO_COORDINATES_T;
DROP VIEW ye;
CREATE VIEW ye AS (SELECT TOP 1000 * FROM TWEETS_FOR_1DAY);
CALL ESTIMATE_COORDINATES(ye, ?);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment