Created
February 24, 2011 23:22
-
-
Save jakcharlton/843113 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Threading; | |
using Dimebrain.TweetSharp.Extensions; | |
using Dimebrain.TweetSharp.Fluent; | |
using Dimebrain.TweetSharp.Model; | |
using NHibernate; | |
using NHibernate.Criterion; | |
using Tbtq.Core.Entities; | |
using Tbtq.Core.Entities.Twitter; | |
namespace Tbtq.Core.Harvester | |
{ | |
public class HarvesterBot | |
{ | |
// ---- Tuning constants -------------------------------------------------------
// A cache holding more than CacheSize entries is trimmed at the start of a poll.
private const int CacheSize = 10000;
// Trimming skips the first (CacheSize - CacheMinimum) entries of the cache.
private const int CacheMinimum = 1000;
// Number of mentions requested from Twitter per poll.
private const int ToPull = 1000;

// ---- Configuration supplied through Start() ---------------------------------
private static string accountName;
private static string accountPassword;
// Pause between two polls; Start() overrides it when given a positive number of seconds.
private static TimeSpan IntervalBetweenPolls = 60.Seconds();

// ---- Runtime state ----------------------------------------------------------
// Ids of tweets already handled, consulted to avoid saving the same tweet twice.
private static IList<long> tweetCache = new List<long>();
// Screen names for which a Twitterer record has already been written.
private static IList<string> twittererCache = new List<string>();
// Id handed to the "since" filter of the next mentions request.
private static long since = 1;
// Thread that runs the polling loop; Stop() aborts it.
private static Thread workerProcessThreadDaemon;

// ---- Publicly observable state ----------------------------------------------
// Most recent log lines (AddToLog drops the oldest once there are more than 25).
public static List<string> Log = new List<string>();
// "Stopped" or "Running".
public static string Status = "Stopped";
/// <summary>
/// Stops the harvester: aborts the polling thread if one is known and still alive,
/// then sets <c>Status</c> to "Stopped". Does nothing when already stopped.
/// </summary>
public static void Stop()
{
    if (Status != "Stopped")
    {
        bool workerIsAlive = workerProcessThreadDaemon != null
                             && workerProcessThreadDaemon.IsAlive;
        if (workerIsAlive)
        {
            workerProcessThreadDaemon.Abort();
        }

        Status = "Stopped";
    }
}
/// <summary>
/// Starts the harvester for the given account, polling every 60 seconds.
/// </summary>
/// <param name="account">Twitter account name used to authenticate.</param>
/// <param name="password">Password for <paramref name="account"/>.</param>
public static void Start(string account, string password)
{
    const int defaultSecondsBetweenPolls = 60;
    Start(account, password, defaultSecondsBetweenPolls);
}
/// <summary>
/// Starts the harvester for the given account by queueing the polling loop on the
/// thread pool and setting <c>Status</c> to "Running". Does nothing when already running.
/// </summary>
/// <param name="account">Twitter account name used to authenticate.</param>
/// <param name="password">Password for <paramref name="account"/>.</param>
/// <param name="secondsBetweenPolls">
/// Pause between polls in seconds. Values of zero or below are ignored and the
/// current interval is kept.
/// </param>
public static void Start(string account, string password, int secondsBetweenPolls)
{
    bool alreadyRunning = Status == "Running";
    if (!alreadyRunning)
    {
        accountName = account;
        accountPassword = password;

        if (secondsBetweenPolls > 0)
        {
            IntervalBetweenPolls = secondsBetweenPolls.Seconds();
        }

        ThreadPool.QueueUserWorkItem(delegate { PollForStatuses(); });
        Status = "Running";
    }
}
/// <summary>
/// Polling loop run on the worker thread: requests the account's mentions from Twitter,
/// passes them to <c>UpdateTweets</c>, sleeps for <c>IntervalBetweenPolls</c> and repeats.
/// </summary>
/// <remarks>
/// The method never returns normally; it ends when the thread is aborted (see <c>Stop</c>).
/// Any exception raised while polling or persisting is written to the log and the loop
/// carries on with the next interval.
/// </remarks>
private static void PollForStatuses()
{
    // Publish the worker thread before the first request so Stop() can reach it
    // even while the very first poll is still in flight.
    workerProcessThreadDaemon = Thread.CurrentThread;

    // A loop is used instead of self-recursion so that stack depth stays constant
    // no matter how long the harvester runs.
    while (true)
    {
        // Keep the in-memory caches bounded: once a cache grows past CacheSize,
        // skip its first (CacheSize - CacheMinimum) entries and keep the rest.
        if (tweetCache.Count > CacheSize)
        {
            AddToLog("tweetCache reset" + tweetCache.Count); //TODO Test this actually works at some point
            tweetCache = tweetCache.Skip(CacheSize - CacheMinimum).ToList();
        }
        if (twittererCache.Count > CacheSize)
        {
            // Report the size of the cache that is actually being trimmed.
            AddToLog("twittererCache reset" + twittererCache.Count); //TODO Test this actually works at some point
            twittererCache = twittererCache.Skip(CacheSize - CacheMinimum).ToList();
        }

        try
        {
            var twitterQuery = FluentTwitter.CreateRequest()
                .Configuration.UseGzipCompression()
                .AuthenticateAs(accountName, accountPassword)
                .Statuses().Mentions().Take(ToPull).Since(since)
                .AsJson();
            AddToLog(twitterQuery.ToString());

            var status = twitterQuery.Request();
            UpdateTweets(status.AsStatuses());

            AddToLog("Since now at " + since);
            AddToLog("Cache now at " + tweetCache.Count);
        }
        catch (Exception ex)
        {
            // Best effort: a failed poll is logged and retried after the next sleep.
            AddToLog(ex.Message);
        }

        Thread.Sleep(IntervalBetweenPolls);
    }
}
/// <summary>
/// Persists the replies addressed to the harvesting account from a batch of fetched
/// statuses, records their authors, and scores replies that answer a published tweet.
/// </summary>
/// <param name="tweets">
/// Statuses returned by a poll. A null or empty sequence is ignored.
/// </param>
/// <remarks>
/// A status is saved only when it is not already in <c>tweetCache</c> (or in the database),
/// replies to <c>accountName</c> (compared case-insensitively) and has a non-zero
/// <c>InReplyToStatusId</c>. All writes happen in one NHibernate transaction.
/// </remarks>
private static void UpdateTweets(IEnumerable<TwitterStatus> tweets)
{
    if (tweets == null) return;

    // Materialise once: the batch is walked several times below, and the incoming
    // sequence may be lazily evaluated.
    var batch = tweets as IList<TwitterStatus> ?? tweets.ToList();
    if (batch.Count == 0) return;

    // NOTE(review): takes the first status as the high-water mark for the next poll;
    // this presumes the API returns newest first - confirm.
    since = batch[0].Id;

    IList<TwitterStatus> toPushToDatabase = GetTweetsToPushToDatabase(batch);
    if (toPushToDatabase.Count == 0) return;

    var added = 0;
    using (var session = SessionManager.SessionFactory.OpenSession())
    {
        session.SetBatchSize(200);
        using (var tx = session.BeginTransaction())
        {
            var existingIds = GetExistingIdsFromDatabase(toPushToDatabase, session);
            foreach (var tweet in batch)
            {
                // Tweets already stored in the database are remembered in the cache
                // so the check below skips them.
                UpdateCacheIfTweetIsInExistingIds(existingIds, tweet);
                if (tweetCache.Contains(tweet.Id)) continue;

                // Only replies to one of our own statuses are of interest. The screen
                // name is compared ignoring case so that an account configured with
                // upper-case letters still matches.
                if (tweet.InReplyToScreenName == null ||
                    !string.Equals(tweet.InReplyToScreenName, accountName, StringComparison.OrdinalIgnoreCase) ||
                    tweet.InReplyToStatusId == 0) continue;

                var newTweet = CreateNewTweetToPersist(tweet);
                session.Save(newTweet);
                tweetCache.Add(newTweet.Id);

                var player = GetPlayer(session, tweet.User.ScreenName) ?? BuildNewPlayer(tweet);
                if (!twittererCache.Contains(tweet.User.ScreenName))
                {
                    var twitterer = new Twitterer
                    {
                        ScreenName = tweet.User.ScreenName,
                        ProfileImageUrl = tweet.User.ProfileImageUrl,
                        Player = player
                    };
                    session.SaveOrUpdate(player);
                    session.SaveOrUpdate(twitterer);
                    twittererCache.Add(twitterer.ScreenName);
                }

                // TODO: Change tweet object so that it has a reference to player????
                var publishedTweet = GetPublishedTweetByStatusId(tweet.InReplyToStatusId, session);
                if (publishedTweet != null)
                {
                    var s = new Scorer();
                    s.Score(session, player, publishedTweet.PublishedQuestion, Scorer.CleanTweetAnswer(tweet.Text), tweet.CreatedDate);
                }
                added++;
            }
            tx.Commit();
        }
    }
    AddToLog("Added " + added);
}
/// <summary>
/// Looks up the <c>PublishedTweet</c> whose <c>Id</c> equals the given status id.
/// </summary>
/// <param name="tweetStatusId">Status id to match against the <c>Id</c> property.</param>
/// <param name="session">Open NHibernate session used for the query.</param>
/// <returns>The unique result of the criteria query.</returns>
private static PublishedTweet GetPublishedTweetByStatusId(long tweetStatusId, ISession session)
{
    var idMatches = Restrictions.Eq("Id", tweetStatusId);
    return session.CreateCriteria<PublishedTweet>()
        .Add(idMatches)
        .UniqueResult<PublishedTweet>();
}
/// <summary>
/// Creates a new, unsaved <c>Player</c> whose <c>Username</c> is the screen name of
/// the status' author.
/// </summary>
/// <param name="twitterDetails">Status whose <c>User.ScreenName</c> names the player.</param>
/// <returns>The newly constructed player.</returns>
private static Player BuildNewPlayer(TwitterStatus twitterDetails)
{
    var player = new Player();
    player.Username = twitterDetails.User.ScreenName;
    return player;
}
/// <summary>
/// Looks up the <c>Player</c> whose <c>Username</c> equals <paramref name="name"/>.
/// </summary>
/// <param name="session">Open NHibernate session used for the query.</param>
/// <param name="name">Username to match.</param>
/// <returns>The unique result of the criteria query.</returns>
private static Player GetPlayer(ISession session, string name)
{
    var usernameMatches = Restrictions.Eq("Username", name);
    return session.CreateCriteria<Player>()
        .Add(usernameMatches)
        .UniqueResult<Player>();
}
/// <summary>
/// Runs one id-projection query per candidate status, batched as a single NHibernate
/// multi-criteria, to find out which of them are already stored as <c>Tweet</c> rows.
/// </summary>
/// <param name="toPushToDatabase">Statuses whose ids should be looked up.</param>
/// <param name="session">Open NHibernate session used for the query.</param>
/// <returns>
/// The multi-criteria result list; <c>UpdateCacheIfTweetIsInExistingIds</c> enumerates
/// its elements as <c>ArrayList</c> and reads the first item of each as the id.
/// </returns>
private static IList GetExistingIdsFromDatabase(IEnumerable<TwitterStatus> toPushToDatabase, ISession session)
{
    var lookups = session.CreateMultiCriteria();

    foreach (var status in toPushToDatabase)
    {
        var idLookup = DetachedCriteria.For<Tweet>()
            .Add(Restrictions.Eq("Id", status.Id))
            .SetProjection(Projections.Id())
            .SetResultTransformer(NHibernate.Transform.Transformers.PassThrough);

        lookups.Add(idLookup);
    }

    return lookups.List();
}
/// <summary>
/// Copies the fields of a fetched status into a new <c>Tweet</c> entity.
/// </summary>
/// <param name="tweet">Status to copy from.</param>
/// <returns>A new, unsaved <c>Tweet</c> carrying the status' id, author, text, creation
/// date and reply information.</returns>
private static Tweet CreateNewTweetToPersist(TwitterStatus tweet)
{
    var entity = new Tweet();
    entity.Id = tweet.Id;
    entity.ScreenName = tweet.User.ScreenName;
    entity.Text = tweet.Text;
    entity.CreatedDate = tweet.CreatedDate;
    entity.InReplyToStatusId = tweet.InReplyToStatusId;
    entity.InReplyToScreenName = tweet.InReplyToScreenName;
    return entity;
}
/// <summary>
/// Adds the tweet's id to <c>tweetCache</c> when that id appears in the result of the
/// existing-ids lookup and is not cached yet.
/// </summary>
/// <param name="existingIds">
/// Lookup result whose elements are enumerated as <c>ArrayList</c>; the first item of a
/// non-empty element is read as a <c>long</c> id.
/// </param>
/// <param name="tweet">Entity whose <c>Id</c> is searched for.</param>
private static void UpdateCacheIfTweetIsInExistingIds(IList existingIds, ITwitterEntity tweet)
{
    foreach (ArrayList row in existingIds)
    {
        if (row.Count == 0 || (long)row[0] != tweet.Id) continue;

        if (!tweetCache.Contains(tweet.Id))
            tweetCache.Add(tweet.Id);

        // The id is cached now, so further rows cannot change anything: stop scanning.
        return;
    }
}
/// <summary>
/// Filters a batch of statuses down to those whose id is not in <c>tweetCache</c>.
/// </summary>
/// <param name="tweets">Statuses to filter.</param>
/// <returns>A new list with the uncached statuses, in their original order.</returns>
private static IList<TwitterStatus> GetTweetsToPushToDatabase(IEnumerable<TwitterStatus> tweets)
{
    var uncached = new List<TwitterStatus>();

    foreach (var candidate in tweets)
    {
        if (tweetCache.Contains(candidate.Id))
        {
            continue;
        }

        uncached.Add(candidate);
    }

    return uncached;
}
/// <summary>
/// Appends a time-stamped line to <c>Log</c>, dropping the oldest line once the log
/// holds more than 25 entries.
/// </summary>
/// <param name="toLog">Message text; it is prefixed with the current local time.</param>
/// <remarks>
/// <c>Log</c> is a public field written from the polling thread. Rather than mutating
/// the published list in place, a new list is built and swapped in, so code that is
/// enumerating the previous list on another thread is not disturbed by the append.
/// </remarks>
private static void AddToLog(string toLog)
{
    var entries = new List<string>(Log);
    entries.Add(DateTime.Now.ToLongTimeString() + " : " + toLog);

    if (entries.Count > 25)
    {
        entries.RemoveAt(0);
    }

    Log = entries;
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment