Skip to content

Instantly share code, notes, and snippets.

@jakcharlton
Created February 24, 2011 23:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jakcharlton/843113 to your computer and use it in GitHub Desktop.
Save jakcharlton/843113 to your computer and use it in GitHub Desktop.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using Dimebrain.TweetSharp.Extensions;
using Dimebrain.TweetSharp.Fluent;
using Dimebrain.TweetSharp.Model;
using NHibernate;
using NHibernate.Criterion;
using Tbtq.Core.Entities;
using Tbtq.Core.Entities.Twitter;
namespace Tbtq.Core.Harvester
{
public class HarvesterBot
{
// Ids of tweets that are known to be persisted (or were just saved); checked before
// saving so the same tweet is not written twice.
private static IList<long> tweetCache = new List<long>();
// Screen names for which a Twitterer record has already been saved by this process.
private static IList<string> twittererCache = new List<string>();
// Rolling, timestamped log of recent harvester activity; AddToLog keeps at most 25 entries.
public static List<string> Log = new List<string>();
// Either "Stopped" or "Running"; set by Stop() and Start() respectively.
public static string Status = "Stopped";
// Value passed to the Twitter query's Since(...) filter; UpdateTweets overwrites it with
// the Id of the first tweet in each non-empty batch.
private static long since = 1;
// How long the poller sleeps between two requests; Start() can override the 60 second default.
private static TimeSpan IntervalBetweenPolls = 60.Seconds();
// The thread running the poll loop, recorded by PollForStatuses so that Stop() can abort it.
private static Thread workerProcessThreadDaemon;
// Credentials handed to AuthenticateAs(...) on every poll; accountName is also compared
// against each tweet's InReplyToScreenName.
private static string accountName;
private static string accountPassword;
// A cache is trimmed once it holds more than CacheSize entries.
private const int CacheSize = 10000;
// Used together with CacheSize when trimming: the first (CacheSize - CacheMinimum) entries are dropped.
private const int CacheMinimum = 1000;
// Number of mentions requested per poll (passed to Take(...)).
private const int ToPull = 1000;
/// <summary>
/// Stops the harvester: aborts the recorded polling thread if it is still alive
/// and sets <c>Status</c> to "Stopped". Does nothing when already stopped.
/// </summary>
public static void Stop()
{
    if (Status != "Stopped")
    {
        var poller = workerProcessThreadDaemon;
        var pollerIsAlive = poller != null && poller.IsAlive;
        if (pollerIsAlive)
        {
            poller.Abort();
        }

        Status = "Stopped";
    }
}
/// <summary>
/// Starts harvesting mentions for the given account using a 60 second polling interval.
/// </summary>
/// <param name="account">Twitter account name used to authenticate.</param>
/// <param name="password">Password for <paramref name="account"/>.</param>
public static void Start(string account, string password)
{
    const int defaultSecondsBetweenPolls = 60;
    Start(account, password, defaultSecondsBetweenPolls);
}
/// <summary>
/// Starts harvesting mentions for the given account. Does nothing when
/// <c>Status</c> is already "Running".
/// </summary>
/// <param name="account">Twitter account name used to authenticate.</param>
/// <param name="password">Password for <paramref name="account"/>.</param>
/// <param name="secondsBetweenPolls">
/// Pause between polls in seconds; values of zero or less leave the current interval unchanged.
/// </param>
public static void Start(string account, string password, int secondsBetweenPolls)
{
    var alreadyRunning = Status == "Running";
    if (alreadyRunning)
    {
        return;
    }

    accountName = account;
    accountPassword = password;

    if (secondsBetweenPolls > 0)
    {
        IntervalBetweenPolls = secondsBetweenPolls.Seconds();
    }

    // The poll loop runs on a thread-pool thread; the status flips once it has been queued.
    ThreadPool.QueueUserWorkItem(delegate { PollForStatuses(); });
    Status = "Running";
}
/// <summary>
/// Polling loop executed on a background thread. Each iteration trims the in-memory
/// caches when they exceed <c>CacheSize</c>, requests the account's mentions newer than
/// <c>since</c>, passes them to <see cref="UpdateTweets"/>, and then sleeps for
/// <c>IntervalBetweenPolls</c>.
/// </summary>
/// <remarks>
/// The loop never returns on its own; it ends when the thread stored in
/// <c>workerProcessThreadDaemon</c> is aborted by <c>Stop()</c>. Any exception raised by
/// a poll is written to the log and the loop carries on with the next iteration.
/// </remarks>
private static void PollForStatuses()
{
    // Record the polling thread before the first request so Stop() can find it
    // even while the very first poll is still in flight.
    workerProcessThreadDaemon = Thread.CurrentThread;

    // Iterate instead of calling PollForStatuses() recursively: the recursive form added
    // a stack frame per poll and would eventually overflow the stack.
    while (true)
    {
        if (tweetCache.Count > CacheSize)
        {
            AddToLog("tweetCache reset" + tweetCache.Count); //TODO Test this actually works at some point
            tweetCache = tweetCache.Skip(CacheSize - CacheMinimum).ToList();
        }
        if (twittererCache.Count > CacheSize)
        {
            // Report the size of the cache that is actually being trimmed.
            AddToLog("twittererCache reset" + twittererCache.Count); //TODO Test this actually works at some point
            twittererCache = twittererCache.Skip(CacheSize - CacheMinimum).ToList();
        }

        try
        {
            var twitterQuery = FluentTwitter.CreateRequest()
                .Configuration.UseGzipCompression()
                .AuthenticateAs(accountName, accountPassword)
                .Statuses().Mentions().Take(ToPull).Since(since)
                .AsJson();
            AddToLog(twitterQuery.ToString());

            var status = twitterQuery.Request();
            UpdateTweets(status.AsStatuses());

            AddToLog("Since now at " + since);
            AddToLog("Cache now at " + tweetCache.Count);
        }
        catch (Exception ex)
        {
            // Best effort: a failed poll is logged and retried after the usual pause.
            AddToLog(ex.Message);
        }

        Thread.Sleep(IntervalBetweenPolls);
    }
}
/// <summary>
/// Persists the replies addressed to the harvested account that are not yet known,
/// creates the matching player/twitterer records, and scores answers to published questions.
/// </summary>
/// <param name="tweets">
/// Statuses returned by the latest poll; may be null or empty, in which case nothing happens.
/// The Id of the first element becomes the new <c>since</c> value
/// (presumably the API returns newest first - TODO confirm).
/// </param>
/// <remarks>
/// NOTE(review): <c>since</c>, <c>tweetCache</c> and <c>twittererCache</c> are updated before
/// the transaction commits, so a failed commit leaves them ahead of the database.
/// </remarks>
private static void UpdateTweets(IEnumerable<TwitterStatus> tweets)
{
    if (tweets == null || !tweets.Any()) return;

    since = tweets.First().Id;

    IList<TwitterStatus> toPushToDatabase = GetTweetsToPushToDatabase(tweets);
    if (toPushToDatabase.Count == 0) return;

    var added = 0;
    using (var session = SessionManager.SessionFactory.OpenSession())
    {
        session.SetBatchSize(200);
        using (var tx = session.BeginTransaction())
        {
            var existingIds = GetExistingIdsFromDatabase(toPushToDatabase, session);

            // Only the tweets missing from the cache need a look; every other tweet in
            // the batch would be skipped by the cache check below anyway.
            foreach (var tweet in toPushToDatabase)
            {
                UpdateCacheIfTweetIsInExistingIds(existingIds, tweet);
                if (tweetCache.Contains(tweet.Id)) continue;

                // Keep only replies to one of the account's own statuses. Screen names are
                // compared case-insensitively so an account name given with capitals
                // still matches.
                if (tweet.InReplyToScreenName == null
                    || !string.Equals(tweet.InReplyToScreenName, accountName, StringComparison.OrdinalIgnoreCase)
                    || tweet.InReplyToStatusId == 0) continue;

                var newTweet = CreateNewTweetToPersist(tweet);
                session.Save(newTweet);
                tweetCache.Add(newTweet.Id);

                var player = GetPlayer(session, tweet.User.ScreenName) ?? BuildNewPlayer(tweet);
                if (!twittererCache.Contains(tweet.User.ScreenName))
                {
                    var twitterer = new Twitterer
                    {
                        ScreenName = tweet.User.ScreenName,
                        ProfileImageUrl = tweet.User.ProfileImageUrl,
                        Player = player
                    };
                    session.SaveOrUpdate(player);
                    session.SaveOrUpdate(twitterer);
                    twittererCache.Add(twitterer.ScreenName);
                }

                // TODO: Change tweet object so that it has a reference to player????
                var publishedTweet = GetPublishedTweetByStatusId(tweet.InReplyToStatusId, session);
                if (publishedTweet != null)
                {
                    var s = new Scorer();
                    s.Score(session, player, publishedTweet.PublishedQuestion, Scorer.CleanTweetAnswer(tweet.Text), tweet.CreatedDate);
                }
                added++;
            }
            tx.Commit();
        }
    }
    AddToLog("Added " + added);
}
/// <summary>
/// Looks up the <c>PublishedTweet</c> whose "Id" equals the given status id.
/// </summary>
/// <param name="tweetStatusId">Status id to search for.</param>
/// <param name="session">Open NHibernate session used for the query.</param>
/// <returns>The result of the unique-result query for that id.</returns>
private static PublishedTweet GetPublishedTweetByStatusId(long tweetStatusId, ISession session)
{
    var byStatusId = session.CreateCriteria<PublishedTweet>()
        .Add(Restrictions.Eq("Id", tweetStatusId));

    return byStatusId.UniqueResult<PublishedTweet>();
}
/// <summary>
/// Creates a new, unsaved <c>Player</c> whose username is the screen name of the tweet's author.
/// </summary>
/// <param name="twitterDetails">Tweet whose author becomes the player.</param>
/// <returns>The newly constructed player.</returns>
private static Player BuildNewPlayer(TwitterStatus twitterDetails)
{
    var player = new Player();
    player.Username = twitterDetails.User.ScreenName;
    return player;
}
/// <summary>
/// Looks up the <c>Player</c> whose "Username" equals <paramref name="name"/>.
/// </summary>
/// <param name="session">Open NHibernate session used for the query.</param>
/// <param name="name">Username to search for.</param>
/// <returns>The result of the unique-result query for that username.</returns>
private static Player GetPlayer(ISession session, string name)
{
    var byUsername = session.CreateCriteria<Player>()
        .Add(Restrictions.Eq("Username", name));

    return byUsername.UniqueResult<Player>();
}
/// <summary>
/// Asks the database, in a single multi-criteria round trip, which of the candidate tweets
/// already exist. One id-projection query is added per candidate.
/// </summary>
/// <param name="toPushToDatabase">Tweets that are candidates for insertion.</param>
/// <param name="session">Open NHibernate session used for the queries.</param>
/// <returns>The multi-criteria result list, one entry per candidate query.</returns>
private static IList GetExistingIdsFromDatabase(IEnumerable<TwitterStatus> toPushToDatabase, ISession session)
{
    var idLookups = session.CreateMultiCriteria();

    foreach (var candidate in toPushToDatabase)
    {
        var idLookup = DetachedCriteria.For<Tweet>()
            .Add(Restrictions.Eq("Id", candidate.Id))
            .SetProjection(Projections.Id())
            .SetResultTransformer(NHibernate.Transform.Transformers.PassThrough);

        idLookups.Add(idLookup);
    }

    return idLookups.List();
}
/// <summary>
/// Copies the fields of a Twitter status into a new, unsaved <c>Tweet</c> entity.
/// </summary>
/// <param name="tweet">Status received from Twitter.</param>
/// <returns>The entity carrying the status id, author, text, date and reply details.</returns>
private static Tweet CreateNewTweetToPersist(TwitterStatus tweet)
{
    var entity = new Tweet();
    entity.Id = tweet.Id;
    entity.ScreenName = tweet.User.ScreenName;
    entity.Text = tweet.Text;
    entity.CreatedDate = tweet.CreatedDate;
    entity.InReplyToStatusId = tweet.InReplyToStatusId;
    entity.InReplyToScreenName = tweet.InReplyToScreenName;
    return entity;
}
/// <summary>
/// Adds the tweet's id to <c>tweetCache</c> when the database lookup results show that
/// the tweet already exists.
/// </summary>
/// <param name="existingIds">
/// Result of <c>GetExistingIdsFromDatabase</c>: one list per query, whose first element
/// (when present) is the id that was found.
/// </param>
/// <param name="tweet">Tweet to look for in the results.</param>
private static void UpdateCacheIfTweetIsInExistingIds(IList existingIds, ITwitterEntity tweet)
{
    // Iterate as IList rather than ArrayList so the method does not depend on the
    // concrete list type NHibernate uses for each query's result.
    foreach (IList row in existingIds)
    {
        if (row.Count == 0 || (long)row[0] != tweet.Id) continue;

        if (!tweetCache.Contains(tweet.Id))
            tweetCache.Add(tweet.Id);

        // The id has been found and cached; scanning the remaining rows cannot change anything.
        return;
    }
}
/// <summary>
/// Filters a batch of tweets down to those whose id is not yet in <c>tweetCache</c>.
/// </summary>
/// <param name="tweets">Batch of tweets to filter.</param>
/// <returns>A new list with the uncached tweets, in their original order.</returns>
private static IList<TwitterStatus> GetTweetsToPushToDatabase(IEnumerable<TwitterStatus> tweets)
{
    var uncached = new List<TwitterStatus>();

    foreach (var candidate in tweets)
    {
        var alreadyCached = tweetCache.Contains(candidate.Id);
        if (alreadyCached)
        {
            continue;
        }

        uncached.Add(candidate);
    }

    return uncached;
}
/// <summary>
/// Appends a message, prefixed with the current local time, to <c>Log</c> and drops the
/// oldest entry once the log holds more than 25 entries.
/// </summary>
/// <param name="toLog">Message text to record.</param>
private static void AddToLog(string toLog)
{
    var entry = string.Format("{0} : {1}", DateTime.Now.ToLongTimeString(), toLog);
    Log.Add(entry);

    if (Log.Count <= 25)
    {
        return;
    }

    // Replace the list with a copy that omits the oldest entry.
    Log = Log.Skip(1).ToList();
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment