Skip to content

Instantly share code, notes, and snippets.

@tdutch1
Last active February 7, 2017 23:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tdutch1/b8fc5706a4df74cd6f33ad38508ec6e0 to your computer and use it in GitHub Desktop.
Save tdutch1/b8fc5706a4df74cd6f33ad38508ec6e0 to your computer and use it in GitHub Desktop.
Azure Function to process Twitter tweets: Stores received tweet text in Azure Storage table, indicates whether tweet is a duplicate or should be ignored
{
"bindings": [
{
"type": "httpTrigger",
"direction": "in",
"webHookType": "genericJson",
"name": "req"
},
{
"type": "http",
"direction": "out",
"name": "res"
}
],
"disabled": false
}
{
"frameworks": {
"net46":{
"dependencies": {
"WindowsAzure.Storage": "8.0.1",
"Microsoft.WindowsAzure.ConfigurationManager": "3.2.3"
}
}
}
}
#r "Newtonsoft.Json"
using System;
using System.Net;
using System.Text.RegularExpressions;
using Microsoft.Azure;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Table;
using Newtonsoft.Json;
// Purpose: Used to determine whether a Twitter Tweet was already received and whether the Tweet has a keyword that's considered unwanted.
// Input parameters: JSON object with one property named "tweetText".
// Sample input: { "tweetText":"This is a tweet" }
// Output: If the Tweet was already passed to this function then the following is returned:
// {"tweetText":"ignore_tweet"}
// If the Tweet is original (not already passed to this function) then the response returns:
// {"tweetText":"This is a tweet"} // The Tweet text itself is returned.
// HTTP entry point. Reads a JSON object with a "tweetText" property from the request body and
// decides whether the tweet should be passed on or ignored.
//
// A tweet is ignored when, in this order:
//   1. its normalized text is a retweet (IsRetweet),
//   2. the original text contains an unwanted keyword (TweetHasUnwantedKeyword), or
//   3. its normalized text is already in the "tweets" table (TweetAlreadyStored).
// The checks short-circuit, so a tweet is only written to the table once it has passed
// checks 1 and 2, and the keyword table is not queried for retweets.
//
// Parameters:
//   req - incoming HTTP request whose body is the JSON payload.
//   log - trace writer used for diagnostics.
// Returns:
//   400 with an "error" property when the body is not a JSON object containing a non-null
//   "tweetText"; otherwise 200 with {"tweetText": <original text>} for a tweet to keep, or
//   {"tweetText": "ignore_tweet"} for a tweet to drop.
public static async Task<object> Run(HttpRequestMessage req, TraceWriter log)
{
    log.Info("Function invoked");

    string jsonContent = await req.Content.ReadAsStringAsync();

    // Only a JSON object can carry "tweetText". An empty body deserializes to null and
    // arrays/scalars are not JObject, so all of those end up as a null payload here.
    Newtonsoft.Json.Linq.JObject payload;
    try
    {
        payload = JsonConvert.DeserializeObject(jsonContent) as Newtonsoft.Json.Linq.JObject;
    }
    catch (JsonException ex)
    {
        // Malformed JSON is a client error, not a server fault.
        log.Error(ex.ToString());
        payload = null;
    }

    Newtonsoft.Json.Linq.JToken tweetToken = payload?["tweetText"];
    if (tweetToken == null || tweetToken.Type == Newtonsoft.Json.Linq.JTokenType.Null)
    {
        return req.CreateResponse(HttpStatusCode.BadRequest, new
        {
            error = "Please pass 'tweetText' property in the input object"
        });
    }

    string originalTweetText = tweetToken.ToString();
    log.Info("tweetText = " + originalTweetText);

    // Derive the tweet text to store in the Azure table. Remove URLs first, then characters
    // that are not allowed in a table RowKey.
    string tweetTextForAzureTable = RemoveUrls(originalTweetText);
    tweetTextForAzureTable = RemoveInvalidAzureStorageTableRowKeyChars(tweetTextForAzureTable);
    log.Info("tweetTextForAzureTable = " + tweetTextForAzureTable);

    // TweetAlreadyStored must stay last: it inserts the tweet as a side effect.
    bool ignoreTweet = IsRetweet(tweetTextForAzureTable)
        || TweetHasUnwantedKeyword(originalTweetText, log)
        || TweetAlreadyStored(tweetTextForAzureTable, log);

    if (ignoreTweet)
    {
        log.Info("Return value: 'ignore_tweet'");
        return req.CreateResponse(HttpStatusCode.OK, new
        {
            tweetText = "ignore_tweet"
        });
    }

    return req.CreateResponse(HttpStatusCode.OK, new
    {
        tweetText = originalTweetText
    });
}
// Tries to insert the tweet text into the "tweets" table (PartitionKey "Tweet", RowKey = text)
// and reports whether it was already there.
//
// Parameters:
//   tweetText - normalized tweet text, used as the RowKey.
//   log       - trace writer; receives any storage error other than a conflict.
// Returns:
//   true when the insert is rejected with HTTP 409 (Conflict), i.e. an entity with the same
//   keys already exists; false when the insert succeeds or fails for any other reason
//   (the failure is logged and the tweet is treated as not stored).
private static bool TweetAlreadyStored(string tweetText, TraceWriter log)
{
    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(CloudConfigurationManager.GetSetting("AzureWebJobsStorage"));
    CloudTableClient tableClient = storageAccount.CreateCloudTableClient();
    CloudTable table = tableClient.GetTableReference("tweets");

    TableEntity tableEntity = new TableEntity("Tweet", tweetText);

    try
    {
        table.Execute(TableOperation.Insert(tableEntity));
        return false;
    }
    catch (Microsoft.WindowsAzure.Storage.StorageException ex)
    {
        // Use the HTTP status reported by the service rather than searching the exception
        // message text for "409", which depends on the message wording.
        int? statusCode = ex.RequestInformation?.HttpStatusCode;
        if (statusCode == (int)HttpStatusCode.Conflict)
        {
            return true;
        }

        log.Error(ex.ToString());
        return false;
    }
}
// Reports whether any whitespace-separated word of the tweet matches an unwanted keyword
// returned by GetUnwantedTweetKeywords. Matching is case-insensitive and on whole words.
//
// Parameters:
//   tweetText - original tweet text.
//   log       - trace writer; receives any exception raised while loading or matching keywords.
// Returns:
//   true when at least one word matches; false when none match or when an error occurred
//   (best effort: a lookup failure is logged and does not block the tweet).
private static bool TweetHasUnwantedKeyword(string tweetText, TraceWriter log)
{
    try
    {
        // Hash set gives constant-time, culture-independent, case-insensitive lookups.
        HashSet<string> unwantedKeywords = new HashSet<string>(GetUnwantedTweetKeywords(), StringComparer.OrdinalIgnoreCase);

        // A null separator array splits on every Unicode whitespace character, so keywords
        // separated by newlines or tabs are found too, not just those separated by spaces.
        string[] tweetWords = tweetText.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

        foreach (string tweetWord in tweetWords)
        {
            if (unwantedKeywords.Contains(tweetWord))
            {
                return true;
            }
        }
    }
    catch (Exception ex)
    {
        log.Error(ex.ToString());
    }

    return false;
}
// Loads every entity from the "unwantedTweetKeywords" table and returns the RowKeys as
// lower-cased keywords.
//
// A RowKey cannot contain '#', so a key that starts with the marker "HT" stands for a
// hashtag: only that leading marker is replaced with '#' (e.g. "HTspam" -> "#spam").
//
// Returns:
//   The list of keywords, lower-cased; empty when the table has no entities.
private static List<string> GetUnwantedTweetKeywords()
{
    const string hashtagMarker = "HT";

    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(CloudConfigurationManager.GetSetting("AzureWebJobsStorage"));
    CloudTableClient tableClient = storageAccount.CreateCloudTableClient();
    CloudTable table = tableClient.GetTableReference("unwantedTweetKeywords");

    List<string> keywordList = new List<string>();
    foreach (TableEntity entity in table.ExecuteQuery(new TableQuery<TableEntity>()))
    {
        string keyword = entity.RowKey;
        if (keyword.StartsWith(hashtagMarker, StringComparison.Ordinal))
        {
            // Replace the prefix only; any later "HT" inside the keyword is left as it is.
            keyword = "#" + keyword.Substring(hashtagMarker.Length);
        }

        // ToLower() is kept so the result matches callers that lower-case tweet words the same way.
        keywordList.Add(keyword.ToLower());
    }

    return keywordList;
}
// Strips every character that Azure Table storage does not allow in a RowKey:
// '#', '\', '/', '?', and the control characters U+0000-U+001F (which include tab, line feed
// and carriage return) and U+007F-U+009F.
//
// Parameters:
//   text - text to sanitize; must not be null.
// Returns:
//   The text with all disallowed characters removed; all other characters are unchanged.
private static string RemoveInvalidAzureStorageTableRowKeyChars(string text)
{
    return Regex.Replace(text, @"[#\\/?\u0000-\u001F\u007F-\u009F]", "");
}
// Removes every http:// or https:// URL from the text. A URL runs from the scheme up to the
// next whitespace character. Surrounding whitespace is left as it is.
//
// Requiring "://" keeps ordinary words that merely begin with "https" intact, and the
// optional "s" also removes plain http links.
//
// Parameters:
//   text - text to strip; must not be null.
// Returns:
//   The text with all URLs removed.
private static string RemoveUrls(string text)
{
    return Regex.Replace(text, @"https?://\S+", "", RegexOptions.IgnoreCase);
}
// Reports whether the text is a retweet, i.e. begins with the standalone token "RT"
// (as in "RT @user: ..."). The comparison is ordinal and case-sensitive.
//
// "RT" must be followed by the end of the text or by a character that is not a letter or
// digit, so text that only happens to begin with those letters (e.g. "RTX 3080") is not
// treated as a retweet.
//
// Parameters:
//   text - tweet text to inspect; must not be null.
// Returns:
//   true when the text starts with the token "RT"; otherwise false.
private static bool IsRetweet(string text)
{
    const string retweetMarker = "RT";

    if (!text.StartsWith(retweetMarker, StringComparison.Ordinal))
    {
        return false;
    }

    return text.Length == retweetMarker.Length || !char.IsLetterOrDigit(text[retweetMarker.Length]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment