Last active
February 7, 2017 23:45
-
-
Save tdutch1/b8fc5706a4df74cd6f33ad38508ec6e0 to your computer and use it in GitHub Desktop.
Azure Function to process Twitter tweets: Stores received tweet text in Azure Storage table, indicates whether tweet is a duplicate or should be ignored
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"bindings": [ | |
{ | |
"type": "httpTrigger", | |
"direction": "in", | |
"webHookType": "genericJson", | |
"name": "req" | |
}, | |
{ | |
"type": "http", | |
"direction": "out", | |
"name": "res" | |
} | |
], | |
"disabled": false | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"frameworks": { | |
"net46":{ | |
"dependencies": { | |
"WindowsAzure.Storage": "8.0.1", | |
"Microsoft.WindowsAzure.ConfigurationManager": "3.2.3" | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "Newtonsoft.Json" | |
using System; | |
using System.Net; | |
using System.Text.RegularExpressions; | |
using Microsoft.Azure; | |
using Microsoft.WindowsAzure.Storage; | |
using Microsoft.WindowsAzure.Storage.Table; | |
using Newtonsoft.Json; | |
// Purpose: Used to determine whether a Twitter Tweet was already received and whether the Tweet has a keyword that's considered unwanted. | |
// Input parameters: JSON object with one property named "tweetText". | |
// Sample input: { "tweetText":"This is a tweet" } | |
// Output: If the Tweet should be ignored (it is a retweet, contains an unwanted keyword, or was already passed to this function) then the following is returned:
// {"tweetText":"ignore_tweet"}
// Otherwise (the Tweet is original and wanted) the response returns:
// {"tweetText":"This is a tweet"} // The Tweet text itself is returned.
/// <summary>
/// HTTP entry point: decides whether a received tweet should be processed or ignored.
/// </summary>
/// <param name="req">Incoming request whose body is expected to be a JSON object with a "tweetText" property.</param>
/// <param name="log">Function trace log.</param>
/// <returns>
/// 400 Bad Request when the body is empty, is not a JSON object, or carries no (or a null) "tweetText";
/// otherwise 200 OK whose "tweetText" is either the original tweet text or the literal "ignore_tweet"
/// (the tweet is a retweet, contains an unwanted keyword, or was already stored).
/// </returns>
public static async Task<object> Run(HttpRequestMessage req, TraceWriter log)
{
    log.Info("Function invoked");

    string jsonContent = req.Content == null
        ? string.Empty
        : await req.Content.ReadAsStringAsync();

    // Parse defensively: an empty body deserializes to null, a JSON array/primitive is not a JObject,
    // and malformed JSON throws. All of those must end up as a 400 rather than an unhandled exception.
    Newtonsoft.Json.Linq.JToken tweetToken = null;
    try
    {
        var payload = JsonConvert.DeserializeObject(jsonContent) as Newtonsoft.Json.Linq.JObject;
        tweetToken = payload?["tweetText"];
    }
    catch (JsonException ex)
    {
        log.Error(ex.ToString());
    }

    if (tweetToken == null || tweetToken.Type == Newtonsoft.Json.Linq.JTokenType.Null)
    {
        return req.CreateResponse(HttpStatusCode.BadRequest, new
        {
            error = "Please pass 'tweetText' property in the input object"
        });
    }

    string originalTweetText = tweetToken.ToString();
    log.Info("tweetText = " + originalTweetText);

    // Derive the tweet text to store in the Azure table. Remove URLs, then characters that are invalid in a row key.
    string tweetTextForAzureTable = RemoveUrls(originalTweetText);
    tweetTextForAzureTable = RemoveInvalidAzureStorageTableRowKeyChars(tweetTextForAzureTable);
    log.Info("tweetTextForAzureTable = " + tweetTextForAzureTable);

    // Cheapest check first: the short-circuit avoids the keyword table query for retweets,
    // and the insert into the tweets table still only happens for tweets that pass both filters.
    bool ignoreTweet = IsRetweet(tweetTextForAzureTable)
        || TweetHasUnwantedKeyword(originalTweetText, log)
        || TweetAlreadyStored(tweetTextForAzureTable, log);

    if (ignoreTweet)
    {
        log.Info("Return value: 'ignore_tweet'");
        return req.CreateResponse(HttpStatusCode.OK, new
        {
            tweetText = "ignore_tweet"
        });
    }

    return req.CreateResponse(HttpStatusCode.OK, new
    {
        tweetText = originalTweetText
    });
}
/// <summary>
/// Tries to insert the tweet into the "tweets" table (partition "Tweet", row key = tweet text)
/// and reports whether a row with that key already existed.
/// </summary>
/// <param name="tweetText">Tweet text to use as the row key.</param>
/// <param name="log">Function trace log; receives any non-conflict storage error.</param>
/// <returns>
/// <c>true</c> when the insert was rejected with HTTP 409 (Conflict), i.e. the row already exists;
/// <c>false</c> when the insert succeeded or failed for any other (logged) storage reason.
/// </returns>
private static bool TweetAlreadyStored(string tweetText, TraceWriter log)
{
    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(CloudConfigurationManager.GetSetting("AzureWebJobsStorage"));
    CloudTable table = storageAccount.CreateCloudTableClient().GetTableReference("tweets");

    var tableEntity = new TableEntity
    {
        PartitionKey = "Tweet",
        RowKey = tweetText
    };

    try
    {
        table.Execute(TableOperation.Insert(tableEntity));
        return false;
    }
    // Read the status code from the request result instead of searching the exception message for "409":
    // the message text is not a contract and could contain "409" for unrelated reasons.
    catch (StorageException ex) when (ex.RequestInformation != null
        && ex.RequestInformation.HttpStatusCode == (int)HttpStatusCode.Conflict)
    {
        return true;
    }
    catch (StorageException ex)
    {
        // Best effort: any other storage failure is logged and the tweet is treated as not stored.
        log.Error(ex.ToString());
        return false;
    }
}
/// <summary>
/// Checks whether any whitespace-separated word of the tweet equals (case-insensitively)
/// one of the unwanted keywords returned by GetUnwantedTweetKeywords.
/// </summary>
/// <param name="tweetText">Original tweet text.</param>
/// <param name="log">Function trace log; receives any exception raised while checking.</param>
/// <returns>
/// <c>true</c> when a word matches an unwanted keyword; <c>false</c> otherwise,
/// including when the text is null/blank or the keyword lookup fails (the failure is logged).
/// </returns>
private static bool TweetHasUnwantedKeyword(string tweetText, TraceWriter log)
{
    if (string.IsNullOrWhiteSpace(tweetText))
    {
        return false;
    }

    try
    {
        // Ordinal, case-insensitive set: O(1) lookups and no dependence on the host's current culture.
        var unwantedKeywords = new HashSet<string>(GetUnwantedTweetKeywords(), StringComparer.OrdinalIgnoreCase);

        // A null separator array splits on every whitespace character, so a keyword that is
        // followed by a newline or tab is still seen as its own word.
        string[] tweetWords = tweetText.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

        return tweetWords.Any(word => unwantedKeywords.Contains(word));
    }
    catch (Exception ex)
    {
        // Best effort: a failed keyword lookup must not block the tweet.
        log.Error(ex.ToString());
        return false;
    }
}
/// <summary>
/// Reads every row of the "unwantedTweetKeywords" table and returns the row keys as lower-cased keywords.
/// </summary>
/// <remarks>
/// A row key that starts with "HT" has that prefix turned into "#", so "HTfoo" yields "#foo".
/// NOTE(review): this presumably exists because '#' cannot appear in a row key; it also means a keyword
/// that genuinely starts with the letters "HT" cannot be stored as-is. Confirm with the table's owner.
/// </remarks>
/// <returns>The lower-cased keyword list; empty when the table has no rows.</returns>
private static List<string> GetUnwantedTweetKeywords()
{
    const string hashtagPrefix = "HT";

    CloudStorageAccount storageAccount = CloudStorageAccount.Parse(CloudConfigurationManager.GetSetting("AzureWebJobsStorage"));
    CloudTable table = storageAccount.CreateCloudTableClient().GetTableReference("unwantedTweetKeywords");

    var keywordList = new List<string>();
    foreach (TableEntity entity in table.ExecuteQuery(new TableQuery<TableEntity>()))
    {
        string keyword = entity.RowKey;
        if (keyword.StartsWith(hashtagPrefix, StringComparison.Ordinal))
        {
            // Swap only the leading marker. Replace("HT", "#") would also rewrite any later "HT"
            // inside the keyword (e.g. "HTNIGHT" -> "#NIG#").
            keyword = "#" + keyword.Substring(hashtagPrefix.Length);
        }

        keywordList.Add(keyword.ToLowerInvariant());
    }

    return keywordList;
}
/// <summary>
/// Removes every character that Azure Table storage does not allow in a RowKey:
/// '/', '\', '#', '?' and the control characters U+0000-U+001F and U+007F-U+009F
/// (which include tab, carriage return and line feed).
/// </summary>
/// <param name="text">Text to sanitize.</param>
/// <returns>The text with all disallowed characters removed.</returns>
private static string RemoveInvalidAzureStorageTableRowKeyChars(string text)
{
    // One pass over the whole disallowed set; stripping only \r, \n and \t would let
    // other control characters through and make the table insert fail.
    return Regex.Replace(text, @"[/\\#?\u0000-\u001F\u007F-\u009F]", "");
}
/// <summary>
/// Deletes every occurrence of "https" that is followed by one or more non-whitespace characters,
/// together with those characters.
/// </summary>
/// <param name="text">Text to strip.</param>
/// <returns>The text with each such run removed; surrounding whitespace is left in place.</returns>
private static string RemoveUrls(string text)
{
    var httpsRun = new Regex(@"https[^\s]+");
    string withoutUrls = httpsRun.Replace(text, "");
    return withoutUrls;
}
/// <summary>
/// Tells whether the text begins with the retweet marker "RT" as a standalone token.
/// </summary>
/// <param name="text">Tweet text to inspect.</param>
/// <returns>
/// <c>true</c> when the text is exactly "RT" or starts with "RT" followed by a character that is
/// not a letter or digit (e.g. "RT @user: ..."); <c>false</c> otherwise, including null or empty text.
/// </returns>
private static bool IsRetweet(string text)
{
    const string retweetMarker = "RT";

    // Ordinal comparison: the marker is a fixed token, not linguistic text.
    if (string.IsNullOrEmpty(text) || !text.StartsWith(retweetMarker, StringComparison.Ordinal))
    {
        return false;
    }

    // Require a token boundary after the marker so ordinary words that merely begin
    // with the letters "RT" (such as "RTFM") are not mistaken for retweets.
    return text.Length == retweetMarker.Length
        || !char.IsLetterOrDigit(text[retweetMarker.Length]);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment