Last active
July 4, 2022 09:16
-
-
Save potatoqualitee/8f4699a01260e2640d976e335049e614 to your computer and use it in GitHub Desktop.
Screen scraping Twitter Bot in PowerShell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tweet function from https://gallery.technet.microsoft.com/Send-Tweets-via-a-72b97964
# Posts $Message as a status update to https://api.twitter.com/1.1/statuses/update.json,
# authenticating with an OAuth 1.0 Authorization header signed with HMAC-SHA1.
#
# Parameters:
#   Message - text of the tweet (mandatory). It is percent-encoded before being signed and sent.
#
# Output: none. A failed request surfaces as the exception thrown by GetResponse().
workflow Send-Tweet {
    param (
        [Parameter(Mandatory=$true)][string]$Message
    )
    InlineScript {
        # Discard the Assembly objects returned here so they do not leak into the workflow output.
        $null = [Reflection.Assembly]::LoadWithPartialName("System.Security")
        $null = [Reflection.Assembly]::LoadWithPartialName("System.Net")

        $status = [System.Uri]::EscapeDataString($Using:Message)

        # Placeholder credentials: replace with the application's real keys/tokens.
        # NOTE(review): real secrets should not live in source; consider loading them from a protected store.
        $oauth_consumer_key = "xxyzxyzxyzxyzxyzxyzyz"
        $oauth_consumer_secret = "xyxyzxyzxyxyzzxyzxyzz"
        $oauth_token = "xyzxyzxyzxyzxyzxyz"
        $oauth_token_secret = "xyzxyzxyzxyzxyzxyzxyz"

        $oauth_nonce = [System.Convert]::ToBase64String([System.Text.Encoding]::ASCII.GetBytes([System.DateTime]::Now.Ticks.ToString()))

        # Seconds since the Unix epoch. The epoch is built explicitly as a UTC value: parsing
        # "01/01/1970" yields a local time, and converting that to UTC shifts the timestamp by the
        # machine's UTC offset (and the parse itself depends on the current culture's date separator).
        $epoch = New-Object System.DateTime -ArgumentList 1970, 1, 1, 0, 0, 0, ([System.DateTimeKind]::Utc)
        $ts = [System.DateTime]::UtcNow - $epoch
        $oauth_timestamp = [System.Convert]::ToInt64($ts.TotalSeconds).ToString()

        # Signature base string: METHOD & encoded URL & encoded parameter list (parameters in alphabetical order).
        $signature = "POST&"
        $signature += [System.Uri]::EscapeDataString("https://api.twitter.com/1.1/statuses/update.json") + "&"
        $signature += [System.Uri]::EscapeDataString("oauth_consumer_key=" + $oauth_consumer_key + "&")
        $signature += [System.Uri]::EscapeDataString("oauth_nonce=" + $oauth_nonce + "&")
        $signature += [System.Uri]::EscapeDataString("oauth_signature_method=HMAC-SHA1&")
        $signature += [System.Uri]::EscapeDataString("oauth_timestamp=" + $oauth_timestamp + "&")
        $signature += [System.Uri]::EscapeDataString("oauth_token=" + $oauth_token + "&")
        $signature += [System.Uri]::EscapeDataString("oauth_version=1.0&")
        $signature += [System.Uri]::EscapeDataString("status=" + $status)

        # Signing key is "<consumer secret>&<token secret>", each percent-encoded.
        $signature_key = [System.Uri]::EscapeDataString($oauth_consumer_secret) + "&" + [System.Uri]::EscapeDataString($oauth_token_secret)

        $hmacsha1 = New-Object System.Security.Cryptography.HMACSHA1
        try {
            $hmacsha1.Key = [System.Text.Encoding]::ASCII.GetBytes($signature_key)
            $oauth_signature = [System.Convert]::ToBase64String($hmacsha1.ComputeHash([System.Text.Encoding]::ASCII.GetBytes($signature)))
        } finally {
            # Release the key material held by the hash object.
            $hmacsha1.Dispose()
        }

        $oauth_authorization = 'OAuth '
        $oauth_authorization += 'oauth_consumer_key="' + [System.Uri]::EscapeDataString($oauth_consumer_key) + '",'
        $oauth_authorization += 'oauth_nonce="' + [System.Uri]::EscapeDataString($oauth_nonce) + '",'
        $oauth_authorization += 'oauth_signature="' + [System.Uri]::EscapeDataString($oauth_signature) + '",'
        $oauth_authorization += 'oauth_signature_method="HMAC-SHA1",'
        $oauth_authorization += 'oauth_timestamp="' + [System.Uri]::EscapeDataString($oauth_timestamp) + '",'
        $oauth_authorization += 'oauth_token="' + [System.Uri]::EscapeDataString($oauth_token) + '",'
        $oauth_authorization += 'oauth_version="1.0"'

        $post_body = [System.Text.Encoding]::ASCII.GetBytes("status=" + $status)

        [System.Net.HttpWebRequest] $request = [System.Net.WebRequest]::Create("https://api.twitter.com/1.1/statuses/update.json")
        $request.Method = "POST"
        $request.Headers.Add("Authorization", $oauth_authorization)
        $request.ContentType = "application/x-www-form-urlencoded"

        $body = $request.GetRequestStream()
        try {
            $body.Write($post_body, 0, $post_body.Length)
            $body.Flush()
        } finally {
            # Always release the request stream, even if the write fails.
            $body.Close()
        }

        # The response body is not used; close it so the underlying connection is released.
        $response = $request.GetResponse()
        $response.Close()
    }
}
# Scrapes the Microsoft Connect SQL Server search results for recently Resolved/Closed feedback
# items whose resolution is 'as Fixed', and collects one object per not-yet-archived item in $items
# (Link, Title, ID, Author, Created, Closed, Votes, Tweet).
#
# HtmlAgilityPack from https://htmlagilitypack.codeplex.com/
# Figured out how to use it from http://www.leeholmes.com/blog/2010/03/05/html-agility-pack-rocks-your-screen-scraping-world/
Add-Type -Path .\HtmlAgilityPack.dll
$doc = New-Object HtmlAgilityPack.HtmlDocument
$items = @()

# archive.csv only exists once a first item has been archived; treat a missing file as an empty archive
# instead of letting Import-Csv raise an error on the first run.
$archive = @()
if (Test-Path .\archive.csv) {
    $archive = Import-Csv .\archive.csv
}

# Connect Search has an URL for Resolved (Status=2) and one for Closed (Status=3). Run this section for each of these URLs
foreach ($statusnum in 2..3) {
    $url = "https://connect.microsoft.com/SQLServer/SearchResults.aspx?FeedbackType=0&Status=$statusnum&Scope=0&ChangedDays=2&SortOrder=40&TabView=0"
    $html = Invoke-WebRequest -UseBasicParsing -Uri $url

    # Load the result into HtmlAgilityPack
    $null = $doc.LoadHtml($html.Content)

    # Count the number of returned items
    # Where did I get this XPath? Am I a wizard? No, I used Google Chrome's Right Click -> Inspect element -> Copy -> XPath
    $count = 0
    $countNode = $doc.DocumentNode.SelectNodes('//*[@id="ctl00_MasterBody_BugPagingControlTop_NumberOfItems"]')
    if ($null -ne $countNode) {
        # TryParse leaves $count at 0 when the text is not a plain integer.
        $null = [int]::TryParse("$($countNode.InnerText)".Trim(), [ref]$count)
    }

    # Without this guard a missing/zero count turns the range into 1..0, which PowerShell
    # enumerates downwards (1, 0) instead of skipping the loop.
    if ($count -lt 1) { continue }

    # Parse each item
    foreach ($index in 1..$count) {
        # Stylize the number so that it works with their element names (two digits, zero padded)
        $num = '{0:D2}' -f $index

        # all of these items basically use the same path, so set up a base path
        $basepath = "//*[@id='ctl00_MasterBody_BugsSearchResultsView_ctl$($num)_BugsResultModule"

        $xpath = "$($basepath)_FeedbackStatus_ResolutionLabel']"
        $node = $doc.DocumentNode.SelectNodes($xpath)
        $resolution = $node.InnerText

        # Some resolutions are Won't Fix or Duplicate, etc
        if ($resolution -eq 'as Fixed') {
            # Link
            $xpath = "$($basepath)_FeedbackLink']"
            $node = $doc.DocumentNode.SelectNodes($xpath)
            # NOTE(review): assumes the first attribute of the link element holds the relative URL - confirm against the markup
            $link = $node.attributes[0].value
            $link = "http://connect.microsoft.com$link"

            # title
            $title = $node.InnerText

            # Author
            $xpath = "$($basepath)_NewFeedbackAuthor']"
            $node = $doc.DocumentNode.SelectNodes($xpath)
            $author = $node.InnerText

            # Alert the author on Twitter
            switch ($author) {
                "AaronBertrand" { $author = "@AaronBertrand" }
                "Chrissy LeMaire" { $author = "@cl" }
                "SQLvariant" { $author = "@SQLvariant" }
            }

            # created
            # NOTE(review): TrimStart("...") treats its argument as a set of characters, not a prefix.
            # Whether the split/trim below yields the date depends on the exact whitespace in the page - verify.
            $xpath = "$($basepath)_FeedbackItemDetailsUpdatePanel']/div/span[1]"
            $node = $doc.DocumentNode.SelectNodes($xpath)
            $created = (($node.FirstChild.InnerText -Split " ")[0]).TrimStart("Created on ")

            # feedbackid
            # NOTE(review): same character-set TrimStart as above; it stops at the first character outside the set.
            $xpath = "$($basepath)_FeedbackItemDetailsUpdatePanel']/div/span[5]"
            $node = $doc.DocumentNode.SelectNodes($xpath)
            $feedbackid = $node.InnerText.TrimStart("feedback id: ")

            # Votes
            $xpath = "$($basepath)_FeedbackItemVotingControlForVoting_SimpleVote_ctl02_text']"
            $node = $doc.DocumentNode.SelectNodes($xpath)
            $votes = $node.InnerText

            $tweet = "$title - $author"

            # We have length requirements to work with, so work it.
            # Links in Twitter, no matter how long or short they are, will always take up 20 chars
            if ($tweet.length -gt 119) {
                $tweet = $tweet.Substring(0,115) + "..."
            }
            $tweet = "$tweet $link"

            # Check to see if it's been posted already, if not, add it to the collection
            if ($archive.link -notcontains $link) {
                $items += [PSCustomObject]@{
                    Link = $link
                    Title = $title
                    ID = $feedbackid
                    Author = $author
                    Created = $created
                    Closed = Get-Date -f "M/dd/yyyy"
                    Votes = $votes
                    Tweet = $tweet
                }
            }
        }
    }
}
# Intended to run from Task Scheduler every 30 minutes; only the first pending item is tweeted
# per run so followers are not flooded.
$post = $items | Select-Object -First 1
if ($null -ne $post) {
    try {
        # Tweet it, discarding anything the workflow writes to the pipeline
        Send-Tweet $post.Tweet | Out-Null

        # Record the item in the archive: used for later stats and to prevent duplicate tweets
        $archiveColumns = 'Link', 'Title', 'ID', 'Author', 'Created', 'Closed', 'Votes'
        $post |
            Select-Object -Property $archiveColumns |
            Export-Csv -Path .\archive.csv -Append
    } catch {
        # Tweeting failed: log the timestamp and the post object for inspection
        Add-Content -Path .\errors.csv -Value "$(Get-Date) $post"
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment