Created
June 26, 2015 18:33
-
-
Save jholland918/4441a1c548e3b914a622 to your computer and use it in GitHub Desktop.
Quick and Dirty GitHub Repo Code Search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Shared request plumbing for the API clients in this file: applies HTTP basic
/// authentication and the User-Agent header to every request, executes it, and
/// turns transport-level failures into exceptions.
/// </summary>
public class BaseClient
{
    // Default endpoint, used whenever the caller does not supply its own URL.
    // Read-only on purpose: a per-call URL must never leak into later calls.
    private readonly Uri BaseUrl = new Uri("https://api.github.com/");

    // NOTE(review): credentials and user agent are hard-coded (masked here);
    // consider loading them from configuration instead of source.
    readonly string username = "*****";
    readonly string password = "*****";
    readonly string userAgent = "*****";

    /// <summary>
    /// Executes <paramref name="request"/> against <paramref name="clientUrl"/>
    /// for this call only; the default base URL is left untouched.
    /// </summary>
    /// <param name="request">Request to send.</param>
    /// <param name="clientUrl">Absolute URL to use as the client's base URL.</param>
    /// <returns>The response with its deserialized payload.</returns>
    /// <exception cref="Exception">The response carries a transport error.</exception>
    public IRestResponse<T> Execute<T>(MyRestRequest request, string clientUrl) where T : new()
    {
        return ExecuteAgainst<T>(request, new Uri(clientUrl));
    }

    /// <summary>
    /// Executes <paramref name="request"/> against the default base URL.
    /// </summary>
    /// <param name="request">Request to send.</param>
    /// <returns>The response with its deserialized payload.</returns>
    /// <exception cref="Exception">The response carries a transport error.</exception>
    public IRestResponse<T> Execute<T>(MyRestRequest request) where T : new()
    {
        return ExecuteAgainst<T>(request, BaseUrl);
    }

    // Single implementation behind both public overloads. The target URL is a
    // parameter rather than instance state so one call cannot affect the next.
    private IRestResponse<T> ExecuteAgainst<T>(MyRestRequest request, Uri targetUrl) where T : new()
    {
        var client = new MyRestClient();
        client.BaseUrl = targetUrl;
        client.Authenticator = new HttpBasicAuthenticator(username, password);
        request.AddHeader("User-Agent", userAgent); // used on every request

        var response = client.Execute<T>(request);
        if (response.ErrorException != null)
        {
            const string message = "Error retrieving response. Check inner details for more info.";
            throw new Exception(message, response.ErrorException);
        }

        return response;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Searches code for a term across the repositories listed in an input file.
/// Repositories are queried in groups ("partitions"); a group whose search
/// reports too many hits, or whose request fails, is split into smaller groups
/// and retried. Matches are appended to an output file as one JSON record per line.
/// </summary>
public class CodeSearchByRepo
{
    // Searches reporting more hits than this are split into smaller partitions.
    private const int MaxSearchResults = 1000;

    private string searchTerm;
    private string inputFileName;
    private string outputFileName;
    private string usersExceededMaxFileName;
    private string exceptionFileName;
    private string restExceptionFileName;

    // Repository names not yet assigned to a partition.
    private List<string> input;

    // Stack of partitions; the LAST entry is the one currently being searched.
    private List<List<string>> inputPool;

    private int callCount = 0;
    private int ratelimit = 0;
    private int ratelimitRemaining = 0;
    private DateTime ratelimitReset = new DateTime();
    private ResponseHelper responseHelper;
    private SearchCodeClient client;
    private CodeSearchTermHelper codeSearchTermHelper;

    /// <summary>
    /// Creates a search over the repositories named in <paramref name="inputFileName"/>.
    /// </summary>
    /// <param name="inputFileName">File with one repository name per line.</param>
    /// <param name="searchTerm">Term placed at the front of the search query.</param>
    public CodeSearchByRepo(string inputFileName, string searchTerm)
    {
        this.inputFileName = inputFileName;
        this.searchTerm = searchTerm;
        this.input = new List<string>();
        this.inputPool = new List<List<string>>();
        this.responseHelper = new ResponseHelper();
        this.client = new SearchCodeClient();
        this.codeSearchTermHelper = new CodeSearchTermHelper("repo");
        this.outputFileName = FileNameHelper.Create(this, "output_" + searchTerm + ".txt");
        this.usersExceededMaxFileName = FileNameHelper.Create(this, "usersExceededMax.txt");
        this.exceptionFileName = FileNameHelper.Create(this, "exception.txt");
        this.restExceptionFileName = FileNameHelper.Create(this, "restException.txt");
    }

    /// <summary>
    /// Reads the input file and keeps issuing search requests until every
    /// repository has been searched (or dropped after a failure).
    /// </summary>
    public void Run()
    {
        ratelimitRemaining = 30;
        ParseInputFile();

        string url = null;     // next page to fetch; null starts a new search
        string nextUrl = null;
        string usedUrl = null;

        while (input.Count > 0 || inputPool.Count > 0)
        {
            callCount++;
            Console.WriteLine("----------------------------------------");
            Console.WriteLine("Ratelimit Remaining: {0} Call Count: {1}", ratelimitRemaining, callCount);
            Console.WriteLine("----------------------------------------");
            Throttle();

            nextUrl = null;
            usedUrl = null;
            SearchCodeResult result = null;
            try
            {
                result = SendRequest(url, out nextUrl, out usedUrl);
            }
            catch (RestException ex)
            {
                HandleRestException(ex, ref url, ref nextUrl, ref usedUrl);
                continue;
            }
            catch (Exception ex)
            {
                HandleException(ex, ref url, ref nextUrl, ref usedUrl);
                continue;
            }

            if (result == null)
            {
                // A success status without a usable payload: treat it like any
                // other failure instead of crashing on result.TotalCount below.
                var noData = new InvalidOperationException("Response contained no data: " + usedUrl);
                HandleException(noData, ref url, ref nextUrl, ref usedUrl);
                continue;
            }

            Console.WriteLine("Input Count : {0}", input.Count);
            Console.WriteLine("InputPool Count: {0}", inputPool.Count);
            Console.WriteLine("Result Count : {0}", result.TotalCount);

            if (IsTotalCountWithinRange(result.TotalCount)
                || inputPool.Last().Count == 1 /* We can't dilute the pool anymore so we go ahead and process as much as we can */)
            {
                ProcessResult(result, usedUrl);
                url = nextUrl; // nextUrl is null once the search results have been iterated to completion

                bool finishedPagingResults = string.IsNullOrWhiteSpace(nextUrl);
                if (finishedPagingResults && inputPool.Count > 0)
                {
                    inputPool.RemoveAt(inputPool.Count - 1);
                }
            }
            else
            {
                // Too many hits for one query: split the partition and start over.
                codeSearchTermHelper.DiluteInputPool(inputPool);
                url = null;
            }
        }
    }

    // Loads repository names from the input file, skipping blank lines and
    // lines that start with '^'.
    private void ParseInputFile()
    {
        foreach (var line in File.ReadAllLines(inputFileName))
        {
            if (string.IsNullOrWhiteSpace(line) || line[0] == '^')
            {
                continue;
            }

            // Use if input file contains json strings
            //repoName = JsonConvert.DeserializeObject<UserRepoOutput>(line).FullName;
            // Use if the input file is plain old repo names
            string repoName = line;
            input.Add(repoName);
        }
    }

    // Recovery for a non-OK response: restart paging and narrow the failing
    // partition so the offending repository can be isolated.
    private void HandleRestException(Exception ex, ref string url, ref string nextUrl, ref string usedUrl)
    {
        Console.WriteLine("RestException caught!");
        url = null;
        nextUrl = null;
        usedUrl = null;
        ConsumeRateLimitSlot();

        if (inputPool.Count > 0)
        {
            if (inputPool[inputPool.Count - 1].Count > 1)
            {
                codeSearchTermHelper.DiluteInputPool(inputPool);
            }
            else
            {
                // A partition of one (or zero) repositories cannot be split any
                // further. Diluting it would only append an empty partition and
                // the next query would carry no repository qualifier at all, so
                // drop it; the logged message below records what was dropped.
                inputPool.RemoveAt(inputPool.Count - 1);
            }
        }

        AppendLine(restExceptionFileName, ex.Message);
    }

    // Recovery for any other failure: restart paging and abandon the current partition.
    private void HandleException(Exception ex, ref string url, ref string nextUrl, ref string usedUrl)
    {
        Console.WriteLine("Exception caught!");
        url = null;
        nextUrl = null;
        usedUrl = null;
        ConsumeRateLimitSlot();

        if (inputPool.Count > 0)
        {
            inputPool.RemoveAt(inputPool.Count - 1);
        }

        AppendLine(exceptionFileName, ex.Message);
    }

    // Counts a failed call against the local budget, never going below zero.
    private void ConsumeRateLimitSlot()
    {
        ratelimitRemaining = Math.Max(0, ratelimitRemaining - 1);
    }

    // Blocks while the request budget is exhausted and the reset time is still
    // in the future, polling every ten seconds.
    private void Throttle()
    {
        if (ratelimitRemaining > 0)
        {
            return;
        }

        var currentDate = DateTime.Now;
        while (ratelimitReset > currentDate)
        {
            Console.WriteLine("***** Waiting for reset date *****");
            Console.WriteLine("Current Date : {0}", currentDate.ToString("yyyy-MM-dd HH:mm:ss"));
            Console.WriteLine("Ratelimit Reset: {0}", ratelimitReset.ToString("yyyy-MM-dd HH:mm:ss"));
            System.Threading.Thread.Sleep(10 * 1000);
            currentDate = DateTime.Now;
        }
    }

    // Issues either a new search (url is null/empty) or a follow-up page request.
    // Throws RestException for any status other than OK; on success reports the
    // URL actually used and the next-page URL (null when there is none).
    private SearchCodeResult SendRequest(string url, out string nextUrl, out string usedUrl)
    {
        IRestResponse<SearchCodeResult> response;
        if (string.IsNullOrEmpty(url))
        {
            var userSearchTerm = codeSearchTermHelper.GetSearchTerm(input, inputPool);
            var q = string.Format("{0}+in:file+extension:cs{1}", searchTerm, userSearchTerm);
            var per_page = "100";
            var page = "1";
            response = client.Search(q, null, null, page, per_page);
        }
        else
        {
            // Assume this is a subsequent call
            response = client.Search(url);
        }

        if (response.StatusCode != System.Net.HttpStatusCode.OK)
        {
            throw new RestException(BuildRequestError(response));
        }

        usedUrl = response.ResponseUri.AbsoluteUri;
        SetRateLimits(response.Headers);
        nextUrl = responseHelper.GetLink(response.Headers);
        return response.Data;
    }

    // Serializes the failed response plus the current partition to JSON for the log.
    private string BuildRequestError(IRestResponse<SearchCodeResult> response)
    {
        return JsonConvert.SerializeObject(new
        {
            absoluteUri = response.ResponseUri != null ? response.ResponseUri.AbsoluteUri : null,
            responseStatusCode = response.StatusCode.ToString(),
            responseStatusDescription = response.StatusDescription,
            responseContent = response.Content,
            inputCount = input.Count,
            inputPoolLast = inputPool.Count > 0
                ? string.Join(",", inputPool[inputPool.Count - 1])
                : string.Empty
        });
    }

    // True when the hit count fits in one search. When it does not and the
    // partition is already down to a single entry, that entry is recorded in
    // the "exceeded max" file as a side effect.
    private bool IsTotalCountWithinRange(int totalCount)
    {
        if (totalCount <= MaxSearchResults)
        {
            return true;
        }

        var current = inputPool.Last();
        if (current.Count == 1)
        {
            Console.WriteLine("User " + current[0] + " exceeded search max, written to file users_exceeded_max.txt");
            AppendLine(usersExceededMaxFileName, current[0]);
        }

        return false;
    }

    // Formats one page of results and appends it to the output file.
    private void ProcessResult(SearchCodeResult result, string url)
    {
        var output = ParseResult(result, url);
        WriteOutput(output);
    }

    // Produces a "^url|...^" header line followed by one JSON record per item.
    // Returns an empty string when the result has no item array.
    private string ParseResult(SearchCodeResult result, string url)
    {
        if (result.Items == null)
        {
            return string.Empty;
        }

        var output = new StringBuilder();
        output.AppendLine("^url|" + url + "^");
        foreach (var item in result.Items)
        {
            // Repository/Owner may be absent from the payload; emit nulls
            // rather than aborting the whole run.
            var repository = item.Repository;
            var owner = repository != null ? repository.Owner : null;

            var record = new CodeSearchOutput
            {
                Name = item.Name,
                Path = item.Path,
                HtmlUrl = item.HtmlUrl,
                RepositoryName = repository != null ? repository.Name : null,
                RepositoryFullName = repository != null ? repository.FullName : null,
                Owner = owner != null ? owner.Login : null
            };
            output.AppendLine(JsonConvert.SerializeObject(record));
        }

        return output.ToString();
    }

    private void WriteOutput(string output)
    {
        AppendLine(outputFileName, output);
        Console.WriteLine("output written...");
    }

    // Appends a single line to the given file, creating it when missing.
    private static void AppendLine(string fileName, string text)
    {
        using (StreamWriter sw = File.AppendText(fileName))
        {
            sw.WriteLine(text);
        }
    }

    // Refreshes the local rate-limit state from the response headers. A missing
    // or unparsable count becomes 0; a missing reset header is replaced by
    // "one minute from now".
    private void SetRateLimits(IList<Parameter> headers)
    {
        var ratelimitString = responseHelper.GetHeader(headers, "X-Ratelimit-Limit");
        var ratelimitRemainingString = responseHelper.GetHeader(headers, "X-Ratelimit-Remaining");
        var ratelimitResetString = responseHelper.GetHeader(headers, "X-Ratelimit-Reset");

        int.TryParse(ratelimitString, out ratelimit);
        int.TryParse(ratelimitRemainingString, out ratelimitRemaining);

        ratelimitReset = ratelimitResetString == null
            ? DateTime.Now.AddMinutes(1)
            : responseHelper.DateTimeFromEpoch(ratelimitResetString);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Builds the qualifier part of a search query ("+user:x+user:y" or
/// "+repo:x+repo:y") and manages the pool of partitions the qualifiers are
/// drawn from. The last partition in the pool is always the active one.
/// </summary>
public class CodeSearchTermHelper
{
    /// <summary>
    /// Upper bound used when filling a new partition. A partition stops
    /// growing once adding an item would bring it to this count, so it holds
    /// at most <c>MaxItemCount - 1</c> items (but never fewer than one).
    /// </summary>
    public int MaxItemCount = 100;

    private readonly string searchPrefix = "";

    /// <param name="source">Either "user" or "repo"; selects the qualifier prefix.</param>
    /// <exception cref="InvalidOperationException">Any other value.</exception>
    public CodeSearchTermHelper(string source)
    {
        switch (source)
        {
            case "user":
                searchPrefix = "+user:";
                break;
            case "repo":
                searchPrefix = "+repo:";
                break;
            default:
                throw new InvalidOperationException("Invalid source");
        }
    }

    /// <summary>
    /// Splits the active (last) partition: the first half of its items (rounded
    /// down) moves into a new partition appended to the pool, which becomes the
    /// active one. Does nothing when the pool is empty or the active partition
    /// has fewer than two items, so an empty partition is never created.
    /// </summary>
    /// <param name="inputPool">Pool to modify in place.</param>
    public void DiluteInputPool(List<List<string>> inputPool)
    {
        if (inputPool == null || inputPool.Count == 0)
        {
            return;
        }

        var lastPartition = inputPool[inputPool.Count - 1];
        var moveCount = lastPartition.Count / 2;
        if (moveCount == 0)
        {
            return; // nothing to split off
        }

        var newPartition = lastPartition.GetRange(0, moveCount);
        lastPartition.RemoveRange(0, moveCount);
        inputPool.Add(newPartition);
    }

    /// <summary>
    /// Returns the qualifier string for the next query. With an empty pool a
    /// new partition is carved out of <paramref name="input"/> (removing the
    /// chosen items from it) and pushed onto the pool; otherwise the active
    /// partition is reused unchanged.
    /// </summary>
    /// <param name="input">Items not yet assigned to a partition.</param>
    /// <param name="inputPool">Pool of partitions; the last one is active.</param>
    /// <returns>Concatenated "prefix + item" qualifiers; empty when there are no items.</returns>
    public string GetSearchTerm(List<string> input, List<List<string>> inputPool)
    {
        return inputPool.Count == 0
            ? GetNewSearchTerm(input, inputPool)
            : GetRemainingSearchTerm(inputPool);
    }

    // Qualifier string for the partition that is already active.
    private string GetRemainingSearchTerm(List<List<string>> inputPool)
    {
        return BuildTerm(inputPool.Last());
    }

    // Moves items from the END of input into a fresh partition until either the
    // qualifier text would reach the length budget or the item cap is hit.
    private string GetNewSearchTerm(List<string> input, List<List<string>> inputPool)
    {
        var baseUrlLength = 100;
        var maxUrlLength = 1500;
        var maxLength = maxUrlLength - baseUrlLength;

        var partition = new List<string>();
        var termLength = 0;
        for (int i = input.Count - 1; i >= 0; i--)
        {
            var newStringLength = termLength + searchPrefix.Length + input[i].Length;
            var newPartitionCount = partition.Count + 1;
            var fits = newStringLength < maxLength && newPartitionCount < MaxItemCount;

            // Always admit the first item, even if it alone breaks a limit:
            // an empty partition would give the caller nothing to search and
            // nothing to remove, so it could never make progress.
            if (!fits && partition.Count > 0)
            {
                break;
            }

            partition.Add(input[i]);
            termLength = newStringLength;
            input.RemoveAt(i);

            if (!fits)
            {
                break;
            }
        }

        inputPool.Add(partition);
        return BuildTerm(partition);
    }

    // "prefix + item" for every item, in partition order.
    private string BuildTerm(List<string> partition)
    {
        return string.Concat(partition.Select(item => searchPrefix + item));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// One element of <c>SearchCodeResult.Items</c>. Each property is bound to the
/// JSON field named in its <c>JsonProperty</c> attribute.
/// </summary>
public class Item
{
    [JsonProperty("name")] public string Name { get; set; }

    [JsonProperty("path")] public string Path { get; set; }

    [JsonProperty("sha")] public string Sha { get; set; }

    [JsonProperty("url")] public string Url { get; set; }

    [JsonProperty("git_url")] public string GitUrl { get; set; }

    [JsonProperty("html_url")] public string HtmlUrl { get; set; }

    // Repository the item belongs to.
    [JsonProperty("repository")] public Repository Repository { get; set; }

    [JsonProperty("score")] public double Score { get; set; }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Account data carried in <c>Repository.Owner</c>. Each property is bound to
/// the JSON field named in its <c>JsonProperty</c> attribute.
/// </summary>
public class Owner
{
    [JsonProperty("login")] public string Login { get; set; }

    [JsonProperty("id")] public int Id { get; set; }

    [JsonProperty("avatar_url")] public string AvatarUrl { get; set; }

    [JsonProperty("gravatar_id")] public string GravatarId { get; set; }

    [JsonProperty("url")] public string Url { get; set; }

    [JsonProperty("html_url")] public string HtmlUrl { get; set; }

    [JsonProperty("followers_url")] public string FollowersUrl { get; set; }

    [JsonProperty("following_url")] public string FollowingUrl { get; set; }

    [JsonProperty("gists_url")] public string GistsUrl { get; set; }

    [JsonProperty("starred_url")] public string StarredUrl { get; set; }

    [JsonProperty("subscriptions_url")] public string SubscriptionsUrl { get; set; }

    [JsonProperty("organizations_url")] public string OrganizationsUrl { get; set; }

    [JsonProperty("repos_url")] public string ReposUrl { get; set; }

    [JsonProperty("events_url")] public string EventsUrl { get; set; }

    [JsonProperty("received_events_url")] public string ReceivedEventsUrl { get; set; }

    [JsonProperty("type")] public string Type { get; set; }

    [JsonProperty("site_admin")] public bool SiteAdmin { get; set; }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Repository data carried in <c>Item.Repository</c>. Each property is bound
/// to the JSON field named in its <c>JsonProperty</c> attribute.
/// </summary>
public class Repository
{
    [JsonProperty("id")] public int Id { get; set; }

    [JsonProperty("name")] public string Name { get; set; }

    [JsonProperty("full_name")] public string FullName { get; set; }

    [JsonProperty("owner")] public Owner Owner { get; set; }

    [JsonProperty("private")] public bool Private { get; set; }

    [JsonProperty("html_url")] public string HtmlUrl { get; set; }

    [JsonProperty("description")] public string Description { get; set; }

    [JsonProperty("fork")] public bool Fork { get; set; }

    [JsonProperty("url")] public string Url { get; set; }

    [JsonProperty("forks_url")] public string ForksUrl { get; set; }

    [JsonProperty("keys_url")] public string KeysUrl { get; set; }

    [JsonProperty("collaborators_url")] public string CollaboratorsUrl { get; set; }

    [JsonProperty("teams_url")] public string TeamsUrl { get; set; }

    [JsonProperty("hooks_url")] public string HooksUrl { get; set; }

    [JsonProperty("issue_events_url")] public string IssueEventsUrl { get; set; }

    [JsonProperty("events_url")] public string EventsUrl { get; set; }

    [JsonProperty("assignees_url")] public string AssigneesUrl { get; set; }

    [JsonProperty("branches_url")] public string BranchesUrl { get; set; }

    [JsonProperty("tags_url")] public string TagsUrl { get; set; }

    [JsonProperty("blobs_url")] public string BlobsUrl { get; set; }

    [JsonProperty("git_tags_url")] public string GitTagsUrl { get; set; }

    [JsonProperty("git_refs_url")] public string GitRefsUrl { get; set; }

    [JsonProperty("trees_url")] public string TreesUrl { get; set; }

    [JsonProperty("statuses_url")] public string StatusesUrl { get; set; }

    [JsonProperty("languages_url")] public string LanguagesUrl { get; set; }

    [JsonProperty("stargazers_url")] public string StargazersUrl { get; set; }

    [JsonProperty("contributors_url")] public string ContributorsUrl { get; set; }

    [JsonProperty("subscribers_url")] public string SubscribersUrl { get; set; }

    [JsonProperty("subscription_url")] public string SubscriptionUrl { get; set; }

    [JsonProperty("commits_url")] public string CommitsUrl { get; set; }

    [JsonProperty("git_commits_url")] public string GitCommitsUrl { get; set; }

    [JsonProperty("comments_url")] public string CommentsUrl { get; set; }

    [JsonProperty("issue_comment_url")] public string IssueCommentUrl { get; set; }

    [JsonProperty("contents_url")] public string ContentsUrl { get; set; }

    [JsonProperty("compare_url")] public string CompareUrl { get; set; }

    [JsonProperty("merges_url")] public string MergesUrl { get; set; }

    [JsonProperty("archive_url")] public string ArchiveUrl { get; set; }

    [JsonProperty("downloads_url")] public string DownloadsUrl { get; set; }

    [JsonProperty("issues_url")] public string IssuesUrl { get; set; }

    [JsonProperty("pulls_url")] public string PullsUrl { get; set; }

    [JsonProperty("milestones_url")] public string MilestonesUrl { get; set; }

    [JsonProperty("notifications_url")] public string NotificationsUrl { get; set; }

    [JsonProperty("labels_url")] public string LabelsUrl { get; set; }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Helpers for reading values out of a response's header collection.
/// </summary>
class ResponseHelper
{
    /// <summary>
    /// Returns the value of the first header whose name equals
    /// <paramref name="name"/>, ignoring case, or null when there is no such
    /// header (or it has no value).
    /// </summary>
    /// <param name="headers">Header collection of a response.</param>
    /// <param name="name">Header name to look for.</param>
    public string GetHeader(IList<Parameter> headers, string name)
    {
        if (headers == null)
        {
            return null;
        }

        // Ordinal comparison: header names are protocol tokens, so matching
        // must not depend on the current culture's casing rules.
        var result = headers.FirstOrDefault(
            x => string.Equals(x.Name, name, StringComparison.OrdinalIgnoreCase));

        if (result == null || result.Value == null)
        {
            return null;
        }

        return result.Value.ToString();
    }

    /// <summary>
    /// Converts a count of seconds since 1970-01-01T00:00:00Z into local time.
    /// </summary>
    /// <param name="seconds">Whole seconds, as decimal text.</param>
    /// <exception cref="FormatException">The text is not an integer.</exception>
    public DateTime DateTimeFromEpoch(string seconds)
    {
        var offset = long.Parse(
            seconds,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture);

        var epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
        return epoch.AddSeconds(offset).ToLocalTime();
    }

    /// <summary>
    /// Returns the URL marked rel="next" in the "Link" header, or null when the
    /// header is missing or has no such entry.
    /// </summary>
    /// <param name="headers">Header collection of a response.</param>
    public string GetLink(IList<Parameter> headers)
    {
        var linkData = GetHeader(headers, "Link");
        return linkData != null ? ParseLink(linkData) : null;
    }

    // Scans the comma-separated entries for the first one containing
    // rel="next" and returns the text between '<' and '>' in it. A matching
    // entry without a well-formed <...> part yields null instead of throwing.
    private string ParseLink(string data)
    {
        foreach (var entry in data.Split(','))
        {
            if (!entry.Contains("rel=\"next\""))
            {
                continue;
            }

            int open = entry.IndexOf('<');
            int close = open >= 0 ? entry.IndexOf('>', open + 1) : -1;
            return close > open
                ? entry.Substring(open + 1, close - open - 1)
                : null;
        }

        return null;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is the hack to get RestSharp to work. | |
// Maybe I was too stupid to figure out how to get this to | |
// work without the hack but oh well. | |
// RestSharp\RestSharp\Extensions\StringExtensions.cs | |
public static string UrlEncode(this string input) | |
{ | |
return input; // hack the planet... | |
//const int maxLength = 32766; | |
//... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Client for the "search/code" resource, built on <c>BaseClient</c>.
/// </summary>
class SearchCodeClient : BaseClient
{
    private string resource = "search/code";

    /// <summary>
    /// Fetches a result page from a complete URL (e.g. a "next" link from a
    /// previous response).
    /// </summary>
    /// <param name="url">URL to request.</param>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null, empty or whitespace.</exception>
    public IRestResponse<SearchCodeResult> Search(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            throw new ArgumentNullException("url");
        }

        return Execute<SearchCodeResult>(new MyRestRequest(), url);
    }

    // This Requires a hacked version of RestSharp because RestSharp URL encodes the query string and causes GitHub to reject the "q" query string.
    /// <summary>
    /// Starts a search. Only <paramref name="q"/> is mandatory; every other
    /// argument is sent as a query parameter only when it is non-blank.
    /// </summary>
    /// <param name="q">Search query.</param>
    /// <param name="sort">Optional "sort" parameter.</param>
    /// <param name="order">Optional "order" parameter.</param>
    /// <param name="page">Optional "page" parameter.</param>
    /// <param name="perPage">Optional "per_page" parameter.</param>
    /// <exception cref="ArgumentNullException"><paramref name="q"/> is null, empty or whitespace.</exception>
    public IRestResponse<SearchCodeResult> Search(string q, string sort = null, string order = null, string page = null, string perPage = null)
    {
        if (string.IsNullOrWhiteSpace(q))
        {
            throw new ArgumentNullException("q");
        }

        var request = new MyRestRequest(resource);
        request.AddQueryParameter("q", q);
        AddWhenPresent(request, "sort", sort);
        AddWhenPresent(request, "order", order);
        AddWhenPresent(request, "page", page);
        AddWhenPresent(request, "per_page", perPage);

        return Execute<SearchCodeResult>(request);
    }

    // Adds the query parameter unless its value is null, empty or whitespace.
    private static void AddWhenPresent(MyRestRequest request, string key, string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return;
        }

        request.AddQueryParameter(key, value);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Payload type requested by <c>SearchCodeClient</c>. Each property is bound
/// to the JSON field named in its <c>JsonProperty</c> attribute.
/// </summary>
public class SearchCodeResult
{
    [JsonProperty("total_count")] public int TotalCount { get; set; }

    [JsonProperty("incomplete_results")] public bool IncompleteResults { get; set; }

    // Matches contained in this page of the result.
    [JsonProperty("items")] public Item[] Items { get; set; }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment