Skip to content

Instantly share code, notes, and snippets.

@jholland918
Created June 26, 2015 18:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jholland918/4441a1c548e3b914a622 to your computer and use it in GitHub Desktop.
Save jholland918/4441a1c548e3b914a622 to your computer and use it in GitHub Desktop.
Quick and Dirty GitHub Repo Code Search
/// <summary>
/// Base class for REST clients: configures a <c>MyRestClient</c> with basic authentication
/// and a User-Agent header, executes the request and surfaces transport errors as exceptions.
/// </summary>
public class BaseClient
{
    // Root used when the caller does not supply a URL of its own. Kept immutable so that a
    // call made with an explicit URL cannot change the target of later calls.
    private static readonly Uri DefaultBaseUrl = new Uri("https://api.github.com/");

    // NOTE(review): credentials are placeholders embedded in source; they should be supplied
    // from configuration or the environment rather than committed.
    readonly string username = "*****";
    readonly string password = "*****";
    readonly string userAgent = "*****";

    /// <summary>
    /// Executes <paramref name="request"/> against the given URL instead of the default root.
    /// </summary>
    /// <param name="request">The request to send.</param>
    /// <param name="clientUrl">Absolute URL used as the client's base URL for this call only.</param>
    /// <returns>The typed response.</returns>
    /// <exception cref="Exception">The response carried an <c>ErrorException</c>.</exception>
    public IRestResponse<T> Execute<T>(MyRestRequest request, string clientUrl) where T : new()
    {
        // Previously this overwrote a shared field, so every later default-root call on the
        // same instance was sent to the last explicit URL.
        return ExecuteAgainst<T>(request, new Uri(clientUrl));
    }

    /// <summary>
    /// Executes <paramref name="request"/> against the default API root.
    /// </summary>
    /// <param name="request">The request to send.</param>
    /// <returns>The typed response.</returns>
    /// <exception cref="Exception">The response carried an <c>ErrorException</c>.</exception>
    public IRestResponse<T> Execute<T>(MyRestRequest request) where T : new()
    {
        return ExecuteAgainst<T>(request, DefaultBaseUrl);
    }

    // Shared implementation: builds a client for the given base URL and runs the request.
    private IRestResponse<T> ExecuteAgainst<T>(MyRestRequest request, Uri baseUrl) where T : new()
    {
        var client = new MyRestClient();
        client.BaseUrl = baseUrl;
        client.Authenticator = new HttpBasicAuthenticator(username, password);
        request.AddHeader("User-Agent", userAgent); // used on every request

        var response = client.Execute<T>(request);
        if (response.ErrorException != null)
        {
            // The exception type is kept as-is because callers catch System.Exception.
            const string message = "Error retrieving response. Check inner details for more info.";
            throw new Exception(message, response.ErrorException);
        }
        return response;
    }
}
/// <summary>
/// Searches code for a single term across the repositories listed in an input file.
/// Repositories are grouped into partitions (one partition per query); a partition is split
/// ("diluted") when a query reports too many hits or fails, and results, overflowing
/// entries and errors are appended to separate text files.
/// </summary>
public class CodeSearchByRepo
{
    // Total-count ceiling above which a partition is split rather than processed.
    private const int MaxSearchResults = 1000;

    private string searchTerm;
    private string inputFileName;
    private string outputFileName;
    private string usersExceededMaxFileName;
    private string exceptionFileName;
    private string restExceptionFileName;
    private List<string> input;
    private List<List<string>> inputPool;
    private int callCount = 0;
    private int ratelimit = 0;
    private int ratelimitRemaining = 0;
    private DateTime ratelimitReset = new DateTime();
    private ResponseHelper responseHelper;
    private SearchCodeClient client;
    private CodeSearchTermHelper codeSearchTermHelper;

    /// <summary>
    /// Creates a search over the repositories named in <paramref name="inputFileName"/>.
    /// </summary>
    /// <param name="inputFileName">Path of the file holding one repository name per line.</param>
    /// <param name="searchTerm">Term placed at the front of every search query.</param>
    public CodeSearchByRepo(string inputFileName, string searchTerm)
    {
        this.inputFileName = inputFileName;
        this.searchTerm = searchTerm;
        this.input = new List<string>();
        this.inputPool = new List<List<string>>();
        this.responseHelper = new ResponseHelper();
        this.client = new SearchCodeClient();
        this.codeSearchTermHelper = new CodeSearchTermHelper("repo");
        this.outputFileName = FileNameHelper.Create(this, "output_" + searchTerm + ".txt");
        this.usersExceededMaxFileName = FileNameHelper.Create(this, "usersExceededMax.txt");
        this.exceptionFileName = FileNameHelper.Create(this, "exception.txt");
        this.restExceptionFileName = FileNameHelper.Create(this, "restException.txt");
    }

    /// <summary>
    /// Reads the input file and issues search requests until both the unread input and the
    /// partition pool are empty.
    /// </summary>
    public void Run()
    {
        ratelimitRemaining = 30;
        ParseInputFile();

        string url = null;
        while (input.Count > 0 || inputPool.Count > 0)
        {
            callCount++;
            Console.WriteLine("----------------------------------------");
            Console.WriteLine("Ratelimit Remaining: {0} Call Count: {1}", ratelimitRemaining, callCount);
            Console.WriteLine("----------------------------------------");
            Throttle();

            string nextUrl = null;
            string usedUrl = null;
            // An empty url means this request starts a new query instead of following a "next" link.
            bool isFirstPage = string.IsNullOrEmpty(url);

            SearchCodeResult result;
            try
            {
                result = SendRequest(url, out nextUrl, out usedUrl);
            }
            catch (RestException ex)
            {
                HandleRestException(ex, ref url, ref nextUrl, ref usedUrl);
                continue;
            }
            catch (Exception ex)
            {
                HandleException(ex, ref url, ref nextUrl, ref usedUrl);
                continue;
            }

            Console.WriteLine("Input Count : {0}", input.Count);
            Console.WriteLine("InputPool Count: {0}", inputPool.Count);
            Console.WriteLine("Result Count : {0}", result.TotalCount);

            // Only report an overflowing entry on the first page, otherwise the same entry is
            // appended to the overflow file once for every page that is fetched.
            bool withinRange = IsTotalCountWithinRange(result.TotalCount, isFirstPage);
            // We can't dilute the pool anymore so we go ahead and process as much as we can.
            bool cannotSplitFurther = inputPool.Count == 0 || inputPool.Last().Count <= 1;

            if (withinRange || cannotSplitFurther)
            {
                ProcessResult(result, usedUrl);
                url = nextUrl; // nextUrl is null once the search results have been iterated to completion
                if (string.IsNullOrWhiteSpace(nextUrl))
                {
                    RemoveLastPartition();
                }
            }
            else
            {
                codeSearchTermHelper.DiluteInputPool(inputPool);
                url = null;
            }
        }
    }

    // Loads repository names from the input file, skipping blank lines and lines that start
    // with '^' (ParseResult emits marker lines in that form).
    private void ParseInputFile()
    {
        foreach (var line in File.ReadAllLines(inputFileName))
        {
            if (string.IsNullOrWhiteSpace(line) || line[0] == '^')
            {
                continue;
            }

            // Use if input file contains json strings
            //repoName = JsonConvert.DeserializeObject<UserRepoOutput>(line).FullName;
            // Use if the input file is plain old repo names
            input.Add(line);
        }
    }

    // Recovery for a non-OK response: restart paging, split the current partition so the
    // offending entry can be isolated, and log the error.
    private void HandleRestException(Exception ex, ref string url, ref string nextUrl, ref string usedUrl)
    {
        Console.WriteLine("RestException caught!");
        url = null;
        nextUrl = null;
        usedUrl = null;
        ConsumeOneRequest();

        if (inputPool.Count > 0 && inputPool.Last().Count > 1)
        {
            codeSearchTermHelper.DiluteInputPool(inputPool);
        }
        else
        {
            // A partition of one entry cannot be split. Diluting it would only append an empty
            // partition, produce a query without any repository filter and repeat forever, so
            // the partition is dropped instead (the logged message still names its content).
            RemoveLastPartition();
        }

        AppendLine(restExceptionFileName, ex.Message);
    }

    // Recovery for any other failure: restart paging, drop the current partition and log the error.
    private void HandleException(Exception ex, ref string url, ref string nextUrl, ref string usedUrl)
    {
        Console.WriteLine("Exception caught!");
        url = null;
        nextUrl = null;
        usedUrl = null;
        ConsumeOneRequest();
        RemoveLastPartition();
        AppendLine(exceptionFileName, ex.Message);
    }

    // Counts a failed request against the remaining quota without going below zero.
    private void ConsumeOneRequest()
    {
        ratelimitRemaining = Math.Max(0, ratelimitRemaining - 1);
    }

    // Removes the partition currently being worked on, if there is one.
    private void RemoveLastPartition()
    {
        if (inputPool.Count > 0)
        {
            inputPool.RemoveAt(inputPool.Count - 1);
        }
    }

    // Appends a single line to the given file.
    private static void AppendLine(string fileName, string line)
    {
        using (StreamWriter writer = File.AppendText(fileName))
        {
            writer.WriteLine(line);
        }
    }

    // Blocks in 10 second steps until the reset time has passed, but only when no requests
    // were left at the time of the call.
    private void Throttle()
    {
        if (ratelimitRemaining > 0)
        {
            return;
        }

        var currentDate = DateTime.Now;
        while (ratelimitReset > currentDate)
        {
            Console.WriteLine("***** Waiting for reset date *****");
            Console.WriteLine("Current Date : {0}", currentDate.ToString("yyyy-MM-dd HH:mm:ss"));
            Console.WriteLine("Ratelimit Reset: {0}", ratelimitReset.ToString("yyyy-MM-dd HH:mm:ss"));
            System.Threading.Thread.Sleep(10 * 1000);
            currentDate = DateTime.Now;
        }
    }

    // Sends either a fresh query (url empty) or a follow-up request for the given page URL.
    // Outputs the URL that was actually requested and the URL of the next page (null when
    // there is none). Throws RestException when the response is not usable.
    private SearchCodeResult SendRequest(string url, out string nextUrl, out string usedUrl)
    {
        IRestResponse<SearchCodeResult> response;
        if (string.IsNullOrEmpty(url))
        {
            var repoFilter = codeSearchTermHelper.GetSearchTerm(input, inputPool);
            var q = string.Format("{0}+in:file+extension:cs{1}", searchTerm, repoFilter);
            var per_page = "100";
            var page = "1";
            response = client.Search(q, null, null, page, per_page);
        }
        else
        {
            // Assume this is a subsequent call
            response = client.Search(url);
        }

        // Read the rate-limit headers before looking at the status so that an error response
        // (for example one sent because the limit was hit) still updates the throttle state.
        if (response.Headers != null)
        {
            SetRateLimits(response.Headers);
        }

        // A missing body is reported like a failed request; dereferencing it later would
        // otherwise abort the whole run outside of any error handling.
        if (response.StatusCode != System.Net.HttpStatusCode.OK || response.Data == null)
        {
            throw new RestException(BuildRequestError(response));
        }

        usedUrl = response.ResponseUri.AbsoluteUri;
        nextUrl = responseHelper.GetLink(response.Headers);
        return response.Data;
    }

    // Serializes the details of a failed request to a single JSON line for the error log.
    private string BuildRequestError(IRestResponse<SearchCodeResult> response)
    {
        // Both values are guarded so that building the error report cannot itself throw.
        string requestedUri = response.ResponseUri != null ? response.ResponseUri.AbsoluteUri : null;
        string currentPartition = inputPool.Count > 0 ? string.Join(",", inputPool.Last()) : string.Empty;

        return JsonConvert.SerializeObject(new
        {
            absoluteUri = requestedUri,
            responseStatusCode = response.StatusCode.ToString(),
            responseStatusDescription = response.StatusDescription,
            responseContent = response.Content,
            inputCount = input.Count,
            inputPoolLast = currentPartition
        });
    }

    // Returns true when totalCount does not exceed MaxSearchResults. When it does and the
    // current partition holds a single entry, that entry is written to the overflow file,
    // provided reportOverflow is set.
    private bool IsTotalCountWithinRange(int totalCount, bool reportOverflow)
    {
        if (totalCount <= MaxSearchResults)
        {
            return true;
        }

        if (reportOverflow && inputPool.Count > 0 && inputPool.Last().Count == 1)
        {
            string entry = inputPool.Last()[0];
            // Report the file that is really written instead of a hard-coded, different name.
            Console.WriteLine("Repo " + entry + " exceeded search max, written to file " + usersExceededMaxFileName);
            AppendLine(usersExceededMaxFileName, entry);
        }
        return false;
    }

    // Formats one page of results and appends it to the output file.
    private void ProcessResult(SearchCodeResult result, string url)
    {
        var output = ParseResult(result, url);
        WriteOutput(output);
    }

    // Builds the text for one page: a "^url|...^" marker line followed by one JSON line per
    // item. Returns an empty string when the result has no item array.
    private string ParseResult(SearchCodeResult result, string url)
    {
        if (result.Items == null)
        {
            return string.Empty;
        }

        var output = new StringBuilder();
        output.AppendLine("^url|" + url + "^");
        foreach (var item in result.Items)
        {
            var record = new CodeSearchOutput
            {
                Name = item.Name,
                Path = item.Path,
                HtmlUrl = item.HtmlUrl,
                RepositoryName = item.Repository.Name,
                RepositoryFullName = item.Repository.FullName,
                Owner = item.Repository.Owner.Login
            };
            output.AppendLine(JsonConvert.SerializeObject(record));
        }
        return output.ToString();
    }

    // Appends the formatted page to the output file.
    private void WriteOutput(string output)
    {
        AppendLine(outputFileName, output);
        Console.WriteLine("output written...");
    }

    // Updates the throttle state from the rate-limit headers. Values that are missing or
    // cannot be parsed become 0; a missing reset header schedules the reset one minute from now.
    private void SetRateLimits(IList<Parameter> headers)
    {
        var ratelimitString = responseHelper.GetHeader(headers, "X-Ratelimit-Limit");
        var ratelimitRemainingString = responseHelper.GetHeader(headers, "X-Ratelimit-Remaining");
        var ratelimitResetString = responseHelper.GetHeader(headers, "X-Ratelimit-Reset");

        int.TryParse(ratelimitString, out ratelimit);
        int.TryParse(ratelimitRemainingString, out ratelimitRemaining);

        ratelimitReset = ratelimitResetString == null
            ? DateTime.Now.AddMinutes(1)
            : responseHelper.DateTimeFromEpoch(ratelimitResetString);
    }
}
/// <summary>
/// Builds the "+user:..." / "+repo:..." qualifier part of a search query and manages the
/// pool of partitions (lists of names) those qualifiers are built from.
/// </summary>
public class CodeSearchTermHelper
{
    /// <summary>Limit on partition size; a new partition is filled while its size stays below this value.</summary>
    public int MaxItemCount = 100;

    private string searchPrefix = "";

    /// <summary>
    /// Selects the qualifier that is prefixed to every name.
    /// </summary>
    /// <param name="source">Either "user" or "repo".</param>
    /// <exception cref="InvalidOperationException"><paramref name="source"/> is any other value.</exception>
    public CodeSearchTermHelper(string source)
    {
        switch (source)
        {
            case "user":
                searchPrefix = "+user:";
                break;
            case "repo":
                searchPrefix = "+repo:";
                break;
            default:
                throw new InvalidOperationException("Invalid source");
        }
    }

    /// <summary>
    /// Splits the last partition of the pool: the first half of its items (rounded down)
    /// is moved, in order, into a new partition appended to the pool.
    /// </summary>
    /// <param name="inputPool">The pool to modify in place.</param>
    public void DiluteInputPool(List<List<string>> inputPool)
    {
        if (inputPool == null || inputPool.Count == 0)
        {
            return;
        }

        var lastPartition = inputPool[inputPool.Count - 1];
        if (lastPartition.Count < 2)
        {
            // Nothing to split. Appending an empty partition here would make the next query
            // carry no qualifier at all and the pool would grow without making progress.
            return;
        }

        var moveCount = lastPartition.Count / 2;
        var newPartition = lastPartition.GetRange(0, moveCount);
        lastPartition.RemoveRange(0, moveCount);
        inputPool.Add(newPartition);
    }

    /// <summary>
    /// Returns the qualifier string for the next query: built from the pool's last partition
    /// when the pool is non-empty, otherwise from a new partition taken from the end of
    /// <paramref name="input"/> (which is added to the pool and removed from the input).
    /// </summary>
    /// <param name="input">Names not yet assigned to a partition.</param>
    /// <param name="inputPool">Partitions waiting to be searched.</param>
    /// <returns>The concatenated qualifiers, e.g. "+repo:a/b+repo:c/d".</returns>
    public string GetSearchTerm(List<string> input, List<List<string>> inputPool)
    {
        if (inputPool.Count == 0)
        {
            return GetNewSearchTerm(input, inputPool);
        }
        return GetRemainingSearchTerm(inputPool);
    }

    // Qualifier string for the partition at the end of the pool.
    private string GetRemainingSearchTerm(List<List<string>> inputPool)
    {
        return BuildSearchTerm(inputPool[inputPool.Count - 1]);
    }

    // Moves names from the end of the input into a new partition until the length budget or
    // the item limit would be exceeded, then adds the partition to the pool.
    private string GetNewSearchTerm(List<string> input, List<List<string>> inputPool)
    {
        const int baseUrlLength = 100;
        const int maxUrlLength = 1500;
        var maxLength = maxUrlLength - baseUrlLength;

        var termLength = 0;
        var partition = new List<string>();
        for (int i = input.Count - 1; i >= 0; i--)
        {
            var candidate = input[i];
            var lengthIfAdded = termLength + searchPrefix.Length + candidate.Length;
            var countIfAdded = partition.Count + 1;
            var fits = lengthIfAdded < maxLength && countIfAdded < MaxItemCount;

            // The first name is always taken, even when it does not fit: an empty partition
            // would leave the input untouched and the caller would ask for it again forever.
            if (!fits && partition.Count > 0)
            {
                break;
            }

            partition.Add(candidate);
            termLength = lengthIfAdded;
            input.RemoveAt(i);
        }

        inputPool.Add(partition);
        return BuildSearchTerm(partition);
    }

    // Concatenates prefix + name for every name in the partition.
    private string BuildSearchTerm(List<string> partition)
    {
        var term = new System.Text.StringBuilder();
        foreach (var item in partition)
        {
            term.Append(searchPrefix).Append(item);
        }
        return term.ToString();
    }
}
/// <summary>
/// One element of <c>SearchCodeResult.Items</c>. Every property is bound to the JSON key
/// named in its <c>JsonProperty</c> attribute.
/// </summary>
public class Item
{
[JsonProperty("name")]
public string Name { get; set; }
[JsonProperty("path")]
public string Path { get; set; }
[JsonProperty("sha")]
public string Sha { get; set; }
[JsonProperty("url")]
public string Url { get; set; }
[JsonProperty("git_url")]
public string GitUrl { get; set; }
[JsonProperty("html_url")]
public string HtmlUrl { get; set; }
// Nested object describing the repository; see the Repository class.
[JsonProperty("repository")]
public Repository Repository { get; set; }
[JsonProperty("score")]
public double Score { get; set; }
}
/// <summary>
/// Value of <c>Repository.Owner</c>. Every property is bound to the JSON key named in its
/// <c>JsonProperty</c> attribute.
/// </summary>
public class Owner
{
[JsonProperty("login")]
public string Login { get; set; }
[JsonProperty("id")]
public int Id { get; set; }
[JsonProperty("avatar_url")]
public string AvatarUrl { get; set; }
[JsonProperty("gravatar_id")]
public string GravatarId { get; set; }
[JsonProperty("url")]
public string Url { get; set; }
[JsonProperty("html_url")]
public string HtmlUrl { get; set; }
[JsonProperty("followers_url")]
public string FollowersUrl { get; set; }
[JsonProperty("following_url")]
public string FollowingUrl { get; set; }
[JsonProperty("gists_url")]
public string GistsUrl { get; set; }
[JsonProperty("starred_url")]
public string StarredUrl { get; set; }
[JsonProperty("subscriptions_url")]
public string SubscriptionsUrl { get; set; }
[JsonProperty("organizations_url")]
public string OrganizationsUrl { get; set; }
[JsonProperty("repos_url")]
public string ReposUrl { get; set; }
[JsonProperty("events_url")]
public string EventsUrl { get; set; }
[JsonProperty("received_events_url")]
public string ReceivedEventsUrl { get; set; }
[JsonProperty("type")]
public string Type { get; set; }
[JsonProperty("site_admin")]
public bool SiteAdmin { get; set; }
}
/// <summary>
/// Value of <c>Item.Repository</c>. Every property is bound to the JSON key named in its
/// <c>JsonProperty</c> attribute; most of them are URL strings.
/// </summary>
public class Repository
{
[JsonProperty("id")]
public int Id { get; set; }
[JsonProperty("name")]
public string Name { get; set; }
[JsonProperty("full_name")]
public string FullName { get; set; }
// Nested object describing the owning account; see the Owner class.
[JsonProperty("owner")]
public Owner Owner { get; set; }
[JsonProperty("private")]
public bool Private { get; set; }
[JsonProperty("html_url")]
public string HtmlUrl { get; set; }
[JsonProperty("description")]
public string Description { get; set; }
[JsonProperty("fork")]
public bool Fork { get; set; }
[JsonProperty("url")]
public string Url { get; set; }
[JsonProperty("forks_url")]
public string ForksUrl { get; set; }
[JsonProperty("keys_url")]
public string KeysUrl { get; set; }
[JsonProperty("collaborators_url")]
public string CollaboratorsUrl { get; set; }
[JsonProperty("teams_url")]
public string TeamsUrl { get; set; }
[JsonProperty("hooks_url")]
public string HooksUrl { get; set; }
[JsonProperty("issue_events_url")]
public string IssueEventsUrl { get; set; }
[JsonProperty("events_url")]
public string EventsUrl { get; set; }
[JsonProperty("assignees_url")]
public string AssigneesUrl { get; set; }
[JsonProperty("branches_url")]
public string BranchesUrl { get; set; }
[JsonProperty("tags_url")]
public string TagsUrl { get; set; }
[JsonProperty("blobs_url")]
public string BlobsUrl { get; set; }
[JsonProperty("git_tags_url")]
public string GitTagsUrl { get; set; }
[JsonProperty("git_refs_url")]
public string GitRefsUrl { get; set; }
[JsonProperty("trees_url")]
public string TreesUrl { get; set; }
[JsonProperty("statuses_url")]
public string StatusesUrl { get; set; }
[JsonProperty("languages_url")]
public string LanguagesUrl { get; set; }
[JsonProperty("stargazers_url")]
public string StargazersUrl { get; set; }
[JsonProperty("contributors_url")]
public string ContributorsUrl { get; set; }
[JsonProperty("subscribers_url")]
public string SubscribersUrl { get; set; }
[JsonProperty("subscription_url")]
public string SubscriptionUrl { get; set; }
[JsonProperty("commits_url")]
public string CommitsUrl { get; set; }
[JsonProperty("git_commits_url")]
public string GitCommitsUrl { get; set; }
[JsonProperty("comments_url")]
public string CommentsUrl { get; set; }
[JsonProperty("issue_comment_url")]
public string IssueCommentUrl { get; set; }
[JsonProperty("contents_url")]
public string ContentsUrl { get; set; }
[JsonProperty("compare_url")]
public string CompareUrl { get; set; }
[JsonProperty("merges_url")]
public string MergesUrl { get; set; }
[JsonProperty("archive_url")]
public string ArchiveUrl { get; set; }
[JsonProperty("downloads_url")]
public string DownloadsUrl { get; set; }
[JsonProperty("issues_url")]
public string IssuesUrl { get; set; }
[JsonProperty("pulls_url")]
public string PullsUrl { get; set; }
[JsonProperty("milestones_url")]
public string MilestonesUrl { get; set; }
[JsonProperty("notifications_url")]
public string NotificationsUrl { get; set; }
[JsonProperty("labels_url")]
public string LabelsUrl { get; set; }
}
/// <summary>
/// Helpers for reading values out of a response's header list.
/// </summary>
class ResponseHelper
{
    private static readonly DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    /// <summary>
    /// Returns the value of the first header whose name matches <paramref name="name"/>,
    /// ignoring case.
    /// </summary>
    /// <param name="headers">The response headers.</param>
    /// <param name="name">The header name to look for.</param>
    /// <returns>The header value as a string, or null when the header is absent or has no value.</returns>
    public string GetHeader(IList<Parameter> headers, string name)
    {
        var header = FindHeader(headers, name);
        if (header == null || header.Value == null)
        {
            return null;
        }
        return header.Value.ToString();
    }

    /// <summary>
    /// Converts a count of seconds since 1970-01-01 00:00:00 UTC into a local <see cref="DateTime"/>.
    /// </summary>
    /// <param name="seconds">The number of seconds, as decimal digits.</param>
    /// <returns>The corresponding point in time, expressed in local time.</returns>
    /// <exception cref="FormatException"><paramref name="seconds"/> is not a valid integer.</exception>
    public DateTime DateTimeFromEpoch(string seconds)
    {
        // The value is machine-generated, so it is parsed independently of the current culture.
        var offset = long.Parse(seconds, System.Globalization.CultureInfo.InvariantCulture);
        return UnixEpoch.AddSeconds(offset).ToLocalTime();
    }

    /// <summary>
    /// Extracts the URL marked rel="next" from the "Link" header.
    /// </summary>
    /// <param name="headers">The response headers.</param>
    /// <returns>The next-page URL, or null when there is no such header or no "next" entry.</returns>
    public string GetLink(IList<Parameter> headers)
    {
        var linkHeader = FindHeader(headers, "Link");
        if (linkHeader == null || linkHeader.Value == null)
        {
            return null;
        }
        return ParseLink(linkHeader.Value.ToString());
    }

    // Finds a header by name. The comparison is ordinal and case-insensitive: upper-casing
    // with the current culture (as done before) does not map "Link" to "LINK" under every
    // culture, which made the header undetectable there.
    private static Parameter FindHeader(IList<Parameter> headers, string name)
    {
        if (headers == null)
        {
            return null;
        }
        return headers.FirstOrDefault(
            x => x != null && string.Equals(x.Name, name, StringComparison.OrdinalIgnoreCase));
    }

    // Returns the text between '<' and '>' of the first comma-separated entry that carries
    // rel="next" and is well formed; null when no entry qualifies.
    private static string ParseLink(string data)
    {
        foreach (var entry in data.Split(','))
        {
            if (!entry.Contains("rel=\"next\""))
            {
                continue;
            }

            var open = entry.IndexOf('<');
            var close = open < 0 ? -1 : entry.IndexOf('>', open + 1);
            if (close < 0)
            {
                // Malformed entry: skip it instead of failing with an index error.
                continue;
            }
            return entry.Substring(open + 1, close - open - 1);
        }
        return null;
    }
}
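// Workaround needed to make RestSharp usable for this search: RestSharp URL-encodes the
// query string, which causes GitHub to reject the "q" parameter, so RestSharp's UrlEncode
// extension is patched to return its input unchanged.
// There may be a way to achieve this without modifying RestSharp; none was found.
// Patched file: RestSharp\RestSharp\Extensions\StringExtensions.cs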
public static string UrlEncode(this string input)
{
return input; // hack the planet...
//const int maxLength = 32766;
//...
/// <summary>
/// Client for the "search/code" resource, built on top of <c>BaseClient</c>.
/// </summary>
class SearchCodeClient : BaseClient
{
    private string resource = "search/code";

    /// <summary>
    /// Requests the given URL as-is (used for follow-up pages).
    /// </summary>
    /// <param name="url">The complete URL to request.</param>
    /// <returns>The typed response.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null, empty or whitespace.</exception>
    public IRestResponse<SearchCodeResult> Search(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            throw new ArgumentNullException("url");
        }

        return Execute<SearchCodeResult>(new MyRestRequest(), url);
    }

    /// <summary>
    /// Runs a new search. Optional arguments that are null, empty or whitespace are left out
    /// of the query string.
    /// </summary>
    /// <remarks>
    /// This Requires a hacked version of RestSharp because RestSharp URL encodes the query
    /// string and causes GitHub to reject the "q" query string.
    /// </remarks>
    /// <param name="q">The search query; required.</param>
    /// <param name="sort">Value for the "sort" parameter.</param>
    /// <param name="order">Value for the "order" parameter.</param>
    /// <param name="page">Value for the "page" parameter.</param>
    /// <param name="perPage">Value for the "per_page" parameter.</param>
    /// <returns>The typed response.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="q"/> is null, empty or whitespace.</exception>
    public IRestResponse<SearchCodeResult> Search(string q, string sort = null, string order = null, string page = null, string perPage = null)
    {
        if (string.IsNullOrWhiteSpace(q))
        {
            throw new ArgumentNullException("q");
        }

        var searchRequest = new MyRestRequest(resource);
        searchRequest.AddQueryParameter("q", q);
        AddIfPresent(searchRequest, "sort", sort);
        AddIfPresent(searchRequest, "order", order);
        AddIfPresent(searchRequest, "page", page);
        AddIfPresent(searchRequest, "per_page", perPage);

        return Execute<SearchCodeResult>(searchRequest);
    }

    // Adds the query parameter only when a non-blank value was supplied.
    private static void AddIfPresent(MyRestRequest request, string name, string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return;
        }
        request.AddQueryParameter(name, value);
    }
}
/// <summary>
/// Top-level body of a code search response. Every property is bound to the JSON key named
/// in its <c>JsonProperty</c> attribute.
/// </summary>
public class SearchCodeResult
{
// Total number of matches reported for the query; callers compare it against their result ceiling.
[JsonProperty("total_count")]
public int TotalCount { get; set; }
[JsonProperty("incomplete_results")]
public bool IncompleteResults { get; set; }
// The matches contained in this response; callers check it for null before iterating.
[JsonProperty("items")]
public Item[] Items { get; set; }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment