Skip to content

Instantly share code, notes, and snippets.

@NotAdam
Created December 25, 2019 09:07
Show Gist options
  • Save NotAdam/bbbe78080af35ac86b1f632325391d40 to your computer and use it in GitHub Desktop.
Save NotAdam/bbbe78080af35ac86b1f632325391d40 to your computer and use it in GitHub Desktop.
parse yotpo review garbage
/// <summary>
/// Plain data holder for a single review scraped from the reviews widget.
/// </summary>
public class ReviewInfo
{
    /// <summary>Display name of the review's author.</summary>
    public string Name { get; set; }

    /// <summary>Review date, kept as the raw text taken from the page.</summary>
    public string Date { get; set; }

    /// <summary>Star rating parsed from the review; 0 when none was found.</summary>
    public int Stars { get; set; }

    /// <summary>Headline of the review.</summary>
    public string Title { get; set; }

    /// <summary>Body text of the review.</summary>
    public string Description { get; set; }

    /// <summary>URL of the image attached to the review, or null when there is none.</summary>
    public string ImageURL { get; set; }

    /// <summary>Number of "up" votes.</summary>
    public int Upboats { get; set; }

    /// <summary>Number of "down" votes.</summary>
    public int Downdoots { get; set; }

    /// <summary>
    /// Extra question/answer fields attached to the review, keyed by question label.
    /// Starts out as an empty dictionary, never null.
    /// </summary>
    public Dictionary<string, string> Questions { get; set; } = new Dictionary<string, string>();
}
// Single HttpClient instance shared by every request issued from GetPage.
static HttpClient client = new HttpClient();
/// <summary>
/// Extracts every review found in one rendered page of the reviews widget.
/// </summary>
/// <param name="penis">Parsed HTML of a single widget page.</param>
/// <returns>
/// One <see cref="ReviewInfo"/> per matching node that has a non-empty author name.
/// The list is empty (never null) when the page contains no review nodes.
/// </returns>
public List<ReviewInfo> ProcessPage(HtmlDocument penis)
{
    var items = new List<ReviewInfo>();

    // SelectNodes yields null, not an empty collection, when nothing matches.
    var reviews = penis.DocumentNode.SelectNodes("//div[contains(@class, 'yotpo-review')]");
    if (reviews == null)
    {
        return items;
    }

    // Pulls the first run of digits out of a text fragment; 0 when the text is
    // missing, contains no digits, or the digits do not fit in an int.
    var firstNumber = new Regex(@"\d+");
    Func<string, int> getInt = text =>
    {
        if (string.IsNullOrEmpty(text))
        {
            return 0;
        }

        var match = firstNumber.Match(text);
        int value;
        return match.Success && int.TryParse(match.Value, out value) ? value : 0;
    };

    foreach (var review in reviews)
    {
        var author = review.SelectSingleNode(".//span[contains(@class, 'yotpo-user-name')]")?.InnerText.Trim();

        // The class filter above also matches non-review wrappers; anything
        // without an author name is not an actual review.
        if (string.IsNullOrEmpty(author))
        {
            continue;
        }

        var title = review.SelectSingleNode(".//div[contains(@class, 'content-title')]")?.InnerText.Trim();
        var content = review.SelectSingleNode(".//div[contains(@class, 'content-review')]")?.InnerText.Trim();
        var stars = review.SelectSingleNode(".//span[contains(@class, 'yotpo-review-stars')]/span[contains(@class, 'sr-only')]")?.InnerText.Trim();
        var date = review.SelectSingleNode(".//span[contains(@class, 'yotpo-review-date')]")?.InnerText.Trim();
        var upVotes = review.SelectSingleNode(".//span[contains(@class, 'vote-sum')][@data-type='up']")?.InnerText.Trim();
        var downVotes = review.SelectSingleNode(".//span[contains(@class, 'vote-sum')][@data-type='down']")?.InnerText.Trim();

        var ri = new ReviewInfo
        {
            Name = author,
            Title = title,
            Description = content,
            Stars = getInt(stars),
            Upboats = getInt(upVotes),
            Downdoots = getInt(downVotes),
            Date = date
        };

        var img = review.SelectSingleNode(".//img[contains(@class, 'image-review media-review')]");
        var imgSrc = img?.Attributes["data-original-src"]?.Value;
        if (!string.IsNullOrEmpty(imgSrc))
        {
            // The attribute normally holds a protocol-relative URL ("//host/path");
            // only those need a scheme prepended.
            ri.ImageURL = imgSrc.StartsWith("//", StringComparison.Ordinal) ? "https:" + imgSrc : imgSrc;
        }

        var questions = review.SelectNodes(".//div[@class=\"yotpo-question-field\"]");
        if (questions != null)
        {
            foreach (var question in questions)
            {
                var label = question.SelectSingleNode(".//div[@class=\"yotpo-question-field-description\"]")?.InnerText.Trim().Replace(":", "");
                if (string.IsNullOrEmpty(label))
                {
                    // A field without a label cannot be keyed; skip it.
                    continue;
                }

                var answer = question.SelectSingleNode(".//div[@class=\"yotpo-question-field-answer\"]")?.InnerText.Trim() ?? string.Empty;

                // Indexer assignment so a repeated label overwrites instead of throwing.
                ri.Questions[label] = answer;
            }
        }

        items.Add(ri);
    }

    return items;
}
/// <summary>
/// Requests one page of rendered reviews for a product from the widget batch endpoint.
/// </summary>
/// <param name="productId">Product identifier sent as the <c>pid</c> parameter; null is sent as an empty string.</param>
/// <param name="page">Page number to request.</param>
/// <returns>The raw response body as returned by the endpoint.</returns>
/// <exception cref="HttpRequestException">The request failed or the endpoint answered with a non-success status code.</exception>
public async Task<string> GetPage(string productId, int page)
{
    // Build the payload as a JSON tree rather than by string substitution, so the
    // product id is escaped correctly and cannot collide with template text.
    var methods = new Newtonsoft.Json.Linq.JArray(
        new Newtonsoft.Json.Linq.JObject
        {
            ["method"] = "reviews",
            ["params"] = new Newtonsoft.Json.Linq.JObject
            {
                ["pid"] = productId ?? string.Empty,
                ["order_metadata_fields"] = new Newtonsoft.Json.Linq.JObject(),
                ["index"] = 0,
                ["data_source"] = "default",
                ["page"] = page,
                ["host-widget"] = "main_widget",
                ["is_mobile"] = false,
                ["pictures_per_review"] = 10
            }
        });

    var form = new Dictionary<string, string>
    {
        { "methods", methods.ToString(Newtonsoft.Json.Formatting.None) },
        { "app_key", "12NaMZHdGcfHqPsBaDdcGMthwheuD4jzEUCrYzeV" },
        { "is_mobile", "false" },
        { "widget_version", "2019-12-23_14-03-36" }
    };

    using (var body = new FormUrlEncodedContent(form))
    using (var res = await client.PostAsync("https://staticw2.yotpo.com/batch", body))
    {
        // Fail here with a clear HTTP error instead of handing an error page to the JSON parser.
        res.EnsureSuccessStatusCode();
        return await res.Content.ReadAsStringAsync();
    }
}
/// <summary>
/// Downloads and parses every page of reviews for a product.
/// </summary>
/// <param name="productId">Product identifier passed through to <c>GetPage</c>.</param>
/// <returns>All reviews, first page first, followed by the remaining pages in page order.</returns>
/// <remarks>
/// The first page is fetched on its own because it carries the pagination links
/// that reveal how many pages exist; the remaining pages are requested concurrently.
/// </remarks>
public async Task<List<ReviewInfo>> GetReviews(string productId)
{
    var firstPage = ParseWidgetHtml(await GetPage(productId, 1));
    var pageCount = ReadPageCount(firstPage);

    var reviews = new List<ReviewInfo>();
    reviews.AddRange(ProcessPage(firstPage));

    var pending = new List<Task<string>>();
    for (int i = 2; i <= pageCount; i++)
    {
        pending.Add(GetPage(productId, i));
    }

    // WhenAll returns the bodies in the same order as the tasks were added.
    var bodies = await Task.WhenAll(pending).ConfigureAwait(false);
    foreach (var body in bodies)
    {
        reviews.AddRange(ProcessPage(ParseWidgetHtml(body)));
    }

    return reviews;
}

// Unwraps the HTML fragment stored under "result" in the first element of the batch response.
private static HtmlDocument ParseWidgetHtml(string json)
{
    var result = Newtonsoft.Json.Linq.JArray.Parse(json)[0]["result"];
    if (result == null)
    {
        throw new InvalidOperationException("Batch response does not contain a 'result' entry.");
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(result.ToString());
    return doc;
}

// Returns the highest page number shown in the pagination links, or 1 when there are none.
private static int ReadPageCount(HtmlDocument page)
{
    var pageCount = 1;

    // Null when the widget renders no pagination (everything fits on one page).
    var links = page.DocumentNode.SelectNodes("//a[contains(@class, 'yotpo-page-element')]");
    if (links == null)
    {
        return pageCount;
    }

    // Some pagination elements carry no number (the original code noted an empty
    // trailing element), so take the largest numeric label rather than a fixed index.
    foreach (var link in links)
    {
        int number;
        if (int.TryParse(link.InnerText.Trim(), out number) && number > pageCount)
        {
            pageCount = number;
        }
    }

    return pageCount;
}
/// <summary>
/// Script entry point: fetches all reviews for a sample product and dumps them.
/// </summary>
/// <remarks>
/// Returns a Task rather than void so the host can await completion and observe
/// any exception thrown while fetching or parsing.
/// </remarks>
async Task Main()
{
    var reviews = await GetReviews("701-04410");
    reviews.Dump();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment