Skip to content

Instantly share code, notes, and snippets.

@MNF
Created December 2, 2017 06:57
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save MNF/4cd1fd31d6f49e5fe49530b683985d14 to your computer and use it in GitHub Desktop.
Save MNF/4cd1fd31d6f49e5fe49530b683985d14 to your computer and use it in GitHub Desktop.
Import Apple Reviews
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.Diagnostics;
using System.Diagnostics.Contracts;
using System.Globalization;
using System.Linq;
using System.Linq.Expressions;
using System.Threading.Tasks;
using System.Xml.Linq;
using AutoMapper;
using Flurl;
using Flurl.Http;
using Flurl.Http.Xml;
using Microsoft.SDC.Common;
using Microsoft.SDC.HtmlAgilityPack;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace WebJobs.ITunesReviews
{
public class ImportAppleReviews
{
/// <summary>
/// Downloads the customer reviews of one application, either from the RSS feed or
/// page by page from the WebObjects endpoint. Every downloaded document is handed to
/// the blob backup before it is parsed.
/// </summary>
/// <param name="countryCode">Country code placed into the RSS url and passed on to the WebObjects request.</param>
/// <param name="applicationIdKvp">Pair whose <c>Value</c> is the application id; the <c>Key</c> is not read here.</param>
/// <param name="useRss">If true, only the most recent reviews are loaded (RSS feed); otherwise WebObjects pages are requested one after another.</param>
/// <returns>The parsed reviews; the list is empty when nothing could be parsed.</returns>
public List<AppReview> GetReviews(string countryCode, KeyValuePair<string, string> applicationIdKvp, bool useRss)
{
    string appId = applicationIdKvp.Value;

    if (useRss)
    {
        // The JSON flavour of the feed has no Updated field, hence XML.
        string feedUrl = $"https://itunes.apple.com/{countryCode}/rss/customerreviews/id={appId}/sortBy=mostRecent/xml";
        XDocument feed = GetXDocument(feedUrl);
        _backupSentimentsToBlob.SaveToBlobStorage($"AppleITunesReviews_{countryCode}_{appId}", "xml", feed.ToString());
        return XDocumentToAppReviews(feed);
    }

    // WebObjects/MZStore.woa approach, taken from
    // https://github.com/grych/AppStoreReviews/blob/master/AppStoreReviews.py
    const int pageLimit = 999;
    var collected = new List<AppReview>();
    int page = 0;
    while (page < pageLimit)
    {
        // Blocks on the asynchronous download because this method is synchronous.
        XDocument pageDoc = GetReviewsFromWebObjects(countryCode, appId, page).Result;
        _backupSentimentsToBlob.SaveToBlobStorage($"AppleWebObjectsReviews_{countryCode}_{appId}_{page}", "xml", pageDoc.ToString());

        IEnumerable<AppReview> pageReviews = WebObjectXDocumentToAppReviews(pageDoc);
        if (!pageReviews.Any())
        {
            // The first page without any parsable review ends the paging.
            break;
        }

        collected.AddRange(pageReviews);
        page++;
    }

    return collected;
}
/// <summary>
/// Converts a WebObjects review page into reviews. A review container is recognised as a
/// VBoxView element carrying leftInset="10", rightInset="0" and stretchiness="1"
/// (e.g. &lt;VBoxView leftInset="10" rightInset="0" stretchiness="1" /&gt;).
/// It is not known how stable this markup is, so other checks may be worth considering.
/// </summary>
/// <param name="doc">The downloaded WebObjects page.</param>
/// <returns>The reviews for which WebObjectCreateAppReview produced a non-null result, in document order.</returns>
internal static IEnumerable<AppReview> WebObjectXDocumentToAppReviews(XDocument doc)
{
    var parsed = new List<AppReview>();

    foreach (var candidate in doc.DescendantsAnyNS("VBoxView"))
    {
        bool looksLikeReview =
            candidate.AttributeAnyNS("leftInset")?.Value == "10"
            && candidate.AttributeAnyNS("rightInset")?.Value == "0"
            && candidate.AttributeAnyNS("stretchiness")?.Value == "1";
        if (!looksLikeReview)
        {
            continue;
        }

        var review = WebObjectCreateAppReview(candidate);
        if (review != null)
        {
            parsed.Add(review);
        }
    }

    return parsed;
}
/// <summary>
/// Builds one review from a WebObjects review container element.
/// </summary>
/// <param name="vboxView">The VBoxView element that holds the title row, the author row and the body of one review.</param>
/// <returns>
/// The review, or null when the stripped title is null or when anything goes wrong while reading
/// the expected markup (the exception is logged together with the element).
/// </returns>
private static AppReview WebObjectCreateAppReview(XElement vboxView)
{
    try
    {
        // Example of the xml: C:\GitRepos\AnalyticsScripts\SentimentAnalysis\WebJobs.ITunesReviews\MockedData\WebObjectsReviews.xml
        // The direct HBoxView children are looked up once: the first one is the title/rating row,
        // the second one the author/version/date row.
        var rows = vboxView.ElementsSameNS("HBoxView").ToList();
        var headerRow = rows.First();

        var titleText = headerRow.ElementSameNS("TextView").ElementSameNS("SetFontStyle").Value;
        titleText = HtmlAgilityPackHelper.StripHtmlTags(titleText);
        if (titleText == null)
        {
            return null;
        }

        // The rating is carried as text, e.g. <HBoxView topInset="1" alt="5 stars">
        var ratingText = headerRow.ElementSameNS("HBoxView").ElementsSameNS("HBoxView").First().AttributeAnyNS("alt").Value;
        var rating = ratingText.ExtractAndParse<int>();

        // Taken from the url attribute of a GotoURL element; presumably RightAfter("=") keeps
        // the part after the '=' (confirm against the helper).
        var sourceCustomerResponseId = headerRow.ElementsSameNS("HBoxView").ElementsSameNS("HBoxView").ToList()[1]
            .ElementsSameNS("VBoxView").First().ElementSameNS("GotoURL").AttributeAnyNS("url").Value.RightAfter("=");

        /* Shape of the author row:
           <SetFontStyle normalStyle="textColor">
             by
             <GotoURL target="main" inhibitDragging="false"
                      url="https://itunes.apple.com/WebObjects/MZStore.woa/wa/viewUsersUserReviews?userProfileId=427034608">
               <b>Tay-lee-a</b>
             </GotoURL>
             -
             Version 4.2.3
             -
             01 August 2016
           </SetFontStyle>
        */
        var urlAndAuthorElement = rows[1].ElementAnyNS("TextView").ElementAnyNS("SetFontStyle");

        // Author names may contain '-' themselves, so version and date are taken from the END of the split.
        var arrayVerAndDate = urlAndAuthorElement.Value.Split('-');
        var ver = arrayVerAndDate.SecondLast().Replace("Version", "", StringComparison.InvariantCultureIgnoreCase).Trim();
        var date = arrayVerAndDate.Last().Trim();

        // Parse with the invariant culture so that English month names ("01 August 2016") are
        // understood regardless of the culture the host thread happens to run under.
        var creationDate = DateTime.Parse(date, CultureInfo.InvariantCulture);

        var goToUrlElement = urlAndAuthorElement.ElementAnyNS("GotoURL");
        var customerName = HtmlAgilityPackHelper.StripHtmlTags(goToUrlElement.Value).Trim();

        var contentText = vboxView.ElementAnyNS("TextView").ElementAnyNS("SetFontStyle").Value;

        return new AppReview(titleText, contentText)
        {
            SourceUniqueId = sourceCustomerResponseId,
            CreationDate = creationDate,
            Rating = rating,
            Version = ver,
            CustomerName = customerName
        };
    }
    catch (Exception exc)
    {
        // Best effort: one element that does not match the expected markup must not abort the whole page.
        var msg = "An error has occured when processing XElement ";
        WebjobsLoggingHelper.LogError(msg, exc, vboxView.ToString());
        return null;
    }
}
/// <summary>
/// Requests one page of user reviews from the WebObjects (MZStore.woa) endpoint.
/// Approach taken from https://github.com/grych/AppStoreReviews/blob/master/AppStoreReviews.py
/// </summary>
/// <param name="countryCode">Country code that is translated into the X-Apple-Store-Front header value.</param>
/// <param name="appId">Application id placed into the <c>id</c> query parameter.</param>
/// <param name="pageNo">Page number placed into the <c>pageNumber</c> query parameter.</param>
/// <returns>The response parsed as an XML document.</returns>
public static async Task<XDocument> GetReviewsFromWebObjects(string countryCode, string appId, int pageNo)
{
    var userAgent = "iTunes/9.2 (Macintosh; U; Mac OS X 10.6)";
    // e.g. Australia => store 143460. The python original sends "%d-1" % appStoreId; here the "-1" suffix is NOT appended.
    var front = GetStoreId(countryCode);
    var headers = new Dictionary<string, string>()
    {
        {"X-Apple-Store-Front", front},
        {"User-Agent", userAgent}
    };
    // Invariant formatting keeps the page number free of culture-specific digits or signs.
    var url = String.Format(
        CultureInfo.InvariantCulture,
        "http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZStore.woa/wa/viewContentsUserReviews?id={0}&pageNumber={1}&sortOrdering=4&onlyLatestVersion=false&type=Purple+Software",
        appId, pageNo);

    // The client is created explicitly, so it has to be disposed explicitly as well.
    using (var flurl = new FlurlClient(url))
    {
        foreach (var dictEntry in headers)
        {
            flurl.WithHeader(dictEntry.Key, dictEntry.Value);
        }

        // GetReviews blocks on this task with .Result; not capturing the context avoids a
        // deadlock when a synchronization context is present.
        var res = await flurl.GetXDocumentAsync().ConfigureAwait(false);
        return res;
    }
}
/// <summary>
/// Downloads <paramref name="url"/> as an XML document, blocking until the request has
/// completed, and writes the response to the debug output.
/// </summary>
/// <param name="url">Address to download.</param>
/// <returns>The downloaded document.</returns>
private static XDocument GetXDocument(string url)
{
    XDocument downloaded = url.GetXDocumentAsync().Result;
    Debug.WriteLine($"From url {url} Response is {downloaded}");
    return downloaded;
}
/// <summary>
/// Converts a customer-reviews feed document into reviews. Every <c>entry</c> element in the
/// namespace of the document root is passed to CreateAppReview; entries for which that
/// returns null are left out.
/// </summary>
/// <param name="doc">The feed document; its root element must be present.</param>
/// <returns>The reviews in document order.</returns>
public static List<AppReview> XDocumentToAppReviews(XDocument doc)
{
    XName entryName = doc.Root.Name.Namespace + "entry";

    var parsedEntries =
        from entry in doc.Descendants(entryName)
        let review = CreateAppReview(entry)
        where review != null
        select review;

    return parsedEntries.ToList();
}
/// <summary>
/// Builds one review from an <c>entry</c> element of the customer-reviews feed.
/// </summary>
/// <param name="e">The feed entry.</param>
/// <returns>
/// The review, or null when the entry has no <c>content</c> element with type="text" or when its
/// <c>updated</c> value is missing or cannot be parsed as a date.
/// </returns>
private static AppReview CreateAppReview(XElement e)
{
    var contentText = e.ElementsSameNS("content")
        .FirstOrDefault(cont => cont.Attribute("type")?.Value == "text")?.Value;
    if (contentText == null)
    {
        return null;
    }

    // A missing or malformed date used to throw out of the LINQ pipeline and abort the whole
    // import; such an entry is now skipped, in line with how the WebObjects parser treats
    // elements it cannot read.
    var updatedText = e.ElementSameNS("updated")?.Value;
    DateTime creationDate;
    if (!DateTime.TryParse(updatedText, CultureInfo.InvariantCulture, DateTimeStyles.None, out creationDate))
    {
        Trace.TraceWarning("Skipping feed entry with missing or invalid 'updated' value '{0}'", updatedText);
        return null;
    }

    return new AppReview(e.ElementSameNS("title")?.Value, contentText)
    {
        SourceUniqueId = e.ElementSameNS("id")?.Value,
        CreationDate = creationDate,
        // Falls back to 0 when the rating is absent or not a number (second argument of TryParse).
        Rating = StructExtensions.TryParse(e.ElementAnyNS("rating")?.Value, 0),
        Version = e.ElementAnyNS("version")?.Value,
        CustomerName = e.ElementSameNS("author")?.ElementSameNS("name")?.Value
    };
}
/// <summary>
/// Maps a country code to the store id that is sent as the store-front header of a WebObjects request.
/// Values are taken from https://github.com/grych/AppStoreReviews/blob/master/AppStoreReviews.py
/// </summary>
/// <param name="countryCode">
/// Country code such as "AU"; matched case-insensitively so that the lower-case form used in
/// store urls resolves too. An empty string selects the default (USA?) store.
/// </param>
/// <returns>The store id, or an empty string for null and for countries that are not listed.</returns>
private static string GetStoreId(string countryCode)
{
    //TODO: ADD other countries if need
    switch (countryCode?.ToUpperInvariant())
    {
        case "":
            return "143441"; //USA?
        case "AU":
            return "143460";
        case "NZ":
            return "143461";
        case "SG":
            return "143464";
        case "HK":
            return "143463";
        default:
            return "";
    }
}
}
}
@MNF
Copy link
Author

MNF commented Dec 2, 2017

The class shows how to read Apple reviews using two methods: the latest reviews via RSS and historical reviews via WebObjects.
The class is extracted from a proprietary application and may have missing references (and as such will not compile).
Also, the application was created in 2016 and has not been actively maintained since then, so the branch using WebObjects may be broken.

@RikScheffer
Copy link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment