Skip to content

Instantly share code, notes, and snippets.

@gsalaz98
Created November 20, 2019 21:59
Show Gist options
  • Save gsalaz98/b87992cd5a5a214d01c63dbbefbbf12c to your computer and use it in GitHub Desktop.
Save gsalaz98/b87992cd5a5a214d01c63dbbefbbf12c to your computer and use it in GitHub Desktop.
/// <summary>
/// Creates an instance of <see cref="BenzingaNews"/> from RSS data (historical).
/// The input is expected to be the RSS document already converted to JSON;
/// an example schema of that JSON is provided below.
/// </summary>
/// <param name="contents">Raw contents of the RSS document, serialized as JSON</param>
/// <param name="mapFileProvider">Map file provider used when generating the equity security identifiers</param>
/// <param name="mapFileResolver">Map file resolver used to map each ticker as of the article's creation time</param>
/// <returns>
/// BenzingaNews instance, or <c>null</c> when the article does not contain any "bz:ticker" entry
/// </returns>
/// <example>
/// {
///     "?xml": {
///         "@version": "1.0",
///         "@encoding": "utf-8"
///     },
///     "rss": {
///         "@version": "2.0",
///         "@xml:base": "http://example/",
///         "@xmlns:dc": "http://example/",
///         "@xmlns:bz": "http://example/",
///         "channel": {
///             "title": string,
///             "link": string,
///             "description": string,
///             "language": string,
///             "item": {
///                 "title": string,
///                 "link": "http://example/",
///                 "description": string,
///                 "pubDate": DateTime (Mon, 1 Mar 19 00:00:00 -0400),
///                 "dc:creator": string,
///                 "guid": {
///                     "@isPermaLink": string,
///                     "#text": string
///                 },
///                 "category": [
///                     {
///                         "@domain": "publisher",
///                         "#text": string
///                     },
///                     {
///                         "@domain": "http://example/",
///                         "#text": string
///                     },
///                     ...
///                 ],
///                 "bz:id": string (parsable as int),
///                 "bz:revisionid": string (parsable as int),
///                 "bz:revisiondate": DateTime (Mon, 1 Mar 19 00:00:00 -0400),
///                 "bz:type": {
///                     "@bz": string (parsable as int -> bool),
///                     "@pro": string (parsable as int -> bool),
///                     "@firstrun": string (parsable as int -> bool),
///                     "#text": string
///                 },
///                 "bz:ticker": [
///                     {
///                         "@primary": string (parsable as int -> bool),
///                         "#text": string
///                     },
///                     ...
///                 ]
///             }
///         }
///     }
/// }
/// </example>
public static BenzingaNews CreateBenzingaNewsFromRSS(string contents, IMapFileProvider mapFileProvider, MapFileResolver mapFileResolver)
{
    var item = JsonConvert.DeserializeObject<JObject>(contents)["rss"]["channel"]["item"];

    // Only process articles that contain tickers to disk
    if (item["bz:ticker"] == null)
    {
        return null;
    }

    // A missing description would make Regex.Replace throw on a null input;
    // treat it as an article with empty contents instead.
    var description = item.Value<string>("description") ?? string.Empty;

    var instance = new BenzingaNews
    {
        Id = Parse.Int(item.Value<string>("bz:id")),
        Author = item.Value<string>("dc:creator"),
        CreatedAt = item.Value<DateTime>("pubDate").ToUniversalTime(),
        // The revision date is a Benzinga-namespaced field ("bz:revisiondate"), like "bz:id" and
        // "bz:ticker". Looking it up without the prefix never matches and yields default(DateTime).
        UpdatedAt = item.Value<DateTime>("bz:revisiondate").ToUniversalTime(),
        Title = item.Value<string>("title"),
        // Teasers are not present in the RSS data
        Teaser = string.Empty,
        // Replace every HTML tag with a single space, then convert HTML entities to their string
        // representation, e.g. "<p>Apple&#39;s Earnings</p>" becomes " Apple's Earnings "
        Contents = WebUtility.HtmlDecode(Regex.Replace(description, @"<[^>]*>", " ")),
        Categories = new List<string>(),
        Symbols = new List<Symbol>(),
        // We won't have any Tags since they're not present in the old data
        Tags = new List<string>()
    };

    // For instance.Categories: keep the decoded text of every category that declares a domain
    foreach (var category in GetValuesFromTag(item, "category"))
    {
        if (string.IsNullOrWhiteSpace(category.Value<string>("@domain")))
        {
            continue;
        }

        var name = WebUtility.HtmlDecode(category.Value<string>("#text"));
        if (string.IsNullOrWhiteSpace(name))
        {
            continue;
        }

        instance.Categories.Add(name);
    }

    // Use this collection to get rid of any duplicate symbols
    var tempSymbols = new HashSet<Symbol>();

    // For instance.Symbols
    foreach (var ticker in GetValuesFromTag(item, "bz:ticker"))
    {
        if (ticker["#text"] == null)
        {
            continue;
        }

        // Tickers with dots in them like BRK.A and BRK.B appear as BRK-A and BRK-B in Benzinga data.
        var symbolTicker = ticker.Value<string>("#text").Trim().Replace('-', '.');
        var mappedSymbol = mapFileResolver.ResolveMapFile(symbolTicker, instance.CreatedAt).GetMappedSymbol(instance.CreatedAt);
        var mappableSymbol = ShareClassMappedTickers.ContainsKey(symbolTicker);

        // Skip tickers that have neither a share class mapping nor a map file entry
        if (!mappableSymbol && string.IsNullOrWhiteSpace(mappedSymbol))
        {
            Log.Error($"BenzingaFactory.CreateBenzingaNewsFromRSS(): Failed to map old ticker {symbolTicker}. New ticker is null");
            continue;
        }

        if (!mappableSymbol)
        {
            tempSymbols.Add(new Symbol(
                SecurityIdentifier.GenerateEquity(symbolTicker, QuantConnect.Market.USA, mapSymbol: true, mapFileProvider: mapFileProvider, mappingResolveDate: instance.CreatedAt),
                mappedSymbol
            ));
        }
        else
        {
            // A share-class ticker expands into every ticker listed for it in ShareClassMappedTickers
            foreach (var mappedTicker in ShareClassMappedTickers[symbolTicker])
            {
                tempSymbols.Add(new Symbol(
                    SecurityIdentifier.GenerateEquity(mappedTicker, QuantConnect.Market.USA, mapSymbol: true, mapFileProvider: mapFileProvider, mappingResolveDate: instance.CreatedAt),
                    mappedTicker
                ));
            }
        }
    }

    instance.Symbols.AddRange(tempSymbols);
    return instance;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment