Created
November 20, 2019 21:59
-
-
Save gsalaz98/b87992cd5a5a214d01c63dbbefbbf12c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Creates an instance of <see cref="BenzingaNews"/> from RSS data (historical).
/// An example schema of the data (in JSON format, not XML) is provided below.
/// </summary>
/// <param name="contents">Raw contents of the RSS article, serialized as JSON following the schema in the example (it is parsed with <c>JsonConvert</c>)</param>
/// <param name="mapFileProvider">Map file provider</param>
/// <param name="mapFileResolver">Map file resolver</param>
/// <returns>
/// BenzingaNews instance, or <c>null</c> when the article has no <c>rss/channel/item</c> node
/// or the item contains no <c>bz:ticker</c> entries
/// </returns>
/// <example>
/// {
///    "?xml": {
///        "@version": "1.0",
///        "@encoding": "utf-8"
///    },
///    "rss": {
///        "@version": "2.0",
///        "@xml:base": "http://example/",
///        "@xmlns:dc": "http://example/",
///        "@xmlns:bz": "http://example/",
///        "channel": {
///            "title": string,
///            "link": string,
///            "description": string,
///            "language": string,
///            "item": {
///                "title": string,
///                "link": "http://example/",
///                "description": string,
///                "pubDate": DateTime (Mon, 1 Mar 19 00:00:00 -0400),
///                "dc:creator": string,
///                "guid": {
///                    "@isPermaLink": string,
///                    "#text": string
///                },
///                "category": [
///                    {
///                        "@domain": "publisher",
///                        "#text": string
///                    },
///                    {
///                        "@domain": "http://example/",
///                        "#text": string
///                    },
///                    ...
///                ],
///                "bz:id": string (parsable as int),
///                "bz:revisionid": string (parsable as int),
///                "bz:revisiondate": DateTime (Mon, 1 Mar 19 00:00:00 -0400),
///                "bz:type": {
///                    "@bz": string (parsable as int -> bool),
///                    "@pro": string (parsable as int -> bool),
///                    "@firstrun": string (parsable as int -> bool),
///                    "#text": string
///                },
///                "bz:ticker": [
///                    {
///                        "@primary": string (parsable as int -> bool),
///                        "#text": string
///                    },
///                    ...
///                ]
///            }
///        }
///    }
/// }
/// </example>
public static BenzingaNews CreateBenzingaNewsFromRSS(string contents, IMapFileProvider mapFileProvider, MapFileResolver mapFileResolver)
{
    // Walk down to the article node defensively: a document missing any of these levels
    // would otherwise surface as a NullReferenceException with no context.
    var item = JsonConvert.DeserializeObject<JObject>(contents)?["rss"]?["channel"]?["item"];
    if (item == null)
    {
        Log.Error("BenzingaFactory.CreateBenzingaNewsFromRSS(): Article is missing the rss/channel/item node");
        return null;
    }

    // Only process articles that contain tickers to disk
    if (item["bz:ticker"] == null)
    {
        return null;
    }

    // Regex.Replace throws on null input, so treat a missing description as an empty article body
    var description = item.Value<string>("description") ?? string.Empty;

    var instance = new BenzingaNews
    {
        Id = Parse.Int(item.Value<string>("bz:id")),
        Author = item.Value<string>("dc:creator"),
        CreatedAt = item.Value<DateTime>("pubDate").ToUniversalTime(),
        // The revision date is a Benzinga-specific field and carries the "bz:" prefix like
        // "bz:id" and "bz:ticker". Reading the un-prefixed key finds nothing and silently
        // yields default(DateTime).
        UpdatedAt = item.Value<DateTime>("bz:revisiondate").ToUniversalTime(),
        Title = item.Value<string>("title"),
        // Teasers are not present in the RSS data
        Teaser = string.Empty,
        // Replace every HTML tag in the article with a space, then convert HTML entities
        // to their string representation
        Contents = WebUtility.HtmlDecode(Regex.Replace(description, @"<[^>]*>", " ")),
        Categories = new List<string>(),
        Symbols = new List<Symbol>(),
        // We won't have any Tags since they're not present in the old data
        Tags = new List<string>()
    };

    // For instance.Categories: keep only entries that carry a domain and a non-blank name
    foreach (var category in GetValuesFromTag(item, "category"))
    {
        if (string.IsNullOrWhiteSpace(category.Value<string>("@domain")))
        {
            continue;
        }

        var name = WebUtility.HtmlDecode(category.Value<string>("#text"));
        if (string.IsNullOrWhiteSpace(name))
        {
            continue;
        }

        instance.Categories.Add(name);
    }

    // Use this collection to get rid of any duplicate symbols
    var tempSymbols = new HashSet<Symbol>();

    // For instance.Symbols
    foreach (var ticker in GetValuesFromTag(item, "bz:ticker"))
    {
        if (ticker["#text"] == null)
        {
            continue;
        }

        // Tickers with dots in them like BRK.A and BRK.B appear as BRK-A and BRK-B in Benzinga data.
        var symbolTicker = ticker.Value<string>("#text").Trim().Replace('-', '.');
        var mappedSymbol = mapFileResolver.ResolveMapFile(symbolTicker, instance.CreatedAt).GetMappedSymbol(instance.CreatedAt);
        var mappableSymbol = ShareClassMappedTickers.ContainsKey(symbolTicker);

        if (!mappableSymbol && string.IsNullOrWhiteSpace(mappedSymbol))
        {
            Log.Error($"BenzingaFactory.CreateBenzingaNewsFromRSS(): Failed to map old ticker {symbolTicker}. New ticker is null");
            continue;
        }

        if (!mappableSymbol)
        {
            // Regular ticker: use the symbol resolved through the map files
            tempSymbols.Add(new Symbol(
                SecurityIdentifier.GenerateEquity(symbolTicker, QuantConnect.Market.USA, mapSymbol: true, mapFileProvider: mapFileProvider, mappingResolveDate: instance.CreatedAt),
                mappedSymbol
            ));
        }
        else
        {
            // Ticker listed in ShareClassMappedTickers: emit one symbol per mapped ticker
            foreach (var mappedTicker in ShareClassMappedTickers[symbolTicker])
            {
                tempSymbols.Add(new Symbol(
                    SecurityIdentifier.GenerateEquity(mappedTicker, QuantConnect.Market.USA, mapSymbol: true, mapFileProvider: mapFileProvider, mappingResolveDate: instance.CreatedAt),
                    mappedTicker
                ));
            }
        }
    }

    instance.Symbols.AddRange(tempSymbols);
    return instance;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment