Last active
August 29, 2015 14:11
-
-
Save RichardD2/f7bf9c50e43a7802708e to your computer and use it in GitHub Desktop.
An RSS feed formatter which supports parsing v0.91 format feeds.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * An RSS feed formatter which supports parsing v0.91 format feeds.
 *
 * Based on:
 * http://referencesource.microsoft.com/#System.ServiceModel/System/ServiceModel/Syndication/Rss20FeedFormatter.cs
 */
using System; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.ServiceModel.Syndication; | |
using System.Text; | |
using System.Xml; | |
namespace System.ServiceModel.Syndication
{
    /// <summary>
    /// An RSS feed reader which supports RSS 0.91 feeds.
    /// </summary>
    /// <remarks>
    /// Reading accepts documents whose root <c>version</c> attribute is
    /// <c>0.91</c> or <c>2.0</c>; any other value is rejected with a
    /// <see cref="NotSupportedException"/>.
    /// </remarks>
    public sealed class Rss10FeedFormatter : Rss20FeedFormatter
    {
        /// <summary>
        /// Namespace URI bound to the reserved <c>xml</c> prefix; used to recognise <c>xml:base</c>.
        /// </summary>
        private const string XmlNamespaceUri = "http://www.w3.org/XML/1998/namespace";

        /// <summary>
        /// Initializes a new instance of the <see cref="Rss10FeedFormatter"/> class.
        /// </summary>
        /// <param name="feed">The feed.</param>
        public Rss10FeedFormatter(SyndicationFeed feed) : base(feed)
        {
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="Rss10FeedFormatter"/> class.
        /// </summary>
        public Rss10FeedFormatter()
        {
        }

        /// <summary>
        /// Reads an RSS 0.91 (or 2.0) syndication feed from the specified <see cref="XmlReader"/> instance.
        /// </summary>
        /// <param name="reader">
        /// The <see cref="XmlReader"/> to read from.
        /// </param>
        /// <exception cref="NotSupportedException">
        /// <c>CanRead</c> rejected the reader, or the root <c>version</c> attribute is neither
        /// <c>0.91</c> nor <c>2.0</c>.
        /// </exception>
        public override void ReadFrom(XmlReader reader)
        {
            if (!CanRead(reader))
            {
                throw new NotSupportedException("Unknown RSS 0.91 feed format.");
            }

            SetFeed(CreateFeedInstance());
            ReadXml(reader, Feed);
        }

        /// <summary>
        /// Parses the root element and its <c>channel</c> child into <paramref name="result"/>.
        /// </summary>
        /// <param name="reader">The reader, positioned on (or before) the root element.</param>
        /// <param name="result">The feed instance to populate.</param>
        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
        /// <exception cref="NotSupportedException">The <c>version</c> attribute is not <c>2.0</c> or <c>0.91</c>.</exception>
        /// <exception cref="InvalidOperationException">A second, non-contiguous run of <c>item</c> elements was found.</exception>
        private void ReadXml(XmlReader reader, SyndicationFeed result)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            if (result == null)
            {
                throw new ArgumentNullException("result");
            }

            reader.MoveToContent();

            string version = reader.GetAttribute("version", "");
            if (version != "2.0" && version != "0.91")
            {
                throw new NotSupportedException("Unknown RSS version: " + version);
            }

            // "version" is known to be present at this point, so xml:base can only
            // exist on the root element when there is more than one attribute.
            string baseUrl = null;
            if (reader.AttributeCount > 1)
            {
                string rootBase = reader.GetAttribute("base", XmlNamespaceUri);
                if (!string.IsNullOrEmpty(rootBase))
                {
                    baseUrl = rootBase;
                }
            }

            // Step into the root element and move to its first content node.
            reader.ReadStartElement();
            reader.MoveToContent();

            if (reader.HasAttributes)
            {
                while (reader.MoveToNextAttribute())
                {
                    string localName = reader.LocalName;
                    string namespaceUri = reader.NamespaceURI;
                    string value = reader.Value;

                    if ("base" == localName && XmlNamespaceUri == namespaceUri)
                    {
                        // An xml:base on this element overrides the one from the root element.
                        baseUrl = value;
                    }
                    else if (!TryParseAttribute(localName, namespaceUri, value, result, Version))
                    {
                        result.AttributeExtensions.Add(new XmlQualifiedName(localName, namespaceUri), value);
                    }
                }
            }

            if (!string.IsNullOrEmpty(baseUrl))
            {
                result.BaseUri = new Uri(baseUrl, UriKind.RelativeOrAbsolute);
            }

            // ReadStartElement moves from the last attribute back to its owner element
            // before checking the name.
            reader.ReadStartElement("channel", string.Empty);

            bool startedItems = false;
            while (reader.IsStartElement())
            {
                if (reader.IsStartElement("title", string.Empty))
                {
                    result.Title = new TextSyndicationContent(reader.ReadElementString());
                }
                else if (reader.IsStartElement("description", string.Empty))
                {
                    result.Description = new TextSyndicationContent(reader.ReadElementString());
                }
                else if (reader.IsStartElement("language", string.Empty))
                {
                    result.Language = reader.ReadElementString();
                }
                else if (reader.IsStartElement("copyright", string.Empty))
                {
                    result.Copyright = new TextSyndicationContent(reader.ReadElementString());
                }
                else if (reader.IsStartElement("generator", string.Empty))
                {
                    result.Generator = reader.ReadElementString();
                }
                else if (reader.IsStartElement("link", string.Empty))
                {
                    result.Links.Add(ReadAlternateLink(reader, result.BaseUri));
                }
                else if (reader.IsStartElement("managingEditor", string.Empty))
                {
                    result.Authors.Add(ReadPerson(reader, result));
                }
                else if (reader.IsStartElement("lastBuildDate", string.Empty))
                {
                    ReadLastBuildDate(reader, result);
                }
                else if (reader.IsStartElement("category", string.Empty))
                {
                    result.Categories.Add(ReadCategory(reader, result));
                }
                else if (reader.IsStartElement("image", string.Empty))
                {
                    ReadImage(reader, result);
                }
                else if (reader.IsStartElement("item", string.Empty))
                {
                    if (startedItems)
                    {
                        throw new InvalidOperationException("The feed has non-contiguous items.");
                    }

                    bool allItemsRead;
                    result.Items = ReadItems(reader, result, out allItemsRead);
                    startedItems = true;

                    if (!allItemsRead)
                    {
                        // The remaining items have not been consumed yet, so the
                        // reader must not be advanced any further here.
                        break;
                    }
                }
                else if (!TryParseElement(reader, result, Version))
                {
                    // Nothing claimed this element. Skip it so the loop always makes
                    // progress; when TryParseElement succeeds it has already consumed it.
                    reader.Skip();
                }
            }
        }

        /// <summary>
        /// Reads a <c>lastBuildDate</c> element into <see cref="SyndicationFeed.LastUpdatedTime"/>.
        /// An empty element or blank content leaves the feed untouched.
        /// </summary>
        /// <param name="reader">The reader, positioned on the element's start tag.</param>
        /// <param name="result">The feed to update.</param>
        private static void ReadLastBuildDate(XmlReader reader, SyndicationFeed result)
        {
            // Must be captured before advancing: for <lastBuildDate/> there is no
            // content and no end tag to read.
            bool isEmpty = reader.IsEmptyElement;
            reader.ReadStartElement();
            if (isEmpty)
            {
                return;
            }

            string text = reader.ReadString();
            if (!string.IsNullOrEmpty(text) && text.Trim().Length > 0)
            {
                result.LastUpdatedTime = DateFromString(text);
            }

            reader.ReadEndElement();
        }

        /// <summary>
        /// Reads an <c>image</c> element, keeping only its <c>url</c> child
        /// (stored in <see cref="SyndicationFeed.ImageUrl"/>); other children are skipped.
        /// </summary>
        /// <param name="reader">The reader, positioned on the element's start tag.</param>
        /// <param name="result">The feed to update.</param>
        private static void ReadImage(XmlReader reader, SyndicationFeed result)
        {
            // For <image/> there are no children and no end tag; reading on would
            // swallow the following sibling elements instead.
            bool isEmpty = reader.IsEmptyElement;
            reader.ReadStartElement();
            if (isEmpty)
            {
                return;
            }

            while (reader.IsStartElement())
            {
                if (reader.IsStartElement("url", string.Empty))
                {
                    result.ImageUrl = new Uri(reader.ReadElementString(), UriKind.RelativeOrAbsolute);
                }
                else
                {
                    reader.Skip();
                }
            }

            reader.ReadEndElement();
        }

        /// <summary>
        /// Resolves an <c>xml:base</c> value against an inherited base URI.
        /// </summary>
        /// <param name="rootBase">The inherited base URI; may be <c>null</c>.</param>
        /// <param name="newBase">The new <c>xml:base</c> value; may be <c>null</c> or empty.</param>
        /// <returns>
        /// <paramref name="rootBase"/> when <paramref name="newBase"/> is null or empty; otherwise
        /// <paramref name="newBase"/> as a URI, resolved against <paramref name="rootBase"/> when it
        /// is relative and a root base is available.
        /// </returns>
        private static Uri CombineXmlBase(Uri rootBase, string newBase)
        {
            if (string.IsNullOrEmpty(newBase))
            {
                return rootBase;
            }

            var uri = new Uri(newBase, UriKind.RelativeOrAbsolute);
            if (rootBase != null && !uri.IsAbsoluteUri)
            {
                uri = new Uri(rootBase, newBase);
            }

            return uri;
        }

        /// <summary>
        /// Reads a <c>link</c> element as an "alternate" <see cref="SyndicationLink"/>.
        /// </summary>
        /// <param name="reader">The reader, positioned on the element's start tag.</param>
        /// <param name="baseUri">The base URI inherited from the feed; may be <c>null</c>.</param>
        /// <returns>The link; its URI is built from the element text (empty for an empty element).</returns>
        private static SyndicationLink ReadAlternateLink(XmlReader reader, Uri baseUri)
        {
            var link = new SyndicationLink
            {
                BaseUri = baseUri,
                RelationshipType = "alternate",
            };

            // Capture before visiting attributes; the value is only meaningful
            // while the reader is on the element node itself.
            bool isEmpty = reader.IsEmptyElement;

            if (reader.HasAttributes)
            {
                while (reader.MoveToNextAttribute())
                {
                    string value = reader.Value;
                    string localName = reader.LocalName;
                    string namespaceUri = reader.NamespaceURI;

                    if (localName == "base" && namespaceUri == XmlNamespaceUri)
                    {
                        link.BaseUri = CombineXmlBase(link.BaseUri, value);
                    }
                    else
                    {
                        link.AttributeExtensions.Add(new XmlQualifiedName(localName, namespaceUri), value);
                    }
                }
            }

            reader.ReadStartElement();

            // <link/> has neither text nor an end tag. Treat it like <link></link>
            // rather than reading the next sibling's content as the URI.
            string uriText = string.Empty;
            if (!isEmpty)
            {
                uriText = reader.ReadString();
                reader.ReadEndElement();
            }

            link.Uri = new Uri(uriText, UriKind.RelativeOrAbsolute);
            return link;
        }

        /// <summary>
        /// Populates <paramref name="category"/> from a <c>category</c> element: the <c>domain</c>
        /// attribute becomes the scheme, the element text becomes the name, and any other attribute
        /// is offered to <c>TryParseAttribute</c> before being stored as an attribute extension.
        /// </summary>
        /// <param name="reader">The reader, positioned on the element's start tag.</param>
        /// <param name="category">The category to populate.</param>
        private void ReadCategory(XmlReader reader, SyndicationCategory category)
        {
            bool isEmpty = reader.IsEmptyElement;

            if (reader.HasAttributes)
            {
                while (reader.MoveToNextAttribute())
                {
                    string value = reader.Value;
                    string localName = reader.LocalName;
                    string namespaceUri = reader.NamespaceURI;

                    if (localName == "domain" && string.IsNullOrEmpty(namespaceUri))
                    {
                        category.Scheme = value;
                    }
                    else if (!TryParseAttribute(localName, namespaceUri, value, category, Version))
                    {
                        category.AttributeExtensions.Add(new XmlQualifiedName(localName, namespaceUri), value);
                    }
                }
            }

            reader.ReadStartElement("category", string.Empty);
            if (!isEmpty)
            {
                category.Name = reader.ReadString();
                reader.ReadEndElement();
            }
        }

        /// <summary>
        /// Creates a category for <paramref name="feed"/> and populates it from the current element.
        /// </summary>
        /// <param name="reader">The reader, positioned on a <c>category</c> start tag.</param>
        /// <param name="feed">The feed the category belongs to.</param>
        /// <returns>The populated category.</returns>
        private SyndicationCategory ReadCategory(XmlReader reader, SyndicationFeed feed)
        {
            var category = CreateCategory(feed);
            ReadCategory(reader, category);
            return category;
        }

        /// <summary>
        /// Populates <paramref name="person"/> from the current element: the element text becomes
        /// the e-mail, and attributes not handled by <c>TryParseAttribute</c> are stored as
        /// attribute extensions.
        /// </summary>
        /// <param name="reader">The reader, positioned on the element's start tag.</param>
        /// <param name="person">The person to populate.</param>
        private void ReadPerson(XmlReader reader, SyndicationPerson person)
        {
            bool isEmpty = reader.IsEmptyElement;

            if (reader.HasAttributes)
            {
                while (reader.MoveToNextAttribute())
                {
                    string value = reader.Value;
                    string localName = reader.LocalName;
                    string namespaceUri = reader.NamespaceURI;

                    if (!TryParseAttribute(localName, namespaceUri, value, person, Version))
                    {
                        person.AttributeExtensions.Add(new XmlQualifiedName(localName, namespaceUri), value);
                    }
                }
            }

            reader.ReadStartElement();
            if (!isEmpty)
            {
                person.Email = reader.ReadString();
                reader.ReadEndElement();
            }
        }

        /// <summary>
        /// Creates a person for <paramref name="feed"/> and populates it from the current element.
        /// </summary>
        /// <param name="reader">The reader, positioned on the element's start tag.</param>
        /// <param name="feed">The feed the person belongs to.</param>
        /// <returns>The populated person.</returns>
        private SyndicationPerson ReadPerson(XmlReader reader, SyndicationFeed feed)
        {
            var person = CreatePerson(feed);
            ReadPerson(reader, person);
            return person;
        }

        /// <summary>
        /// Removes all leading white-space characters from <paramref name="stringBuilder"/>.
        /// </summary>
        /// <param name="stringBuilder">The buffer to trim in place.</param>
        private static void RemoveExtraWhiteSpaceAtStart(StringBuilder stringBuilder)
        {
            int index = 0;
            while (index < stringBuilder.Length && char.IsWhiteSpace(stringBuilder[index]))
            {
                index++;
            }

            if (index > 0)
            {
                stringBuilder.Remove(0, index);
            }
        }

        /// <summary>
        /// Collapses every run of white space that is followed by a non-white-space character
        /// into a single space, in place.
        /// </summary>
        /// <param name="stringBuilder">The buffer to normalise.</param>
        private static void ReplaceMultipleWhiteSpaceWithSingleWhiteSpace(StringBuilder stringBuilder)
        {
            int index = 0;
            int startIndex = -1; // start of the current white-space run, or -1 when outside one

            while (index < stringBuilder.Length)
            {
                if (char.IsWhiteSpace(stringBuilder[index]))
                {
                    if (startIndex == -1)
                    {
                        startIndex = index;
                        stringBuilder[index] = ' ';
                    }
                }
                else if (startIndex != -1)
                {
                    if (index > (startIndex + 1))
                    {
                        // Keep the first character of the run (already a space) and drop the rest.
                        stringBuilder.Remove(startIndex, (index - startIndex) - 1);
                        index = startIndex + 1;
                    }

                    startIndex = -1;
                }

                index++;
            }
        }

        /// <summary>
        /// Converts a time-zone suffix into the <c>+hh:mm</c> / <c>-hh:mm</c> form expected by the
        /// <c>zzz</c> format specifier.
        /// </summary>
        /// <param name="rfc822TimeZone">
        /// The zone text: a signed numeric offset (<c>+HMM</c> or <c>+HHMM</c>), <c>Z</c>, <c>UT</c>,
        /// or one of the names in <see cref="TimeZoneMap"/>.
        /// </param>
        /// <param name="isUtc">Set to <c>true</c> only for <c>Z</c> and <c>UT</c>.</param>
        /// <returns>
        /// The normalised offset, or an empty string when the zone is null, empty or not recognised.
        /// A signed value of any other length is returned unchanged.
        /// </returns>
        private static string NormalizeTimeZone(string rfc822TimeZone, out bool isUtc)
        {
            isUtc = false;

            if (string.IsNullOrEmpty(rfc822TimeZone))
            {
                return string.Empty;
            }

            if (rfc822TimeZone[0] == '+' || rfc822TimeZone[0] == '-')
            {
                var offset = new StringBuilder(rfc822TimeZone);

                if (offset.Length == 4)
                {
                    // +HMM: pad the hour to two digits.
                    offset.Insert(1, '0');
                }

                if (offset.Length == 5)
                {
                    // +HHMM -> +HH:MM. Other lengths are left alone so that inserting
                    // the colon can never run past the end of the buffer.
                    offset.Insert(3, ':');
                }

                return offset.ToString();
            }

            if (rfc822TimeZone == "Z" || rfc822TimeZone == "UT")
            {
                isUtc = true;
                return "-00:00";
            }

            string result;
            if (!TimeZoneMap.TryGetValue(rfc822TimeZone, out result))
            {
                result = string.Empty;
            }

            return result;
        }

        /// <summary>
        /// Parses a date of the form <c>[ddd, ]d[d] MMM yyyy HH:mm[:ss] zone</c>.
        /// </summary>
        /// <param name="dateTimeString">The date text; surrounding white space is ignored.</param>
        /// <returns>The parsed date and time.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dateTimeString"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">The text is too short, has no zone, or does not match the expected format.</exception>
        private static DateTimeOffset DateFromString(string dateTimeString)
        {
            if (dateTimeString == null)
            {
                throw new ArgumentNullException("dateTimeString");
            }

            var sb = new StringBuilder(dateTimeString.Trim());
            if (sb.Length < 18)
            {
                throw new ArgumentException("Invalid date: " + dateTimeString);
            }

            if (sb[3] == ',')
            {
                // Leading day name such as "Tue,": drop it and whatever white space follows.
                sb.Remove(0, 4);
                RemoveExtraWhiteSpaceAtStart(sb);
            }

            ReplaceMultipleWhiteSpaceWithSingleWhiteSpace(sb);

            // Single-digit day: pad to two digits so the fixed offsets below line up.
            if (sb.Length > 1 && !char.IsDigit(sb[1]))
            {
                sb.Insert(0, '0');
            }

            if (sb.Length < 19)
            {
                throw new ArgumentException("Invalid date: " + dateTimeString);
            }

            bool seconds = ':' == sb[17];
            int index = seconds ? 21 : 18;

            // Without this check a date that ends right after the time would surface
            // as an index/range exception instead of the documented "Invalid date".
            if (sb.Length <= index)
            {
                throw new ArgumentException("Invalid date: " + dateTimeString);
            }

            string timeZone = sb.ToString().Substring(index);
            sb.Remove(index, sb.Length - index);

            bool isUtc;
            timeZone = NormalizeTimeZone(timeZone, out isUtc);
            sb.Append(timeZone);

            string format = seconds ? "dd MMM yyyy HH:mm:ss zzz" : "dd MMM yyyy HH:mm zzz";
            var styles = isUtc ? DateTimeStyles.AdjustToUniversal : DateTimeStyles.None;
            var culture = CultureInfo.InvariantCulture;

            DateTimeOffset result;
            if (!DateTimeOffset.TryParseExact(sb.ToString(), format, culture, styles, out result))
            {
                throw new ArgumentException("Invalid date: " + dateTimeString);
            }

            return result;
        }

        /// <summary>
        /// Offsets substituted for named time zones by <see cref="NormalizeTimeZone"/>.
        /// <c>Z</c> and <c>UT</c> are not listed here; they are handled there directly.
        /// </summary>
        private static readonly Dictionary<string, string> TimeZoneMap = new Dictionary<string, string>
        {
            { "GMT", "-00:00" },
            { "A", "-01:00" },
            { "B", "-02:00" },
            { "C", "-03:00" },
            { "D", "-04:00" },
            { "EDT", "-04:00" },
            { "E", "-05:00" },
            { "EST", "-05:00" },
            { "CDT", "-05:00" },
            { "F", "-06:00" },
            { "CST", "-06:00" },
            { "MDT", "-06:00" },
            { "G", "-07:00" },
            { "MST", "-07:00" },
            { "PDT", "-07:00" },
            { "H", "-08:00" },
            { "PST", "-08:00" },
            { "I", "-09:00" },
            { "K", "-10:00" },
            { "L", "-11:00" },
            { "M", "-12:00" },
            { "N", "+01:00" },
            { "O", "+02:00" },
            { "P", "+03:00" },
            { "Q", "+04:00" },
            { "R", "+05:00" },
            { "S", "+06:00" },
            { "T", "+07:00" },
            { "U", "+08:00" },
            { "V", "+09:00" },
            { "W", "+10:00" },
            { "X", "+11:00" },
            { "Y", "+12:00" },
        };
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment