Skip to content

Instantly share code, notes, and snippets.

@RichardD2
Last active August 29, 2015 14:11
Show Gist options
  • Save RichardD2/f7bf9c50e43a7802708e to your computer and use it in GitHub Desktop.
Save RichardD2/f7bf9c50e43a7802708e to your computer and use it in GitHub Desktop.
An RSS feed formatter which supports parsing v0.91 format feeds.
/*
* An RSS feed formatter which supports parsing v0.91 format feeds.
*
* Based on:
* http://referencesource.microsoft.com/#System.ServiceModel/System/ServiceModel/Syndication/Rss20FeedFormatter.cs
*/
using System;
using System.Collections.Generic;
using System.Globalization;
using System.ServiceModel.Syndication;
using System.Text;
using System.Xml;
namespace System.ServiceModel.Syndication
{
/// <summary>
/// An RSS feed reader which supports RSS 0.91 feeds.
/// </summary>
public sealed class Rss10FeedFormatter : Rss20FeedFormatter
{
/// <summary>
/// Namespace URI that attributes with the local name "base" are matched against
/// when this formatter looks for a base URI on an element.
/// </summary>
private const string NS = "http://www.w3.org/XML/1998/namespace";
/// <summary>
/// Initializes a new instance of the <see cref="Rss10FeedFormatter"/> class
/// for an existing feed.
/// </summary>
/// <param name="feed">The feed, which is handed unchanged to the base formatter's constructor.</param>
public Rss10FeedFormatter(SyndicationFeed feed) : base(feed)
{
}
/// <summary>
/// Initializes a new instance of the <see cref="Rss10FeedFormatter"/> class
/// without a feed; one is created when <see cref="ReadFrom"/> is called.
/// </summary>
public Rss10FeedFormatter()
{
}
/// <summary>
/// Reads an RSS syndication feed (version 2.0 or 0.91) from the specified
/// <see cref="XmlReader"/> instance into a newly created feed.
/// </summary>
/// <param name="reader">
/// The <see cref="XmlReader"/> to read from.
/// </param>
/// <exception cref="NotSupportedException">
/// <c>CanRead</c> rejects the reader's content, or the document declares an unsupported version.
/// </exception>
public override void ReadFrom(XmlReader reader)
{
    bool readable = CanRead(reader);
    if (!readable)
    {
        throw new NotSupportedException("Unknown RSS 0.91 feed format.");
    }

    // Install a fresh feed instance first, then fill it from the document.
    SyndicationFeed created = CreateFeedInstance();
    SetFeed(created);
    ReadXml(reader, Feed);
}
/// <summary>
/// Populates <paramref name="result"/> from the <c>rss</c> document at the reader's current position.
/// </summary>
/// <remarks>
/// Documents whose <c>version</c> attribute is "2.0" or "0.91" are accepted. Channel elements that
/// are not handled explicitly are offered to <c>TryParseElement</c> and skipped when it declines them.
/// The closing <c>channel</c> and <c>rss</c> tags are not consumed by this method.
/// </remarks>
/// <param name="reader">The reader to read the document from.</param>
/// <param name="result">The feed that receives the parsed values.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="reader"/> or <paramref name="result"/> is <c>null</c>.
/// </exception>
/// <exception cref="NotSupportedException">The <c>version</c> attribute is neither "2.0" nor "0.91".</exception>
/// <exception cref="InvalidOperationException">
/// A second group of <c>item</c> elements appears after other channel elements.
/// </exception>
private void ReadXml(XmlReader reader, SyndicationFeed result)
{
    if (reader == null) throw new ArgumentNullException("reader");
    if (result == null) throw new ArgumentNullException("result");

    string baseUrl = null;
    reader.MoveToContent();

    string version = reader.GetAttribute("version", "");
    if (version != "2.0" && version != "0.91")
    {
        throw new NotSupportedException("Unknown RSS version: " + version);
    }

    // A base URI on the root element is only looked up when the element
    // carries something besides the version attribute.
    if (reader.AttributeCount > 1)
    {
        string attribute = reader.GetAttribute("base", NS);
        if (!string.IsNullOrEmpty(attribute)) baseUrl = attribute;
    }

    reader.ReadStartElement();
    reader.MoveToContent();

    // Attributes of the element that follows the root start tag (expected to be <channel>):
    // a base URI overrides the one found on the root; everything else is offered to
    // TryParseAttribute and kept as an attribute extension when it is not recognised.
    if (reader.HasAttributes)
    {
        while (reader.MoveToNextAttribute())
        {
            string localName = reader.LocalName;
            string namespaceUri = reader.NamespaceURI;
            string value = reader.Value;
            if ("base" == localName && NS == namespaceUri)
            {
                baseUrl = value;
            }
            else if (!TryParseAttribute(localName, namespaceUri, value, result, Version))
            {
                result.AttributeExtensions.Add(new XmlQualifiedName(localName, namespaceUri), value);
            }
        }
    }

    if (!string.IsNullOrEmpty(baseUrl))
    {
        result.BaseUri = new Uri(baseUrl, UriKind.RelativeOrAbsolute);
    }

    reader.ReadStartElement("channel", string.Empty);

    bool startedItems = false;
    while (reader.IsStartElement())
    {
        if (reader.IsStartElement("title", string.Empty))
        {
            result.Title = new TextSyndicationContent(reader.ReadElementString());
            continue;
        }

        if (reader.IsStartElement("description", string.Empty))
        {
            result.Description = new TextSyndicationContent(reader.ReadElementString());
            continue;
        }

        if (reader.IsStartElement("language", string.Empty))
        {
            result.Language = reader.ReadElementString();
            continue;
        }

        if (reader.IsStartElement("copyright", string.Empty))
        {
            result.Copyright = new TextSyndicationContent(reader.ReadElementString());
            continue;
        }

        if (reader.IsStartElement("generator", string.Empty))
        {
            result.Generator = reader.ReadElementString();
            continue;
        }

        if (reader.IsStartElement("link", string.Empty))
        {
            result.Links.Add(ReadAlternateLink(reader, result.BaseUri));
            continue;
        }

        if (reader.IsStartElement("managingEditor", string.Empty))
        {
            result.Authors.Add(ReadPerson(reader, result));
            continue;
        }

        if (reader.IsStartElement("lastBuildDate", string.Empty))
        {
            // ReadElementString copes with both <lastBuildDate></lastBuildDate> and the
            // self-closing form, so the text of a following element is never picked up by mistake.
            result.LastUpdatedTime = DateFromString(reader.ReadElementString());
            continue;
        }

        if (reader.IsStartElement("category", string.Empty))
        {
            result.Categories.Add(ReadCategory(reader, result));
            continue;
        }

        if (reader.IsStartElement("image", string.Empty))
        {
            if (reader.IsEmptyElement)
            {
                // A self-closing <image/> has neither children nor an end tag to consume.
                reader.Skip();
                continue;
            }

            reader.ReadStartElement();
            while (reader.IsStartElement())
            {
                if (reader.IsStartElement("url", string.Empty))
                {
                    result.ImageUrl = new Uri(reader.ReadElementString(), UriKind.RelativeOrAbsolute);
                }
                else
                {
                    // Only the image URL is kept; other image children are ignored.
                    reader.Skip();
                }
            }

            reader.ReadEndElement();
            continue;
        }

        if (reader.IsStartElement("item", string.Empty))
        {
            if (startedItems)
            {
                throw new InvalidOperationException("The feed has non-contiguous items.");
            }

            bool allItemsRead;
            result.Items = ReadItems(reader, result, out allItemsRead);
            startedItems = true;
            if (allItemsRead) continue;

            // The items were not all read yet, so the rest of the channel cannot be parsed now.
            break;
        }

        // Unknown element: TryParseElement returns true only when it handled the element.
        // When it declines, the reader is still on the start tag and has to be moved past
        // the element here, otherwise this loop would never advance.
        if (!TryParseElement(reader, result, Version))
        {
            reader.Skip();
        }
    }
}
/// <summary>
/// Combines an inherited base URI with a base value found on a nested element.
/// </summary>
/// <param name="rootBase">The inherited base URI; may be <c>null</c>.</param>
/// <param name="newBase">The nested base value; may be <c>null</c> or empty.</param>
/// <returns>
/// <paramref name="rootBase"/> when <paramref name="newBase"/> is null or empty; otherwise
/// <paramref name="newBase"/> as a URI, resolved against <paramref name="rootBase"/> when it is
/// relative and a root base is available.
/// </returns>
private static Uri CombineXmlBase(Uri rootBase, string newBase)
{
    if (string.IsNullOrEmpty(newBase))
    {
        return rootBase;
    }

    var candidate = new Uri(newBase, UriKind.RelativeOrAbsolute);
    bool needsResolving = rootBase != null && !candidate.IsAbsoluteUri;
    return needsResolving ? new Uri(rootBase, newBase) : candidate;
}
/// <summary>
/// Reads the element at the reader's current position as an "alternate" link.
/// </summary>
/// <remarks>
/// A base-URI attribute on the element is combined with <paramref name="baseUri"/>; every other
/// attribute is stored as an attribute extension. The element's text becomes the link URI.
/// A self-closing element is treated the same as one with empty text.
/// </remarks>
/// <param name="reader">The reader positioned on the link element.</param>
/// <param name="baseUri">The base URI inherited from the feed; may be <c>null</c>.</param>
/// <returns>The link that was read.</returns>
private static SyndicationLink ReadAlternateLink(XmlReader reader, Uri baseUri)
{
    var link = new SyndicationLink
    {
        BaseUri = baseUri,
        RelationshipType = "alternate",
    };

    // Captured before the attribute loop moves the reader off the element node,
    // in the same way ReadCategory and ReadPerson do.
    bool isEmpty = reader.IsEmptyElement;

    if (reader.HasAttributes)
    {
        while (reader.MoveToNextAttribute())
        {
            string value = reader.Value;
            string localName = reader.LocalName;
            string namespaceUri = reader.NamespaceURI;
            if (localName == "base" && namespaceUri == NS)
            {
                link.BaseUri = CombineXmlBase(link.BaseUri, value);
            }
            else
            {
                link.AttributeExtensions.Add(new XmlQualifiedName(localName, namespaceUri), value);
            }
        }
    }

    reader.ReadStartElement();

    string target = string.Empty;
    if (!isEmpty)
    {
        // Only an element with a separate end tag has text and an end tag to consume;
        // for a self-closing element ReadStartElement has already moved past it.
        target = reader.ReadString();
        reader.ReadEndElement();
    }

    link.Uri = new Uri(target, UriKind.RelativeOrAbsolute);
    return link;
}
/// <summary>
/// Fills <paramref name="category"/> from the <c>category</c> element at the reader's current position.
/// </summary>
/// <remarks>
/// The un-namespaced <c>domain</c> attribute becomes the scheme; other attributes are offered to
/// <c>TryParseAttribute</c> and stored as attribute extensions when it does not accept them.
/// The element text becomes the name, unless the element is self-closing.
/// </remarks>
/// <param name="reader">The reader positioned on the category element.</param>
/// <param name="category">The category to populate.</param>
private void ReadCategory(XmlReader reader, SyndicationCategory category)
{
    // Must be sampled while the reader is still on the element node.
    bool selfClosing = reader.IsEmptyElement;

    if (reader.HasAttributes)
    {
        for (bool more = reader.MoveToNextAttribute(); more; more = reader.MoveToNextAttribute())
        {
            string attrName = reader.LocalName;
            string attrNamespace = reader.NamespaceURI;
            string attrValue = reader.Value;

            bool isDomain = attrName == "domain" && string.IsNullOrEmpty(attrNamespace);
            if (isDomain)
            {
                category.Scheme = attrValue;
                continue;
            }

            if (TryParseAttribute(attrName, attrNamespace, attrValue, category, Version))
            {
                continue;
            }

            category.AttributeExtensions.Add(new XmlQualifiedName(attrName, attrNamespace), attrValue);
        }
    }

    reader.ReadStartElement("category", string.Empty);
    if (selfClosing)
    {
        return;
    }

    category.Name = reader.ReadString();
    reader.ReadEndElement();
}
/// <summary>
/// Creates a category for <paramref name="feed"/> and fills it from the element
/// at the reader's current position.
/// </summary>
/// <param name="reader">The reader positioned on the category element.</param>
/// <param name="feed">The feed the category is created for.</param>
/// <returns>The populated category.</returns>
private SyndicationCategory ReadCategory(XmlReader reader, SyndicationFeed feed)
{
    SyndicationCategory parsed = CreateCategory(feed);
    ReadCategory(reader, parsed);
    return parsed;
}
/// <summary>
/// Fills <paramref name="person"/> from the element at the reader's current position.
/// </summary>
/// <remarks>
/// Attributes are offered to <c>TryParseAttribute</c> and stored as attribute extensions when it
/// does not accept them. The element text becomes the e-mail address, unless the element is
/// self-closing.
/// </remarks>
/// <param name="reader">The reader positioned on the person element.</param>
/// <param name="person">The person to populate.</param>
private void ReadPerson(XmlReader reader, SyndicationPerson person)
{
    // Must be sampled while the reader is still on the element node.
    bool selfClosing = reader.IsEmptyElement;

    if (reader.HasAttributes)
    {
        for (bool more = reader.MoveToNextAttribute(); more; more = reader.MoveToNextAttribute())
        {
            string attrName = reader.LocalName;
            string attrNamespace = reader.NamespaceURI;
            string attrValue = reader.Value;

            if (TryParseAttribute(attrName, attrNamespace, attrValue, person, Version))
            {
                continue;
            }

            person.AttributeExtensions.Add(new XmlQualifiedName(attrName, attrNamespace), attrValue);
        }
    }

    reader.ReadStartElement();
    if (selfClosing)
    {
        return;
    }

    person.Email = reader.ReadString();
    reader.ReadEndElement();
}
/// <summary>
/// Creates a person for <paramref name="feed"/> and fills it from the element
/// at the reader's current position.
/// </summary>
/// <param name="reader">The reader positioned on the person element.</param>
/// <param name="feed">The feed the person is created for.</param>
/// <returns>The populated person.</returns>
private SyndicationPerson ReadPerson(XmlReader reader, SyndicationFeed feed)
{
    SyndicationPerson parsed = CreatePerson(feed);
    ReadPerson(reader, parsed);
    return parsed;
}
/// <summary>
/// Removes every leading white-space character from <paramref name="stringBuilder"/>, in place.
/// </summary>
/// <param name="stringBuilder">The buffer to trim at the start.</param>
private static void RemoveExtraWhiteSpaceAtStart(StringBuilder stringBuilder)
{
    // Count the leading white-space characters.
    int leading;
    for (leading = 0; leading < stringBuilder.Length; leading++)
    {
        if (!char.IsWhiteSpace(stringBuilder[leading]))
        {
            break;
        }
    }

    if (leading == 0)
    {
        return;
    }

    stringBuilder.Remove(0, leading);
}
/// <summary>
/// Replaces every run of one or more white-space characters in <paramref name="stringBuilder"/>
/// with a single space character (' '), in place.
/// </summary>
/// <remarks>
/// The character kept for a run is always ' ', whatever characters the run consisted of, so
/// that the result can be matched against a format string that uses plain spaces.
/// </remarks>
/// <param name="stringBuilder">The buffer to normalise.</param>
private static void ReplaceMultipleWhiteSpaceWithSingleWhiteSpace(StringBuilder stringBuilder)
{
    // Compact the buffer in a single pass: 'write' trails 'read' and receives every
    // character that is kept.
    int write = 0;
    bool insideRun = false;

    for (int read = 0; read < stringBuilder.Length; read++)
    {
        char current = stringBuilder[read];
        if (char.IsWhiteSpace(current))
        {
            if (insideRun)
            {
                // Second or later character of a run: drop it.
                continue;
            }

            insideRun = true;
            current = ' ';
        }
        else
        {
            insideRun = false;
        }

        stringBuilder[write] = current;
        write++;
    }

    stringBuilder.Length = write;
}
/// <summary>
/// Converts the time-zone part of a feed date into an offset string of the form "+hh:mm" / "-hh:mm".
/// </summary>
/// <remarks>
/// <para>Numeric zones: "+hmm" is padded to "+hhmm", and "+hhmm" gets a colon inserted. A signed
/// value of any other length is returned unchanged, leaving it to the caller's date parser to
/// accept or reject it.</para>
/// <para>"Z" and "UT" yield "-00:00" and set <paramref name="isUtc"/>. Any other name is looked
/// up in <c>TimeZoneMap</c>; unknown names and a null or empty input yield an empty string.</para>
/// </remarks>
/// <param name="rfc822TimeZone">The zone text taken from the end of the date string.</param>
/// <param name="isUtc">Set to <c>true</c> only for "Z" and "UT".</param>
/// <returns>The normalised offset, or an empty string when the zone is not recognised.</returns>
private static string NormalizeTimeZone(string rfc822TimeZone, out bool isUtc)
{
    isUtc = false;

    // Nothing to index into; report "not recognised" instead of failing on [0].
    if (string.IsNullOrEmpty(rfc822TimeZone))
    {
        return string.Empty;
    }

    char first = rfc822TimeZone[0];
    if (first == '+' || first == '-')
    {
        var sb = new StringBuilder(rfc822TimeZone);

        // Some feeds omit the leading zero of the hours: +hmm -> +hhmm.
        if (sb.Length == 4) sb.Insert(1, '0');

        // Only a sign plus four characters has a well-defined place for the colon.
        if (sb.Length == 5) sb.Insert(3, ':');

        return sb.ToString();
    }

    if (rfc822TimeZone == "Z" || rfc822TimeZone == "UT")
    {
        isUtc = true;
        return "-00:00";
    }

    string result;
    if (!TimeZoneMap.TryGetValue(rfc822TimeZone, out result))
    {
        result = string.Empty;
    }

    return result;
}
/// <summary>
/// Parses a feed date such as "Tue, 10 Dec 2014 12:30:00 GMT" into a <see cref="DateTimeOffset"/>.
/// </summary>
/// <remarks>
/// An optional three-letter weekday followed by a comma is dropped, runs of white space are
/// collapsed, a single-digit day is zero-padded and the zone is normalised before the text is
/// parsed with the exact format "dd MMM yyyy HH:mm[:ss] zzz" using the invariant culture.
/// </remarks>
/// <param name="dateTimeString">The date text to parse.</param>
/// <returns>The parsed date and time.</returns>
/// <exception cref="ArgumentNullException"><paramref name="dateTimeString"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The text is not a date in the supported layout.</exception>
private static DateTimeOffset DateFromString(string dateTimeString)
{
    if (dateTimeString == null) throw new ArgumentNullException("dateTimeString");

    var sb = new StringBuilder(dateTimeString.Trim());
    if (sb.Length < 18) throw new ArgumentException("Invalid date: " + dateTimeString);

    if (sb[3] == ',')
    {
        // Leading weekday ("Tue,"): drop it together with the white space that follows.
        sb.Remove(0, 4);
        RemoveExtraWhiteSpaceAtStart(sb);
    }

    ReplaceMultipleWhiteSpaceWithSingleWhiteSpace(sb);

    // The steps above may have shortened the text considerably. Anything below 18 characters
    // cannot reach the required 19 even after padding the day, so reject it before indexing.
    if (sb.Length < 18) throw new ArgumentException("Invalid date: " + dateTimeString);

    // Single-digit day: pad to two digits so the fixed positions used below line up.
    if (!char.IsDigit(sb[1])) sb.Insert(0, '0');
    if (sb.Length < 19) throw new ArgumentException("Invalid date: " + dateTimeString);

    // "dd MMM yyyy HH:mm" occupies positions 0-16; a colon at 17 means seconds follow.
    bool seconds = ':' == sb[17];
    int index = seconds ? 21 : 18;

    // There must be at least one character of time-zone text after the time.
    if (sb.Length <= index) throw new ArgumentException("Invalid date: " + dateTimeString);

    string timeZone = sb.ToString().Substring(index);
    sb.Remove(index, sb.Length - index);

    bool isUtc;
    timeZone = NormalizeTimeZone(timeZone, out isUtc);
    sb.Append(timeZone);

    string format = seconds ? "dd MMM yyyy HH:mm:ss zzz" : "dd MMM yyyy HH:mm zzz";
    var styles = isUtc ? DateTimeStyles.AdjustToUniversal : DateTimeStyles.None;
    var culture = CultureInfo.InvariantCulture;

    DateTimeOffset result;
    if (!DateTimeOffset.TryParseExact(sb.ToString(), format, culture, styles, out result))
    {
        throw new ArgumentException("Invalid date: " + dateTimeString);
    }

    return result;
}
/// <summary>
/// Lookup table used by <c>NormalizeTimeZone</c>: maps a time-zone name to its offset in
/// "+hh:mm" / "-hh:mm" form.
/// </summary>
/// <remarks>
/// Holds "GMT", the North American abbreviations (EST/EDT, CST/CDT, MST/MDT, PST/PDT) and the
/// single-letter zones A-I, K-M and N-Y; there is no entry for "J". "Z" and "UT" are not listed
/// because <c>NormalizeTimeZone</c> handles them before consulting this table.
/// Keys are compared with the dictionary's default string comparer, so lookups are case-sensitive.
/// NOTE(review): the signs of the single-letter zones appear to follow RFC 822 as written, which
/// later RFCs describe as erroneous - confirm this is the intended mapping.
/// </remarks>
private static readonly Dictionary<string, string> TimeZoneMap = new Dictionary<string, string>
{
{ "GMT", "-00:00" },
{ "A", "-01:00" },
{ "B", "-02:00" },
{ "C", "-03:00" },
{ "D", "-04:00" },
{ "EDT", "-04:00" },
{ "E", "-05:00" },
{ "EST", "-05:00" },
{ "CDT", "-05:00" },
{ "F", "-06:00" },
{ "CST", "-06:00" },
{ "MDT", "-06:00" },
{ "G", "-07:00" },
{ "MST", "-07:00" },
{ "PDT", "-07:00" },
{ "H", "-08:00" },
{ "PST", "-08:00" },
{ "I", "-09:00" },
{ "K", "-10:00" },
{ "L", "-11:00" },
{ "M", "-12:00" },
{ "N", "+01:00" },
{ "O", "+02:00" },
{ "P", "+03:00" },
{ "Q", "+04:00" },
{ "R", "+05:00" },
{ "S", "+06:00" },
{ "T", "+07:00" },
{ "U", "+08:00" },
{ "V", "+09:00" },
{ "W", "+10:00" },
{ "X", "+11:00" },
{ "Y", "+12:00" },
};
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment