Created
September 9, 2015 10:24
-
-
Save derantell/df7d2382c171cb4ff204 to your computer and use it in GitHub Desktop.
Fetch og metadata with CsQuery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Fetch Open Graph metadata from a web page
// Depends on CsQuery [https://github.com/jamietre/CsQuery]
// NuGet: Install-Package CsQuery
using CsQuery; | |
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
namespace Meridium.OgMetaFetcher { | |
/// <summary>
/// Loads a web page and extracts its Open Graph metadata, falling back to
/// ordinary page content when an og: tag is missing.
/// </summary>
public class OgMetaFetcher {
    /// <summary>
    /// Loads the page at <paramref name="url"/> through CsQuery and reads its
    /// title, description, site name, url and image url.
    /// </summary>
    /// <param name="url">Absolute address of the page to inspect.</param>
    /// <returns>
    /// A successful <see cref="FetchResult"/> holding the metadata, or a failed
    /// one holding the exception. Any exception raised while loading or parsing
    /// is captured in the result instead of being thrown.
    /// </returns>
    public FetchResult Fetch(string url) {
        try {
            var dom = CQ.CreateFromUrl(url);

            // Parsed once; used for the site name fallback and for resolving image paths.
            var pageUri = new Uri(url);

            var metadata = new OgMetaData {
                // og:title, otherwise the text of the <title> element.
                Title = dom
                    .OgProperty("title")
                    .ElementText("title")
                    .Value,
                // og:description, otherwise <meta name="description">.
                Description = dom
                    .OgProperty("description")
                    .MetaValue("description")
                    .Value,
                // og:site_name, otherwise the host of the requested url.
                SiteName = dom
                    .OgProperty("site_name")
                    .Value(pageUri.Host),
                // og:url, otherwise the requested url itself.
                Url = dom
                    .OgProperty("url")
                    .Value(url),
                // og:image, otherwise the first <img> found in the document.
                ImageUrl = dom
                    .OgProperty("image")
                    .Custom(d => FirstImageUrl(d, pageUri))
                    .Value
            };

            return FetchResult.Success(url, metadata);
        } catch (Exception e) {
            // Deliberate catch-all: callers inspect FetchResult.IsSuccess / Error.
            return FetchResult.Failure(url, e);
        }
    }

    /// <summary>
    /// Returns the src of the first img element as an absolute url, or an empty
    /// string when there is no img or its src is missing or blank.
    /// </summary>
    /// <param name="dom">The parsed document to search.</param>
    /// <param name="url">Address of the page; base for resolving relative sources.</param>
    private string FirstImageUrl(CQ dom, Uri url) {
        var src = (dom["img"].Attr("src") ?? "").Trim();
        if (src.Length == 0) {
            return "";
        }

        // Resolving against the page address covers root-relative ("/a.png"),
        // document-relative ("img/a.png") and protocol-relative ("//cdn/a.png")
        // sources and keeps a non-default port. Sources that are already
        // absolute are kept (in normalized form).
        Uri absolute;
        if (Uri.TryCreate(url, src, out absolute)) {
            return absolute.AbsoluteUri;
        }

        // Could not be combined with the page address; hand back the raw value.
        return src;
    }
}
/// <summary>
/// Extension methods forming the fluent lookup chain: start with
/// <see cref="OgProperty"/>, add fallbacks that only run while no value has
/// been found, and finish with the Value property or the Value(default) method.
/// </summary>
static class Ext {
    /// <summary>
    /// Builds an absolute url from the scheme and authority of
    /// <paramref name="self"/> and the given path.
    /// </summary>
    /// <param name="self">The uri supplying scheme, host and port.</param>
    /// <param name="path">
    /// A root-relative path ("/a.png") or a protocol-relative reference ("//cdn/a.png").
    /// </param>
    /// <returns>The combined absolute url as a string.</returns>
    public static string MakeAbsolute(this Uri self, string path) {
        // A protocol-relative reference already names its own host; only the scheme is missing.
        if (path != null && path.StartsWith("//", StringComparison.Ordinal)) {
            return self.Scheme + ":" + path;
        }

        // Authority, unlike Host, keeps a non-default port (e.g. "localhost:8080").
        return self.Scheme + Uri.SchemeDelimiter + self.Authority + path;
    }

    /// <summary>
    /// Starts a lookup chain with the content of the meta element whose
    /// property attribute is "og:" followed by <paramref name="property"/>.
    /// </summary>
    /// <param name="self">The document to search.</param>
    /// <param name="property">Open Graph property name without the "og:" prefix.</param>
    public static ValueResult OgProperty(this CQ self, string property) {
        var selector = "meta[property='og:" + property + "']";
        return new ValueResult {
            Value = self[selector].Attr("content"),
            Dom = self
        };
    }

    /// <summary>
    /// Fallback: when no value has been found yet, uses the content of the
    /// meta element with the given name attribute.
    /// </summary>
    public static ValueResult MetaValue(this ValueResult self, string name) {
        return self.Custom(dom => dom["meta[name='" + name + "']"].Attr("content"));
    }

    /// <summary>
    /// Fallback: when no value has been found yet, uses the text of the
    /// elements matched by <paramref name="selector"/>.
    /// </summary>
    public static ValueResult ElementText(this ValueResult self, string selector) {
        return self.Custom(dom => dom[selector].Text());
    }

    /// <summary>
    /// Fallback: when no value has been found yet, computes one from the
    /// document with <paramref name="selector"/>. An existing value is
    /// returned untouched and the delegate is not invoked.
    /// </summary>
    public static ValueResult Custom(this ValueResult self, Func<CQ, string> selector) {
        if (self.HasValue) {
            return self;
        }

        return new ValueResult {
            Value = selector(self.Dom),
            Dom = self.Dom
        };
    }

    /// <summary>
    /// Ends a chain: returns the value found, or <paramref name="value"/>
    /// when nothing was found.
    /// </summary>
    public static string Value(this ValueResult self, string value) {
        return self.HasValue ? self.Value : value;
    }
}
/// <summary>
/// Intermediate state of a metadata lookup: the document being searched
/// together with the value found so far, if any.
/// </summary>
class ValueResult {
    /// <summary>The document that later fallback lookups search.</summary>
    public CQ Dom { get; set; }

    /// <summary>The value found so far; null or empty when nothing was found.</summary>
    public string Value { get; set; }

    /// <summary>True when <see cref="Value"/> is neither null nor empty.</summary>
    public bool HasValue {
        get {
            var current = Value;
            return current != null && current.Length > 0;
        }
    }
}
/// <summary>
/// Outcome of a metadata fetch: either the extracted metadata or the
/// exception that prevented extraction.
/// </summary>
public class FetchResult {
    /// <summary>True for results created by <see cref="Success"/>.</summary>
    public bool IsSuccess { get; private set; }

    /// <summary>The extracted metadata; an empty instance on failure.</summary>
    public OgMetaData OgMetaData { get; private set; }

    /// <summary>The exception behind a failure; not set on success.</summary>
    public Exception Error { get; private set; }

    /// <summary>The url that was requested.</summary>
    public string FetchUrl { get; private set; }

    /// <summary>
    /// Creates a failed result for <paramref name="fetchurl"/> that carries
    /// <paramref name="exception"/> and empty metadata.
    /// </summary>
    public static FetchResult Failure(string fetchurl, Exception exception) {
        var failed = new FetchResult();
        failed.FetchUrl = fetchurl;
        failed.IsSuccess = false;
        failed.Error = exception;
        failed.OgMetaData = OgMetaData.Empty;
        return failed;
    }

    /// <summary>
    /// Creates a successful result for <paramref name="fetchurl"/> that
    /// carries <paramref name="metadata"/>.
    /// </summary>
    public static FetchResult Success(string fetchurl, OgMetaData metadata ) {
        var succeeded = new FetchResult();
        succeeded.FetchUrl = fetchurl;
        succeeded.IsSuccess = true;
        succeeded.OgMetaData = metadata;
        return succeeded;
    }
}
/// <summary>
/// Metadata describing a web page: title, description, site name, image
/// url and page url.
/// </summary>
public class OgMetaData {
    /// <summary>Title of the page.</summary>
    public string Title { get; set; }

    /// <summary>Short description of the page.</summary>
    public string Description { get; set; }

    /// <summary>Name of the site the page belongs to.</summary>
    public string SiteName { get; set; }

    /// <summary>Url of an image representing the page.</summary>
    public string ImageUrl { get; set; }

    /// <summary>Url of the page.</summary>
    public string Url { get; set; }

    /// <summary>
    /// A new instance with every property set to the empty string. Each
    /// access creates a separate object, so callers may modify the result.
    /// </summary>
    public static OgMetaData Empty {
        get {
            var blank = new OgMetaData();
            blank.Title = "";
            blank.Description = "";
            blank.SiteName = "";
            blank.ImageUrl = "";
            blank.Url = "";
            return blank;
        }
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment