Created
January 5, 2010 10:39
-
-
Save JeffreyZhao/269308 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#light | |
module Blogging | |
open System | |
open System.IO | |
open System.Net | |
open System.Text.RegularExpressions | |
type MetaWeblog.IMetaWeblogProxy with | |
member p.GetPostAsync(postId, userName, password) = | |
let beginGet (ac, o) = p.BeginGetPost postId userName password ac o | |
Async.FromBeginEnd(beginGet, p.EndGetPost) | |
member p.UpdatePostAsync(postId, userName, password, post, publish) = | |
let beginUpdate (ac, o) = p.BeginUpdatePost postId userName password post publish ac o | |
Async.FromBeginEnd(beginUpdate, p.EndUpdatePost) | |
type WebClient with | |
member c.GetStringAsync(url) = | |
async { | |
let completeEvent = c.DownloadStringCompleted | |
do c.DownloadStringAsync(new Uri(url)) | |
let! args = Async.AwaitEvent(completeEvent) | |
return args.Result | |
} | |
let downloadPostIdsAsync (beginMonth : DateTime) (endMonth : DateTime) = | |
let downloadPostIdsAsync' (m : DateTime) = | |
async { | |
let webClient = new WebClient() | |
let url = sprintf "http://www.cnblogs.com/JeffreyZhao/archive/%i/%i.html" m.Year m.Month | |
let! html = webClient.GetStringAsync(url) | |
printfn "%i/%i downloaded" m.Year m.Month | |
let regex = @"EditPosts\.aspx\?postid=(\d+)" | |
return [ for m in Regex.Matches(html, regex) -> m.Groups.Item(1).Value |> Int32.Parse ] | |
} | |
async { | |
let! lists = | |
Seq.initInfinite (fun i -> beginMonth.AddMonths(i)) | |
|> Seq.takeWhile (fun m -> m <= endMonth) | |
|> Seq.map downloadPostIdsAsync' | |
|> Async.Parallel | |
lists | |
|> List.concat | |
|> List.sort | |
|> List.map (fun i -> i.ToString()) | |
|> fun lines -> File.WriteAllLines("postIds.txt", lines) | |
} | |
let apiUrl = "http://www.cnblogs.com/JeffreyZhao/services/metaweblog.aspx" | |
let userName, password = "JeffreyZhao", "..." | |
let downloadPostsAsync() = | |
let downloadPostAsync id = | |
async { | |
let proxy = MetaWeblog.createProxy() | |
do proxy.Url <- apiUrl | |
let! post = proxy.GetPostAsync(id, userName, password) | |
let file = sprintf @"posts\%i.xml" post.PostID | |
let xml = XmlSerialization.serialize post | |
File.WriteAllText(file, xml); | |
printfn "post %i downloaded" post.PostID | |
} | |
File.ReadAllLines("postIds.txt") | |
//|> Seq.take 10 | |
|> Seq.map downloadPostAsync | |
|> Async.Parallel | |
|> Async.Ignore | |
let updateLocalPosts() = | |
let regex = @"(?i)(<p\b[^>]*>) " | |
let target = "$1" | |
let updatePost (post : MetaWeblog.Post) = | |
post.Content <- Regex.Replace(post.Content, regex, target) | |
let file = sprintf @"updated\%i.xml" post.PostID | |
let xml = XmlSerialization.serialize post | |
File.WriteAllText(file, xml); | |
Directory.GetFiles(@"posts\", "*.xml") | |
|> Seq.map (fun f -> File.ReadAllText(f)) | |
|> Seq.map XmlSerialization.deserialize<MetaWeblog.Post> | |
|> Seq.filter (fun p -> Regex.IsMatch(p.Content, regex)) | |
|> Seq.iter updatePost | |
let updateRemotePostsAsync() = | |
let updateFromFileAsync file = | |
async { | |
let proxy = MetaWeblog.createProxy() | |
do proxy.Url <- apiUrl | |
let post = File.ReadAllText(file) |> XmlSerialization.deserialize<MetaWeblog.Post> xml | |
let! result = proxy.UpdatePostAsync(post.PostID.ToString(), userName, password, post, true) | |
File.Delete(file) | |
printfn "post %i updated" post.PostID | |
} | |
Directory.GetFiles(@"updated\", "*.xml") | |
|> Seq.map updateFromFileAsync | |
|> Async.Parallel | |
|> Async.Ignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#light | |
module Main | |
open System | |
[<EntryPoint>] | |
let main args = | |
// Blogging.downloadPostIdsAsync (new DateTime(2006, 9, 1)) (new DateTime(2009, 12, 1)) | |
// |> Async.RunSynchronously | |
// Blogging.downloadPostsAsync() |> Async.RunSynchronously | |
// Blogging.updateLocalPosts() | |
Blogging.updateRemotePostsAsync() |> Async.RunSynchronously | |
printfn "Finished" | |
Console.ReadLine() |> ignore | |
0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#light | |
module MetaWeblog | |
open System | |
open CookComputing.XmlRpc | |
[<Serializable>] | |
type Post() = | |
[<XmlRpcMember("postid")>] | |
[<DefaultValue>] | |
val mutable PostID : int | |
[<XmlRpcMember("dateCreated")>] | |
[<DefaultValue>] | |
val mutable CreateTime : DateTime | |
[<XmlRpcMember("title")>] | |
[<DefaultValue>] | |
val mutable Title : string | |
[<XmlRpcMember("description")>] | |
[<DefaultValue>] | |
val mutable Content : string | |
[<XmlRpcMember("categories")>] | |
[<DefaultValue>] | |
val mutable Categories : string array | |
type IMetaWeblogProxy = | |
inherit IXmlRpcProxy | |
[<XmlRpcMethod("metaWeblog.getPost")>] | |
abstract GetPost : string -> string -> string -> Post | |
[<XmlRpcBegin("metaWeblog.getPost")>] | |
abstract BeginGetPost : string -> string -> string -> AsyncCallback -> obj -> IAsyncResult | |
[<XmlRpcEnd("metaWeblog.getPost")>] | |
abstract EndGetPost : IAsyncResult -> Post | |
[<XmlRpcMethod("metaWeblog.editPost")>] | |
abstract UpdatePost : string -> string -> string -> Post -> bool -> bool | |
[<XmlRpcBegin("metaWeblog.editPost")>] | |
abstract BeginUpdatePost : string -> string -> string -> Post -> bool -> AsyncCallback -> obj -> IAsyncResult | |
[<XmlRpcEnd("metaWeblog.editPost")>] | |
abstract EndUpdatePost : IAsyncResult -> bool | |
let createProxy() = XmlRpcProxyGen.Create<IMetaWeblogProxy>() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#light | |
module XmlSerialization | |
open System | |
open System.IO | |
open System.Xml | |
open System.Xml.Linq | |
open System.Xml.Serialization | |
open System.Runtime.Serialization | |
let serialize (graph : 'a) = | |
let serializer = new DataContractSerializer(typeof<'a>) | |
let textWriter = new StringWriter(); | |
let xmlWriter = new XmlTextWriter(textWriter); | |
serializer.WriteObject(xmlWriter, graph) | |
textWriter.ToString() | |
let deserialize<'a> xml = | |
let serializer = new DataContractSerializer(typeof<'a>) | |
let textReader = new StringReader(xml) | |
let xmlReader = new XmlTextReader(textReader) | |
serializer.ReadObject(xmlReader) :?> 'a |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment