Skip to content

Instantly share code, notes, and snippets.

@JeffreyZhao
Created January 5, 2010 10:39
Show Gist options
  • Save JeffreyZhao/269308 to your computer and use it in GitHub Desktop.
Save JeffreyZhao/269308 to your computer and use it in GitHub Desktop.
#light
module Blogging
open System
open System.IO
open System.Net
open System.Text.RegularExpressions
type MetaWeblog.IMetaWeblogProxy with
member p.GetPostAsync(postId, userName, password) =
let beginGet (ac, o) = p.BeginGetPost postId userName password ac o
Async.FromBeginEnd(beginGet, p.EndGetPost)
member p.UpdatePostAsync(postId, userName, password, post, publish) =
let beginUpdate (ac, o) = p.BeginUpdatePost postId userName password post publish ac o
Async.FromBeginEnd(beginUpdate, p.EndUpdatePost)
type WebClient with
member c.GetStringAsync(url) =
async {
let completeEvent = c.DownloadStringCompleted
do c.DownloadStringAsync(new Uri(url))
let! args = Async.AwaitEvent(completeEvent)
return args.Result
}
let downloadPostIdsAsync (beginMonth : DateTime) (endMonth : DateTime) =
let downloadPostIdsAsync' (m : DateTime) =
async {
let webClient = new WebClient()
let url = sprintf "http://www.cnblogs.com/JeffreyZhao/archive/%i/%i.html" m.Year m.Month
let! html = webClient.GetStringAsync(url)
printfn "%i/%i downloaded" m.Year m.Month
let regex = @"EditPosts\.aspx\?postid=(\d+)"
return [ for m in Regex.Matches(html, regex) -> m.Groups.Item(1).Value |> Int32.Parse ]
}
async {
let! lists =
Seq.initInfinite (fun i -> beginMonth.AddMonths(i))
|> Seq.takeWhile (fun m -> m <= endMonth)
|> Seq.map downloadPostIdsAsync'
|> Async.Parallel
lists
|> List.concat
|> List.sort
|> List.map (fun i -> i.ToString())
|> fun lines -> File.WriteAllLines("postIds.txt", lines)
}
let apiUrl = "http://www.cnblogs.com/JeffreyZhao/services/metaweblog.aspx"
let userName, password = "JeffreyZhao", "..."
let downloadPostsAsync() =
let downloadPostAsync id =
async {
let proxy = MetaWeblog.createProxy()
do proxy.Url <- apiUrl
let! post = proxy.GetPostAsync(id, userName, password)
let file = sprintf @"posts\%i.xml" post.PostID
let xml = XmlSerialization.serialize post
File.WriteAllText(file, xml);
printfn "post %i downloaded" post.PostID
}
File.ReadAllLines("postIds.txt")
//|> Seq.take 10
|> Seq.map downloadPostAsync
|> Async.Parallel
|> Async.Ignore
let updateLocalPosts() =
let regex = @"(?i)(<p\b[^>]*>)  "
let target = "$1"
let updatePost (post : MetaWeblog.Post) =
post.Content <- Regex.Replace(post.Content, regex, target)
let file = sprintf @"updated\%i.xml" post.PostID
let xml = XmlSerialization.serialize post
File.WriteAllText(file, xml);
Directory.GetFiles(@"posts\", "*.xml")
|> Seq.map (fun f -> File.ReadAllText(f))
|> Seq.map XmlSerialization.deserialize<MetaWeblog.Post>
|> Seq.filter (fun p -> Regex.IsMatch(p.Content, regex))
|> Seq.iter updatePost
let updateRemotePostsAsync() =
let updateFromFileAsync file =
async {
let proxy = MetaWeblog.createProxy()
do proxy.Url <- apiUrl
let post = File.ReadAllText(file) |> XmlSerialization.deserialize<MetaWeblog.Post> xml
let! result = proxy.UpdatePostAsync(post.PostID.ToString(), userName, password, post, true)
File.Delete(file)
printfn "post %i updated" post.PostID
}
Directory.GetFiles(@"updated\", "*.xml")
|> Seq.map updateFromFileAsync
|> Async.Parallel
|> Async.Ignore
#light
module Main
open System
[<EntryPoint>]
let main args =
// Blogging.downloadPostIdsAsync (new DateTime(2006, 9, 1)) (new DateTime(2009, 12, 1))
// |> Async.RunSynchronously
// Blogging.downloadPostsAsync() |> Async.RunSynchronously
// Blogging.updateLocalPosts()
Blogging.updateRemotePostsAsync() |> Async.RunSynchronously
printfn "Finished"
Console.ReadLine() |> ignore
0
#light
module MetaWeblog
open System
open CookComputing.XmlRpc
[<Serializable>]
type Post() =
[<XmlRpcMember("postid")>]
[<DefaultValue>]
val mutable PostID : int
[<XmlRpcMember("dateCreated")>]
[<DefaultValue>]
val mutable CreateTime : DateTime
[<XmlRpcMember("title")>]
[<DefaultValue>]
val mutable Title : string
[<XmlRpcMember("description")>]
[<DefaultValue>]
val mutable Content : string
[<XmlRpcMember("categories")>]
[<DefaultValue>]
val mutable Categories : string array
type IMetaWeblogProxy =
inherit IXmlRpcProxy
[<XmlRpcMethod("metaWeblog.getPost")>]
abstract GetPost : string -> string -> string -> Post
[<XmlRpcBegin("metaWeblog.getPost")>]
abstract BeginGetPost : string -> string -> string -> AsyncCallback -> obj -> IAsyncResult
[<XmlRpcEnd("metaWeblog.getPost")>]
abstract EndGetPost : IAsyncResult -> Post
[<XmlRpcMethod("metaWeblog.editPost")>]
abstract UpdatePost : string -> string -> string -> Post -> bool -> bool
[<XmlRpcBegin("metaWeblog.editPost")>]
abstract BeginUpdatePost : string -> string -> string -> Post -> bool -> AsyncCallback -> obj -> IAsyncResult
[<XmlRpcEnd("metaWeblog.editPost")>]
abstract EndUpdatePost : IAsyncResult -> bool
let createProxy() = XmlRpcProxyGen.Create<IMetaWeblogProxy>()
#light
module XmlSerialization
open System
open System.IO
open System.Xml
open System.Xml.Linq
open System.Xml.Serialization
open System.Runtime.Serialization
let serialize (graph : 'a) =
let serializer = new DataContractSerializer(typeof<'a>)
let textWriter = new StringWriter();
let xmlWriter = new XmlTextWriter(textWriter);
serializer.WriteObject(xmlWriter, graph)
textWriter.ToString()
let deserialize<'a> xml =
let serializer = new DataContractSerializer(typeof<'a>)
let textReader = new StringReader(xml)
let xmlReader = new XmlTextReader(textReader)
serializer.ReadObject(xmlReader) :?> 'a
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment