Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Grabbing a site in C# using phantomJS
// PhantomJS script: loads the URL passed as the first command-line argument
// and writes the resulting page markup to stdout.
// Exits with code 1 (and a message on stderr) when no URL is supplied or the
// page fails to load, so the calling process can tell failure from success.
var page = require('webpage').create(),
    system = require('system');

// system.args[0] is the script name; the target URL is expected at index 1.
if (system.args.length < 2) {
    system.stderr.writeLine('Usage: phantomjs index.js <url>');
    phantom.exit(1);
} else {
    page.open(system.args[1], function (status) {
        if (status !== 'success') {
            system.stderr.writeLine('Failed to load ' + system.args[1]);
            phantom.exit(1);
            // phantom.exit() does not stop the current callback, so return explicitly.
            return;
        }
        console.log(page.content);
        phantom.exit();
    });
}
/// <summary>
/// Console entry point: grabs a page through <c>Grabby</c>, echoes the markup
/// to the console and saves it to a file.
/// </summary>
class Program
{
    // Values used when the corresponding command-line argument is omitted.
    private const string DefaultUrl = "http://www.dotnetnerd.dk/cv";
    private const string DefaultOutputPath = "c:\\test.html";

    /// <summary>
    /// Grabs the requested page and writes it to the console and to disk.
    /// </summary>
    /// <param name="args">
    /// Optional: <c>args[0]</c> is the URL to grab and <c>args[1]</c> is the
    /// file the markup is written to. Missing values fall back to the defaults.
    /// </param>
    static void Main(string[] args)
    {
        string url = args.Length > 0 ? args[0] : DefaultUrl;
        string outputPath = args.Length > 1 ? args[1] : DefaultOutputPath;

        var grabby = new Grabby();
        string output = grabby.Grab(url);

        Console.WriteLine(output);
        File.WriteAllText(outputPath, output);
    }
}
/// <summary>
/// Retrieves a page by running the <c>index.js</c> script under the PhantomJS
/// executable configured in <c>Config.PhantomJSPath</c> and capturing what the
/// script writes to standard output.
/// </summary>
public class Grabby
{
    /// <summary>
    /// Runs PhantomJS against <paramref name="url"/> and returns everything the
    /// process wrote to standard output.
    /// </summary>
    /// <param name="url">Address of the page to grab.</param>
    /// <returns>The captured standard output of the PhantomJS process.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="url"/> is null, empty or whitespace.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The current directory has no grandparent directory in which to look for
    /// <c>index.js</c>.
    /// </exception>
    public string Grab(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            throw new ArgumentException("A URL to grab must be supplied.", "url");
        }

        // index.js is looked up two levels above the current directory.
        var parentDirectory = Directory.GetParent(Directory.GetCurrentDirectory());
        if (parentDirectory == null || parentDirectory.Parent == null)
        {
            throw new InvalidOperationException(
                "Cannot locate index.js: the current directory has no grandparent directory.");
        }

        string scriptPath = Path.Combine(parentDirectory.Parent.FullName, "index.js");

        // A literal quote would end the quoted argument early; %22 is its
        // percent-encoded form, so the URL stays a single argument.
        string safeUrl = url.Replace("\"", "%22");

        var startInfo = new System.Diagnostics.ProcessStartInfo
        {
            WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden,
            // Output is captured through the redirected pipe, so the child
            // process does not need a console window of its own.
            CreateNoWindow = true,
            UseShellExecute = false,
            RedirectStandardOutput = true,
            // Decode the script's output as UTF-8 instead of the system default
            // code page so non-ASCII characters come through intact.
            StandardOutputEncoding = System.Text.Encoding.UTF8,
            FileName = Config.PhantomJSPath,
            Arguments = string.Format("\"{0}\" \"{1}\"", scriptPath, safeUrl)
        };

        // Dispose the Process so its OS handles are released promptly.
        using (var process = new System.Diagnostics.Process())
        {
            process.StartInfo = startInfo;
            process.Start();

            // Drain stdout before waiting: waiting first can deadlock once the
            // child fills the redirected pipe's buffer.
            string output = process.StandardOutput.ReadToEnd();
            process.WaitForExit();
            return output;
        }
    }
}
@porkopek

This comment has been minimized.

Copy link

commented Jun 30, 2016

Although the default encoding in PhantomJS is UTF-8, I needed to put
process.StartInfo.StandardOutputEncoding = Encoding.UTF8;
before
process.Start();
to get the characters encoded properly.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.