Dart Crawler class - A server application example
#import('dart:io');
#import('dart:uri');
#import('dart:json');
// Dart Hackathon TLV 2012
//
// A simple example that fetches an RSS/JSON feed and parses it on the server side.
// This is a good starting point for a crawler that fetches info and parses it.
//
// Author: Ido Green | greenido.wordpress.com
// Date: 28/4/2012
//
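// To try it with the 2012-era Dart SDK this was written against,
// save it (the file name here is arbitrary) and run it with the VM:
//   dart crawler.dart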
class Crawler {
  String _urlToFetch = "http://feeds.feedburner.com/html5rocks";
  String _dataFileName = "webPageData.json";
  HttpClient _client;
  var rssItems;

  // Ctor.
  Crawler() {
    _client = new HttpClient();
  }
  // Fetch the page and save the data locally in a file so we can
  // process it later. Invokes onDone once the download has been
  // written to disk, so callers know when the file is ready.
  fetchWebPage(void onDone()) {
    // Get all the updates of h5r
    Uri pipeUrl = new Uri.fromString(_urlToFetch);
    // Open a GET connection to fetch this data
    var conn = _client.getUrl(pipeUrl);
    conn.onRequest = (HttpClientRequest request) {
      // A GET has no body - just close the request stream to send it.
      request.outputStream.close();
    };
    conn.onResponse = (HttpClientResponse response) {
      // statusCode is an int, so interpolate rather than concatenate
      print("status code: ${response.statusCode}");
      var output = new File(_dataFileName).openOutputStream();
      // pipe() closes the output stream once the response ends;
      // that close is our signal that the download is complete.
      output.onClosed = onDone;
      response.inputStream.pipe(output);
      // In case you want to print the data to your console:
      // response.inputStream.pipe(stdout);
    };
  }
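  // (Hypothetical addition, not in the original gist.) The old
  // HttpClientConnection also exposed an error hook; inside
  // fetchWebPage() one could add, e.g.:
  //   conn.onError = (e) => print("Fetch failed: $e");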
  // Read the saved file and hand its content to parsePage().
  readFile() {
    File file = new File(_dataFileName);
    if (!file.existsSync()) {
      print("Err: Could not find: " + _dataFileName);
      return;
    }
    InputStream file_stream = file.openInputStream();
    StringInputStream lines = new StringInputStream(file_stream);
    String data = "";
    lines.onLine = () {
      String line;
      // Drain every line that is currently available. Newlines are
      // dropped, which is fine for the tag-based parsing below.
      while ((line = lines.readLine()) != null) {
        //print("== " + line);
        data += line;
      }
    };
    lines.onClosed = () {
      print("Got to the end of: " + _dataFileName);
      print("This is our file content:\n" + data);
      parsePage(data);
    };
  }
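  // Hypothetical variant (not in the original gist): if the feed were
  // JSON rather than RSS XML, dart:json (imported above; JSON.parse was
  // the 2012-era API) could decode the saved content directly.
  parseJsonPage(String data) {
    var feed = JSON.parse(data);
    print("Parsed JSON feed: $feed");
  }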
  //
  // Basic (really basic) parsing
  //
  parsePage(data) {
    // Cut out the interesting part of the feed
    int start = data.indexOf("<title>");
    int end = data.lastIndexOf("</channel>");
    var feed = data.substring(start, end);
    // Split on "<title>" to put the items in an array. Each chunk
    // still contains the closing </title> tag and the markup that
    // follows it - good enough for a quick demo.
    rssItems = feed.split("<title>");
    for (var item in rssItems) {
      print("\n** Item: " + item);
    }
  }
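  // Hypothetical helper (not in the original gist): a slightly less
  // naive parse that keeps only the text between each <title> and
  // </title> pair, instead of the raw split() chunks above.
  List<String> extractTitles(String data) {
    List<String> titles = [];
    int start = data.indexOf("<title>");
    while (start >= 0) {
      int end = data.indexOf("</title>", start);
      if (end < 0) break;
      titles.add(data.substring(start + "<title>".length, end));
      start = data.indexOf("<title>", end);
    }
    return titles;
  }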
} // End of class

//
// Start the party
//
void main() {
  Crawler crawler = new Crawler();
  // Read (and parse) the file only after the download has finished;
  // calling readFile() immediately would race the async fetch.
  crawler.fetchWebPage(() => crawler.readFile());
}
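For readers on a current SDK: the dart:io API above predates Dart 1.0 (#import, stream callbacks) and no longer compiles. A minimal sketch of the same fetch-save-parse flow in modern Dart, assuming today's dart:io and dart:convert, might look like this:

import 'dart:convert';
import 'dart:io';

Future<void> main() async {
  final client = HttpClient();
  final url = Uri.parse('http://feeds.feedburner.com/html5rocks');
  final request = await client.getUrl(url);
  final response = await request.close();
  // Decode the body, save it, then do the same naive tag-based parse.
  final body = await response.transform(utf8.decoder).join();
  await File('webPageData.json').writeAsString(body);
  final start = body.indexOf('<title>');
  final end = body.lastIndexOf('</channel>');
  for (final item in body.substring(start, end).split('<title>')) {
    print('\n** Item: $item');
  }
  client.close();
}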