Created
May 2, 2012 11:20
-
-
Save greenido/2575918 to your computer and use it in GitHub Desktop.
Dart Crawler class - A server application example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import('dart:io'); | |
#import('dart:uri'); | |
#import('dart:json'); | |
// Dart Hackathon TLV 2012
//
// A simple example that fetches an RSS/JSON feed and parses it on the server side.
// This is a good starting point for a crawler that fetches info and parses it.
//
// Author: Ido Green | greenido.wordpress.com
// Date: 28/4/2012
//
// A minimal server-side crawler: fetches a web page (RSS/JSON feed),
// caches the raw response in a local file, and performs a very basic
// parse of the cached data.
class Crawler {
  // Feed to fetch; defaults to the HTML5Rocks RSS feed.
  String _urlToFetch = "http://feeds.feedburner.com/html5rocks";
  // Local cache file the fetched page is written to.
  String _dataFileName = "webPageData.json";
  HttpClient _client;
  // Set by parsePage(): list of strings produced by splitting the feed
  // body on "<title>"; the first element is the leading fragment.
  var rssItems;

  // Ctor.
  Crawler() {
    _client = new HttpClient();
  }

  // Fetch the page and save the data locally in a file so we can
  // process it later.
  //
  // [onDone] is an optional zero-argument callback invoked once the
  // response has been fully written to the cache file. Pass it to
  // sequence readFile() after this asynchronous fetch; existing callers
  // that pass nothing keep the old fire-and-forget behavior.
  fetchWebPage([onDone = null]) {
    // Get all the updates of h5r.
    Uri pipeUrl = new Uri.fromString(_urlToFetch);
    // Open a GET connection to fetch this data.
    var conn = _client.getUrl(pipeUrl);
    conn.onRequest = (HttpClientRequest request) {
      // A GET has no body; just close the request stream.
      request.outputStream.close();
    };
    conn.onResponse = (HttpClientResponse response) {
      // statusCode is an int, so use interpolation —
      // "string" + int is not valid Dart.
      print("status code: ${response.statusCode}");
      var output = new File(_dataFileName).openOutputStream();
      // pipe() closes the output stream when the input ends, which
      // fires onClosed — that is our "download finished" signal.
      // NOTE(review): assumes OutputStream.onClosed fires after pipe
      // completes — confirm against the dart:io version in use.
      output.onClosed = () {
        if (onDone != null) {
          onDone();
        }
      };
      response.inputStream.pipe(output);
      // In case you want to print the data to your console:
      // response.inputStream.pipe(stdout);
    };
  }

  // Read the cache file and, once fully read, hand its content to
  // parsePage(). Prints an error and returns if the file is missing
  // (e.g. when called before fetchWebPage() has finished writing it).
  readFile() {
    File file = new File(_dataFileName);
    if (!file.existsSync()) {
      print("Err: Could not find: ${_dataFileName}");
      return;
    }
    InputStream file_stream = file.openInputStream();
    StringInputStream lines = new StringInputStream(file_stream);
    // Accumulate in a StringBuffer instead of repeated string
    // concatenation (string += in a loop is O(n^2)).
    StringBuffer data = new StringBuffer();
    lines.onLine = () {
      String line;
      while ((line = lines.readLine()) != null) {
        data.add(line);
      }
    };
    lines.onClosed = () {
      String content = data.toString();
      print("Got to the end of: ${_dataFileName}");
      print("This is our file content:\n${content}");
      parsePage(content);
    };
  }

  //
  // Basic (real basic) parsing: slice the feed between the first
  // "<title>" and the last "</channel>", then split on "<title>".
  //
  parsePage(data) {
    // Cut out the interesting part of the feed.
    int start = data.indexOf("<title>");
    int end = data.lastIndexOf("</channel>");
    // Guard against malformed data: indexOf/lastIndexOf return -1 when
    // the marker is absent, and substring would then throw.
    if (start < 0 || end < 0 || end < start) {
      print("Err: could not find feed markers in: ${_dataFileName}");
      return;
    }
    var feed = data.substring(start, end);
    // Put the items in an array.
    rssItems = feed.split("<title>");
    for (var item in rssItems) {
      print("\n** Item: ${item}");
    }
  }
} // End of class
// | |
// Start the party | |
// | |
// Entry point: build a crawler, kick off the fetch, then read the
// local cache.
//
// NOTE(review): readFile() runs before the asynchronous fetch in
// fetchWebPage() has finished writing the cache file, so on a first
// run it will likely report the file as missing — confirm this
// ordering is intended.
void main() {
  var crawler = new Crawler();
  crawler.fetchWebPage();
  crawler.readFile();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment