Dart Crawler class - A server application example
#import('dart:io');
#import('dart:uri');
#import('dart:json');
// Dart Hackathon TLV 2012
//
// A simple example that fetches an RSS/JSON feed and parses it on the server side.
// This is a good starting point for a crawler that fetches info and parses it.
//
// Author: Ido Green | greenido.wordpress.com
// Date: 28/4/2012
//
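// To try it with the 2012-era Dart SDK this was written against,
// save it (the file name here is arbitrary) and run it with the VM:
//   dart crawler.dart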
class Crawler {
  String _urlToFetch = "http://feeds.feedburner.com/html5rocks";
  String _dataFileName = "webPageData.json";
  HttpClient _client;
  var rssItems;

  // Ctor.
  Crawler() {
    _client = new HttpClient();
  }
  // Fetch the page and save the data locally in a file so we can
  // process it later. Invokes onDone once the download has been
  // written to disk, so callers know when the file is ready.
  fetchWebPage(void onDone()) {
    // Get all the updates of h5r
    Uri pipeUrl = new Uri.fromString(_urlToFetch);
    // Open a GET connection to fetch this data
    var conn = _client.getUrl(pipeUrl);
    conn.onRequest = (HttpClientRequest request) {
      // A GET has no body - just close the request stream to send it.
      request.outputStream.close();
    };
    conn.onResponse = (HttpClientResponse response) {
      // statusCode is an int, so interpolate rather than concatenate
      print("status code: ${response.statusCode}");
      var output = new File(_dataFileName).openOutputStream();
      // pipe() closes the output stream once the response ends;
      // that close is our signal that the download is complete.
      output.onClosed = onDone;
      response.inputStream.pipe(output);
      // In case you want to print the data to your console:
      // response.inputStream.pipe(stdout);
    };
  }
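  // (Hypothetical addition, not in the original gist.) The old
  // HttpClientConnection also exposed an error hook; inside
  // fetchWebPage() one could add, e.g.:
  //   conn.onError = (e) => print("Fetch failed: $e");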
  // Read the saved file and hand its content to parsePage().
  readFile() {
    File file = new File(_dataFileName);
    if (!file.existsSync()) {
      print("Err: Could not find: " + _dataFileName);
      return;
    }
    InputStream file_stream = file.openInputStream();
    StringInputStream lines = new StringInputStream(file_stream);
    String data = "";
    lines.onLine = () {
      String line;
      // Drain every line that is currently available. Newlines are
      // dropped, which is fine for the tag-based parsing below.
      while ((line = lines.readLine()) != null) {
        //print("== " + line);
        data += line;
      }
    };
    lines.onClosed = () {
      print("Got to the end of: " + _dataFileName);
      print("This is our file content:\n" + data);
      parsePage(data);
    };
  }
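  // Hypothetical variant (not in the original gist): if the feed were
  // JSON rather than RSS XML, dart:json (imported above; JSON.parse was
  // the 2012-era API) could decode the saved content directly.
  parseJsonPage(String data) {
    var feed = JSON.parse(data);
    print("Parsed JSON feed: $feed");
  }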
  //
  // Basic (really basic) parsing
  //
  parsePage(data) {
    // Cut out the interesting part of the feed
    int start = data.indexOf("<title>");
    int end = data.lastIndexOf("</channel>");
    var feed = data.substring(start, end);
    // Split on "<title>" to put the items in an array. Each chunk
    // still contains the closing </title> tag and the markup that
    // follows it - good enough for a quick demo.
    rssItems = feed.split("<title>");
    for (var item in rssItems) {
      print("\n** Item: " + item);
    }
  }
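  // Hypothetical helper (not in the original gist): a slightly less
  // naive parse that keeps only the text between each <title> and
  // </title> pair, instead of the raw split() chunks above.
  List<String> extractTitles(String data) {
    List<String> titles = [];
    int start = data.indexOf("<title>");
    while (start >= 0) {
      int end = data.indexOf("</title>", start);
      if (end < 0) break;
      titles.add(data.substring(start + "<title>".length, end));
      start = data.indexOf("<title>", end);
    }
    return titles;
  }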
} // End of class

//
// Start the party
//
void main() {
  Crawler crawler = new Crawler();
  // Read (and parse) the file only after the download has finished;
  // calling readFile() immediately would race the async fetch.
  crawler.fetchWebPage(() => crawler.readFile());
}
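For readers on a current SDK: the dart:io API above predates Dart 1.0 (#import, stream callbacks) and no longer compiles. A minimal sketch of the same fetch-save-parse flow in modern Dart, assuming today's dart:io and dart:convert, might look like this:

import 'dart:convert';
import 'dart:io';

Future<void> main() async {
  final client = HttpClient();
  final url = Uri.parse('http://feeds.feedburner.com/html5rocks');
  final request = await client.getUrl(url);
  final response = await request.close();
  // Decode the body, save it, then do the same naive tag-based parse.
  final body = await response.transform(utf8.decoder).join();
  await File('webPageData.json').writeAsString(body);
  final start = body.indexOf('<title>');
  final end = body.lastIndexOf('</channel>');
  for (final item in body.substring(start, end).split('<title>')) {
    print('\n** Item: $item');
  }
  client.close();
}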