Skip to content

Instantly share code, notes, and snippets.

@atuttle
Created February 24, 2011 04:19
Show Gist options
  • Save atuttle/841743 to your computer and use it in GitHub Desktop.
Save atuttle/841743 to your computer and use it in GitHub Desktop.
Parses the HTML export from Delicious.com and returns an array of structures, one per exported link.
// Parses the HTML export from Delicious.com into an array of structures, one per link.
// A link structure only carries the keys that were actually found in the export:
// url, tags (array), name, private and description.
component {

	// Constructor; returns the component instance.
	public function init() output="false" {
		return this;
	}

	// Walks the export line by line and builds one structure per link.
	//   data   - the contents of the export file, as a string
	//   return - array of structs (see the component header for the possible keys)
	public function parse(data) output="false" {
		local.links = [];
		// Both CR and LF act as delimiters so that CRLF exports do not leave a
		// trailing chr(13) on every line (and therefore in every description).
		local.lines = listToArray(arguments.data, chr(13) & chr(10));
		local.newLink = {};
		for (local.i = 1; local.i lte arrayLen(local.lines); local.i++){
			// tolerate indented exports: the tag test below looks at the first 4 characters
			local.line = trim(local.lines[local.i]);
			//ignore unimportant lines (too short to hold a tag, or not one of the tags we care about)
			if (len(local.line) lt 4) {
				continue;
			}
			local.first4 = ucase(left(local.line, 4));
			if ((local.first4 neq "<DL>") and (local.first4 neq "<DT>") and (local.first4 neq "<DD>")) {
				continue;
			}
			// everything after the 4-character tag; "" when the line is the bare tag
			local.linkHtml = stripLeadingTag(local.line);
			if (local.first4 eq "<DD>"){
				// a <DD> line describes the link that is currently being built
				local.newLink.description = local.linkHtml;
				continue;
			}
			// <DL> or <DT>: first commit previous link to array
			if (!structIsEmpty(local.newLink)){
				arrayAppend(local.links, duplicate(local.newLink));
				//then create new link entry
				local.newLink = {};
			}
			// reFindNoSuck returns [whole match, capture group], hence index 2 below
			local.parts = {
				url = reFindNoSuck("href=\""([^\""]+)\""", local.linkHtml, 1),
				tags = reFindNoSuck("tags=\""([^\""]+)\""", local.linkHtml, 1),
				name = reFindNoSuck(">([^<]+)<", local.linkHtml, 1),
				private = reFindNoSuck("private=\""([^\""]+)\""", local.linkHtml, 1)
			};
			//get the URL
			if (arrayLen(local.parts.url) gte 2){
				local.newLink.url = local.parts.url[2];
			}
			//get the tags (comma-separated in the export)
			if (arrayLen(local.parts.tags) gte 2){
				local.newLink.tags = listToArray(local.parts.tags[2]);
			}
			//get the name
			if (arrayLen(local.parts.name) gte 2){
				local.newLink.name = local.parts.name[2];
			}
			//get the private flag
			if (arrayLen(local.parts.private) gte 2){
				local.newLink.private = local.parts.private[2];
			}
			//don't commit it yet because the next line *might* add a description
		}
		//finally, add the last link that we've parsed to the array
		if (!structIsEmpty(local.newLink)){
			arrayAppend(local.links, duplicate(local.newLink));
		}
		return local.links;
	}

	//===============================

	// Returns the given line without its first four characters (the <DL>/<DT>/<DD> tag).
	// right() rejects a count of 0, so a line that consists of the tag alone is
	// handled explicitly and yields an empty string.
	private function stripLeadingTag(line) output="false" {
		if (len(arguments.line) lte 4){
			return "";
		}
		return right(arguments.line, len(arguments.line) - 4);
	}

	// Case-insensitive regex search that returns the matched text instead of pos/len arrays.
	//   pattern  - regular expression
	//   data     - string to search
	//   startPos - position at which to start searching (default 1)
	//   return   - array of the matched segments in the order reported by refindNoCase
	//              (whole match first, then each sub-expression); segments that did not
	//              participate in the match, and any segment equal to the entire input
	//              string, are left out. Empty array when nothing matched.
	private function reFindNoSuck(pattern, data, startPos = 1) output="false" {
		local.awesome = arrayNew(1);
		local.sucky = refindNoCase(arguments.pattern, arguments.data, arguments.startPos, true);
		if (not isArray(local.sucky.len) or arrayLen(local.sucky.len) eq 0){return arrayNew(1);} //handle no match at all
		for (local.i=1; local.i<= arrayLen(local.sucky.len); local.i++){
			//pos 0 & length 0 means this (sub)expression did not match anything
			if (local.sucky.len[local.i] gt 0 && local.sucky.pos[local.i] gt 0){
				local.matchBody = mid(arguments.data, local.sucky.pos[local.i], local.sucky.len[local.i]);
				//skip a segment that is simply the whole input string
				if (local.matchBody neq arguments.data){
					arrayAppend( local.awesome, local.matchBody );
				}
			}
		}
		return local.awesome;
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment