Created
February 23, 2010 15:04
-
-
Save pirj/312275 to your computer and use it in GitHub Desktop.
Agent
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Bot entry point: asks the bot container for a JSON answer, requests the
-- path found in its second element from the game server, converts the
-- returned HTML to XML and prints the login <input> found inside a form.
require 'util'
require 'luarocks.require' -- http://www.luarocks.org/
local http = require 'socket.http' -- http://www.tecgraf.puc-rio.br/~diego/professional/luasocket/
local json = require('json') -- http://luaforge.net/projects/luajson/
local html = require 'lib/html' -- http://luaforge.net/projects/html/
local lom = require 'lxp.lom' -- http://www.keplerproject.org/luaexpat/
local xpath = require 'lib/xpath' -- http://luaxpath.luaforge.net/

local bot_container = 'http://dozorni.heroku.com/whatnow'
local url = 'http://s5.travian.ru'

-- Passing a request body (even an empty one) makes LuaSocket issue a POST.
-- On failure http.request returns nil followed by an error message.
local body, status = http.request(bot_container, '')
assert(body, 'request to ' .. bot_container .. ' failed: ' .. tostring(status))

local dec = json.decode(body)
assert(type(dec) == 'table' and dec[2] ~= nil,
  'unexpected answer from ' .. bot_container)

local htmlf, html_status = http.request(url .. dec[2])
assert(htmlf, 'request to ' .. url .. ' failed: ' .. tostring(html_status))

-- HTML -> tree -> XML text -> LOM tree, so that the XPath module can query it.
local parsed_html = html.parsestr(htmlf)
local xml = to_html(parsed_html[1])
local parsed, parse_err = lom.parse(xml)
assert(parsed, 'generated XML could not be parsed: ' .. tostring(parse_err))

local found = xpath.selectNodes(parsed, "//form//input[@name='login']")
print(to_string(found))
print(#found)

-- Only dump the first node when the query actually matched something.
local first = found[1]
if first then
  print(first.tag)
  print(first.attr.value)
  print(first.attr.name)
  print(first.attr.id)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- $Id: html.lua,v 1.2 2007/05/12 04:37:20 tclua Exp $ | |
module(..., package.seeall) | |
-- Named character entities the parser can decode, keyed by the entity name
-- without the surrounding "&" and ";".
entity = {
  nbsp = " ",
  lt = "<",
  gt = ">",
  quot = "\"",
  amp = "&",
}

-- keep unknown entity as is: looking up a name that is not in the table
-- yields the original "&name;" text.
setmetatable(entity, {
  __index = function (_, key)
    return "&" .. key .. ";"
  end,
})
-- Names of the elements grouped as "block"; the list is nested inside the
-- `child` lists of the `tags` schema.
block = {
  "address",
  "blockquote",
  "center",
  "dir", "div", "dl",
  "fieldset", "form",
  "h1", "h2", "h3", "h4", "h5", "h6", "hr",
  "isindex",
  "menu",
  "noframes",
  "ol",
  "p",
  "pre",
  "table",
  "ul",
}
-- Names of the elements grouped as "inline"; the list is nested inside the
-- `child` lists of the `tags` schema.
inline = {
  "a", "abbr", "acronym", "applet",
  "b", "basefont", "bdo", "big", "br", "button",
  "cite", "code",
  "dfn",
  "em",
  "font",
  "i", "iframe", "img", "input",
  "kbd",
  "label",
  "map",
  "object",
  "q",
  "s", "samp", "select", "small", "span", "strike", "strong", "sub", "sup",
  "textarea", "tt",
  "u",
  "var",
}
-- Per-element schema used by the tree builder:
--   empty        - true for elements that are never pushed on the stack of
--                  open elements (they cannot have children)
--   optional_end - true for elements that are closed implicitly when a start
--                  tag arrives that is not listed in `child`
--   child        - names (possibly nested lists such as `inline` / `block`)
--                  of the elements allowed inside an optional_end element
tags = {
  a = { empty = false },
  abbr = { empty = false },
  acronym = { empty = false },
  address = { empty = false },
  applet = { empty = false },
  area = { empty = true },
  b = { empty = false },
  base = { empty = true },
  basefont = { empty = true },
  bdo = { empty = false },
  big = { empty = false },
  blockquote = { empty = false },
  body = { empty = false },
  br = { empty = true },
  button = { empty = false },
  caption = { empty = false },
  center = { empty = false },
  cite = { empty = false },
  code = { empty = false },
  col = { empty = true },
  colgroup = {
    empty = false,
    optional_end = true,
    child = { "col" },
  },
  dd = { empty = false },
  del = { empty = false },
  dfn = { empty = false },
  dir = { empty = false },
  div = { empty = false },
  dl = { empty = false },
  dt = {
    empty = false,
    optional_end = true,
    child = { inline, "del", "ins", "noscript", "script" },
  },
  em = { empty = false },
  fieldset = { empty = false },
  font = { empty = false },
  form = { empty = false },
  frame = { empty = true },
  frameset = { empty = false },
  h1 = { empty = false },
  h2 = { empty = false },
  h3 = { empty = false },
  h4 = { empty = false },
  h5 = { empty = false },
  h6 = { empty = false },
  head = { empty = false },
  hr = { empty = true },
  html = { empty = false },
  i = { empty = false },
  iframe = { empty = false },
  img = { empty = true },
  input = { empty = true },
  ins = { empty = false },
  isindex = { empty = true },
  kbd = { empty = false },
  label = { empty = false },
  legend = { empty = false },
  li = {
    empty = false,
    optional_end = true,
    child = { inline, block, "del", "ins", "noscript", "script" },
  },
  link = { empty = true },
  map = { empty = false },
  menu = { empty = false },
  meta = { empty = true },
  noframes = { empty = false },
  noscript = { empty = false },
  object = { empty = false },
  ol = { empty = false },
  optgroup = { empty = false },
  option = {
    empty = false,
    optional_end = true,
    child = {},
  },
  p = {
    empty = false,
    optional_end = true,
    child = { inline, "del", "ins", "noscript", "script" },
  },
  param = { empty = true },
  pre = { empty = false },
  q = { empty = false },
  s = { empty = false },
  samp = { empty = false },
  script = { empty = false },
  select = { empty = false },
  small = { empty = false },
  span = { empty = false },
  strike = { empty = false },
  strong = { empty = false },
  style = { empty = false },
  sub = { empty = false },
  sup = { empty = false },
  table = { empty = false },
  tbody = { empty = false },
  td = {
    empty = false,
    optional_end = true,
    child = { inline, block, "del", "ins", "noscript", "script" },
  },
  textarea = { empty = false },
  tfoot = {
    empty = false,
    optional_end = true,
    child = { "tr" },
  },
  th = {
    empty = false,
    optional_end = true,
    child = { inline, block, "del", "ins", "noscript", "script" },
  },
  thead = {
    empty = false,
    optional_end = true,
    child = { "tr" },
  },
  title = { empty = false },
  tr = {
    empty = false,
    optional_end = true,
    child = { "td", "th" },
  },
  tt = { empty = false },
  u = { empty = false },
  ul = { empty = false },
  var = { empty = false },
}

-- Elements that are not listed above are treated as ordinary containers.
setmetatable(tags, {
  __index = function (_, _)
    return { empty = false }
  end,
})
-- string buffer implementation
-- Returns a new buffer object that collects string pieces in `_buf`.
-- Methods (all called with `:`):
--   clear()   - drops every piece, returns the buffer
--   content() - returns the concatenation of all pieces
--   append(s) - adds `s` as the last piece, returns the buffer
--   set(s)    - replaces the pieces with the single piece `s`, returns the buffer
function newbuf ()
  local buf = { _buf = {} }

  function buf.clear (self)
    self._buf = {}
    return self
  end

  function buf.content (self)
    return table.concat(self._buf)
  end

  function buf.append (self, s)
    local pieces = self._buf
    pieces[#pieces + 1] = s
    return self
  end

  function buf.set (self, s)
    self._buf = { s }
    return self
  end

  return buf
end
-- unescape character entities
-- Replaces every "&name;" / "&#digits;" sequence in `s` with entity[name].
-- Returns the results of string.gsub: the new string and the number of
-- replacements made.
function unescape (s)
  -- The helper is local so that calling unescape does not define a global.
  local function entity2string (e)
    return entity[e]
  end
  return string.gsub(s, "&(#?%w+);", entity2string)
end
-- iterator factory
-- Wraps `f` in a coroutine and returns a function that yields, on each call,
-- the next value passed to coroutine.yield inside `f`.
-- The iterator returns nil once `f` has finished (and on every later call),
-- and re-raises any error raised inside `f` instead of handing the error
-- message to the caller as if it were an item.
function makeiter (f)
  local co = coroutine.create(f)
  return function ()
    if coroutine.status(co) == "dead" then
      return nil
    end
    local ok, res = coroutine.resume(co)
    if not ok then
      error(res, 0)
    end
    return res
  end
end
-- constructors for token
-- Tag(s): builds the token for raw tag text `s`. Text starting with "</"
-- becomes an "End" token, anything else a "Start" token; `value` keeps the
-- raw text.
function Tag (s)
  local kind = "Start"
  if string.find(s, "^</") then
    kind = "End"
  end
  return { type = kind, value = s }
end
-- Text(s): builds a "Text" token whose value is `s` with character entities
-- decoded by unescape.
function Text (s)
  -- unescape returns two values; only the string is kept.
  local decoded = unescape(s)
  return { type = "Text", value = decoded }
end
-- lexer: text mode
-- Reads one character from `f` per step. Ordinary characters are collected in
-- `buf`; on "<" or end of input any collected text is yielded as a Text
-- token. After "<" the lexer continues in tag mode with `buf` holding "<".
-- Must run inside a coroutine; the recursion uses tail calls only.
function text (f, buf)
  local c = f:read(1)
  if c and c ~= "<" then
    buf:append(c)
    return text(f, buf)
  end
  -- Either "<" or end of input: flush the pending text, if any.
  local pending = buf:content()
  if pending ~= "" then
    coroutine.yield(Text(pending))
  end
  if c == "<" then
    buf:set(c)
    return tag(f, buf)
  end
end
-- lexer: tag mode
-- Reads one character from `f` per step and collects it in `buf`. On ">" the
-- collected text (including the ">") is yielded as a tag token, the buffer is
-- cleared and the lexer returns to text mode. At end of input an unfinished
-- tag is yielded as it is.
-- Must run inside a coroutine; the recursion uses tail calls only.
function tag (f, buf)
  local c = f:read(1)
  if c == ">" then
    coroutine.yield(Tag(buf:append(c):content()))
    buf:clear()
    return text(f, buf)
  elseif c then
    buf:append(c)
    return tag(f, buf)
  elseif buf:content() ~= "" then
    coroutine.yield(Tag(buf:content()))
  end
end
-- Builds an element table from the raw text of a start tag.
-- The result has `_tag` (the first run of alphanumerics after "<", or nil if
-- there is none) and `_attr`, a table of the quoted attributes found in the
-- tag with their values entity-decoded. Unquoted attribute values are not
-- recognised.
function parse_starttag(tag)
  local elem = {
    _tag = string.match(tag, "<%s*(%w+)"),
    _attr = {},
  }
  -- Attribute names may contain "-", "_", ":" and "." (http-equiv, xml:lang);
  -- matching only %w+ would silently shorten them to their last segment.
  local attr_pattern = "([%w_:%.%-]+)%s*=%s*([\"'])(.-)%2"
  for key, _, val in string.gmatch(tag, attr_pattern) do
    -- unescape returns two values; only the string is kept.
    local decoded = unescape(val)
    elem._attr[key] = decoded
  end
  return elem
end
-- Returns the element name found in the raw text of an end tag ("</name>"),
-- or nil when the text does not contain one.
function parse_endtag(tag)
  return (string.match(tag, "<%s*/%s*(%w+)"))
end
-- find last element that satisfies given predicate
-- Scans the sequence `t` from its end and returns the index and the value of
-- the first element for which pred(value) is true; returns nothing when no
-- element qualifies.
function rfind(t, pred)
  local i = #t
  while i >= 1 do
    local candidate = t[i]
    if pred(candidate) then
      return i, candidate
    end
    i = i - 1
  end
end
-- Flattens nested sequences: appends every non-table item of `t`, in order
-- and recursing into table items, to `acc` (a new table when omitted) and
-- returns `acc`.
function flatten(t, acc)
  acc = acc or {}
  for _, item in ipairs(t) do
    if type(item) ~= "table" then
      acc[#acc + 1] = item
    else
      flatten(item, acc)
    end
  end
  return acc
end
-- Returns true when the schema entry for elem._tag has `optional_end` set,
-- false otherwise.
function optional_end_p(elem)
  return tags[elem._tag].optional_end and true or false
end
-- Returns true when `child` may appear inside `parent` according to the
-- schema: either the parent's entry has no `child` list, or the (flattened)
-- list contains child._tag. Returns false otherwise.
function valid_child_p(child, parent)
  local schema = tags[parent._tag].child
  if not schema then
    return true
  end
  local wanted = child._tag
  for _, name in ipairs(flatten(schema)) do
    if name == wanted then
      return true
    end
  end
  return false
end
-- tree builder
-- Reads tokens produced by the lexer from `f` (any object with a
-- read(self, n) method) and builds a tree of element tables. Each element has
-- `_tag`, `_attr` and its children (elements or text strings) in the array
-- part. Returns the root, an element whose `_tag` is "#document".
function parse(f)
  local root = { _tag = "#document", _attr = {} }
  local stack = { root } -- currently open elements, innermost last

  for token in makeiter(function () return text(f, newbuf()) end) do
    if token.type == "Start" then
      local new = parse_starttag(token.value)
      local top = stack[#stack]
      -- Implicitly close open elements whose end tag is optional and that
      -- cannot contain the new element.
      while
        top._tag ~= "#document" and
        optional_end_p(top) and
        not valid_child_p(new, top)
      do
        stack[#stack] = nil
        top = stack[#stack]
      end
      top[#top + 1] = new -- appendchild
      if not tags[new._tag].empty then
        stack[#stack + 1] = new -- push
      end
    elseif token.type == "End" then
      local name = parse_endtag(token.value)
      local openingpos = rfind(stack, function (v)
        return v._tag == name
      end)
      -- An end tag without a matching open element is ignored; otherwise the
      -- matching element and everything opened after it are closed.
      if openingpos then
        for j = #stack, openingpos, -1 do
          stack[j] = nil
        end
      end
    else -- Text
      local top = stack[#stack]
      top[#top + 1] = token.value
    end
  end

  return root
end
-- Parses the HTML text `s` and returns what parse returns for it.
-- The string is wrapped in a minimal file-like object whose read(length)
-- method returns the next `length` bytes, or nothing once the end of the
-- string has been passed.
function parsestr(s)
  local handle = { _content = s, _pos = 1 }

  function handle.read (self, length)
    local first = self._pos
    if first > string.len(self._content) then
      return
    end
    self._pos = first + length
    return string.sub(self._content, first, first + length - 1)
  end

  return parse(handle)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Returns `s` without leading and trailing whitespace; a string that contains
-- only whitespace (or nothing) gives "".
function trim(s)
  local first = s:find("%S")
  if not first then
    return ""
  end
  -- Greedy match from the first non-space up to the last non-space character.
  return s:match(".*%S", first)
end
-- Serialises a tree produced by the HTML parser as markup text.
--   h      - an element table (`_tag`, `_attr`, children in the array part)
--            or a text string
--   indent - prefix put in front of this element's tags (default '')
-- An element with `_tag` is written as an opening tag line, its children one
-- level deeper, and a closing tag line; a table without `_tag` only writes
-- its children. Text is trimmed, skipped when empty, and written after a
-- single space. Text is escaped with encode, like attribute values, so that
-- characters such as "&" and "<" do not break the generated markup.
-- Values that are neither tables nor strings produce ''.
function to_html(h, indent)
  indent = indent or ''
  local kind = type(h)

  if kind == 'string' then
    local s = trim(h)
    if #s > 0 then
      return ' ' .. encode(s)
    end
    return ''
  elseif kind ~= 'table' then
    return ''
  end

  -- Collect the pieces and join once instead of growing a string repeatedly.
  local out = {}
  local tagname = h._tag
  if tagname then
    out[#out + 1] = indent .. '<' .. tagname .. attrs(h._attr) .. '>\n'
  end
  local child_indent = tagname and (indent .. ' ') or indent
  for _, child in ipairs(h) do
    out[#out + 1] = to_html(child, child_indent)
  end
  if tagname then
    out[#out + 1] = indent .. '</' .. tagname .. '>\n'
  end
  return table.concat(out)
end
-- Formats an attribute table as markup: for every key/value pair a piece of
-- the form ' key="value"' with the value escaped by encode. Returns '' for
-- nil or an empty table. The order of the attributes follows pairs and is
-- therefore unspecified.
function attrs(attrl)
  if attrl == nil then
    return ''
  end
  local pieces = {}
  for key, value in pairs(attrl) do
    pieces[#pieces + 1] = ' ' .. key .. '="' .. encode(value) .. '"'
  end
  return table.concat(pieces)
end
-- Escapes the five characters that are special in XML markup
-- (< > " & ') with their predefined entities. Non-string values are
-- converted with tostring first. Returns the escaped string.
function encode(str)
  if type(str) ~= "string" then
    str = tostring(str)
  end
  local repl = {
    ["<"] = "&lt;",
    [">"] = "&gt;",
    ["\""] = "&quot;",
    ["&"] = "&amp;",
    ["'"] = "&apos;",
  }
  -- Parentheses drop gsub's second result (the replacement count).
  return (string.gsub(str, "[<>\"&']", repl))
end
-- debug | |
require 'io' | |
-- Writes the text `xml` to the file `filename`, preceded by an XML
-- declaration and a comment naming the file. Does nothing when `filename` is
-- nil or empty. Raises an error carrying io.open's message when the file
-- cannot be opened for writing.
function save(xml, filename)
  if not filename or #filename == 0 then
    return
  end
  local file = assert(io.open(filename, "w"))
  file:write("<?xml version=\"1.0\"?>\n<!-- file \"", filename, "\", generated by LuaXML -->\n\n")
  file:write(xml)
  file:close()
end
-- Renders a value as text for debugging.
--   tt     - the value to render
--   indent - number of spaces put in front of each line (default 0)
--   done   - set of tables already rendered, used to avoid repeating them
-- For a table: every table value not rendered before becomes a "{ ... },"
-- group indented two more spaces; other values with a non-numeric key become
-- a 'key = "value",' line; other values with a numeric key are skipped.
-- Any non-table value is rendered as tostring(value) followed by a newline.
function table_print (tt, indent, done)
  done = done or {}
  indent = indent or 0

  if type(tt) ~= "table" then
    -- tostring keeps booleans and nil from raising in the concatenation.
    return tostring(tt) .. "\n"
  end

  local pad = string.rep(" ", indent)
  local sb = {}
  for key, value in pairs(tt) do
    if type(value) == "table" and not done[value] then
      done[value] = true
      sb[#sb + 1] = pad
      sb[#sb + 1] = "{\n"
      sb[#sb + 1] = table_print(value, indent + 2, done)
      sb[#sb + 1] = pad
      sb[#sb + 1] = "},\n"
    elseif type(key) ~= "number" then
      sb[#sb + 1] = pad
      sb[#sb + 1] = string.format("%s = \"%s\",\n", tostring(key), tostring(value))
    end
  end
  return table.concat(sb)
end
-- Converts any value to a string: tables go through table_print, strings are
-- returned unchanged, everything else (including nil) through tostring.
function to_string( tbl )
  local kind = type(tbl)
  if kind == "table" then
    return table_print(tbl)
  elseif kind == "string" then
    return tbl
  end
  return tostring(tbl)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
----------------------------------------------------------------------------- | |
-- XPath module based on LuaExpat | |
-- Description: Module that provides xpath capabilities to xmls. | |
-- Author: Gal Dubitski | |
-- Version: 0.1 | |
-- Date: 2008-01-15 | |
----------------------------------------------------------------------------- | |
----------------------------------------------------------------------------- | |
-- Declare module and import dependencies | |
----------------------------------------------------------------------------- | |
module(..., package.seeall) | |
-- Module state shared by the functions below:
--   resultTable - the results collected by the query in progress
--   option      - nil, "text()", "node()" or "@attribute": what to collect
--                 from each matching node
local resultTable, option = {}, nil

-----------------------------------------------------------------------------
-- Supported functions
-----------------------------------------------------------------------------

-- Appends the part of `leaf` selected by `option` to resultTable:
-- the node itself (nil), its first child ("text()"), its tag name ("node()")
-- or the value of one attribute ("@name"). Values that are not tables are
-- ignored.
local function insertToTable(leaf)
  if type(leaf) ~= "table" then
    return
  end
  if option == nil then
    table.insert(resultTable, leaf)
  elseif option == "text()" then
    table.insert(resultTable, leaf[1])
  elseif option == "node()" then
    table.insert(resultTable, leaf.tag)
  elseif option:find("@") == 1 then
    table.insert(resultTable, leaf.attr[option:sub(2)])
  end
end
-- Tests one node against one XPath step.
--   tag      - tag name of the node
--   tagAttr  - attribute table of the node
--   tagExpr  - the step: "*", "" (descendant marker inserted for a leading
--              "//"), "name", "name[@attr='value']" or "name[@attr]";
--              "*" may also be used as the name in front of a predicate
--   nextTag  - the step that follows tagExpr, if any
-- Returns true when the node satisfies the step and false otherwise.
-- For the descendant marker the first result is always false and a second
-- result tells the caller what to do: 1 when the node satisfies `nextTag`,
-- 0 when the search has to continue below the node.
local function match(tag, tagAttr, tagExpr, nextTag)
  -- check if its a wild card
  if tagExpr == "*" then
    return true
  end

  -- check if its empty (descendant marker): evaluate the following step as a
  -- full expression so that predicates and "*" work after "//" as well
  if tagExpr == "" then
    if nextTag ~= nil and nextTag ~= "" and match(tag, tagAttr, nextTag) then
      return false, 1
    end
    return false, 0
  end

  -- check if there is an expression to evaluate
  local open = tagExpr:find("[", 1, true)
  local close = tagExpr:find("]", 1, true)
  if not (open and close) then
    return tag == tagExpr
  end

  local evalTag = tagExpr:sub(1, open - 1)
  if evalTag ~= "*" and evalTag ~= tag then
    return false
  end

  -- only attribute predicates are supported
  local expression = tagExpr:sub(open + 1, close - 1)
  local at = expression:find("@", 1, true)
  if not at then
    return false
  end

  local eq = expression:find("=", 1, true)
  if not eq then
    -- "[@attr]": the attribute only has to be present
    return tagAttr[expression:sub(at + 1)] ~= nil
  end

  local evalAttr = expression:sub(at + 1, eq - 1)
  -- strip the quotes around the expected value
  local evalValue = expression:sub(eq + 1):gsub("['\"]", "")
  return tagAttr[evalAttr] == evalValue
end
-- Walks the children of `xmlTable` and collects the nodes selected by the
-- steps tags[counter], tags[counter + 1], ... through insertToTable.
--   tags     - sequence of XPath steps
--   xmlTable - table whose array part holds the candidate nodes; only
--              entries that are tables with both `tag` and `attr` are tested
--   counter  - index of the step to test the candidates against
-- Always returns nil; results are gathered as a side effect.
-- A node accepted by a descendant marker is collected (or searched with the
-- step after next) but not searched again for the same step.
local function parseNodes(tags, xmlTable, counter)
  if counter > #tags then
    return nil
  end

  local currentTag = tags[counter]
  local nextTag = tags[counter + 1] -- nil when currentTag is the last step
  local last = #tags

  for _, value in ipairs(xmlTable) do
    if type(value) == "table" and value.tag ~= nil and value.attr ~= nil then
      local matched, skip = match(value.tag, value.attr, currentTag, nextTag)
      if matched then
        -- the node satisfies the current step
        if counter == last then
          insertToTable(value)
        else
          parseNodes(tags, value, counter + 1)
        end
      elseif skip == 1 then
        -- descendant marker: the node satisfies the step after the marker
        if counter + 1 == last then
          insertToTable(value)
        else
          parseNodes(tags, value, counter + 2)
        end
      elseif skip ~= nil then
        -- descendant marker: keep looking below this node
        parseNodes(tags, value, counter)
      end
    end
  end
end
-- Runs an XPath-like query against a LOM tree.
--   xml   - the root node (a table)
--   xpath - the query string; steps are separated by "/" or "\", a leading
--           "//" searches at any depth, and a final "text()", "node()" or
--           "@attribute" step selects what is collected from each match
-- Returns a new table with the collected results (empty when nothing
-- matches or the query contains no steps). Raises when an argument has the
-- wrong type.
function selectNodes(xml, xpath)
  assert(type(xml) == "table")
  assert(type(xpath) == "string")

  resultTable = {}

  -- Kept local: assigning a global here would leak the step list out of the
  -- function.
  local steps = split(xpath, '[\\/]+')
  local lastTag = steps[#steps]
  if lastTag ~= nil and
     (lastTag == "text()" or lastTag == "node()" or lastTag:find("@") == 1) then
    option = table.remove(steps)
  else
    option = nil
  end

  -- a leading "//" is represented by an empty first step
  if xpath:find("//") == 1 then
    table.insert(steps, 1, "")
  end

  parseNodes(steps, { xml }, 1)
  return resultTable
end
-- Splits `str` at every match of the Lua pattern `pat` and returns the pieces
-- as a sequence. An empty piece in front of a separator at the very start of
-- the string is dropped, and nothing is added after a separator that ends
-- the string. If the separator pattern matches the empty string, splitting
-- stops there and the rest of the string becomes the last piece.
function split(str, pat)
  local t = {} -- NOTE: use {n = 0} in Lua-5.0
  local fpat = "(.-)" .. pat
  local last_end = 1
  local s, e, cap = str:find(fpat, last_end)
  while s do
    -- An empty match would never advance last_end and loop forever.
    if e < s then
      break
    end
    if s ~= 1 or cap ~= "" then
      t[#t + 1] = cap
    end
    last_end = e + 1
    s, e, cap = str:find(fpat, last_end)
  end
  if last_end <= #str then
    t[#t + 1] = str:sub(last_end)
  end
  return t
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment