Skip to content

Instantly share code, notes, and snippets.

@pirj
Created February 23, 2010 15:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pirj/312275 to your computer and use it in GitHub Desktop.
Save pirj/312275 to your computer and use it in GitHub Desktop.
Agent
-- Agent script: asks the bot container what to do next, downloads the page
-- named in the reply, converts it to XML and prints details of the login
-- input located through XPath.
require 'util'
require 'luarocks.require' -- http://www.luarocks.org/
local http = require 'socket.http' -- http://www.tecgraf.puc-rio.br/~diego/professional/luasocket/
local bot_container = 'http://dozorni.heroku.com/whatnow'
-- http.request gives nil plus an error message when the request fails; stop
-- with that message instead of handing nil to the JSON decoder.
local body, status = http.request(bot_container, '')
assert(body, status)
local json = require('json') -- http://luaforge.net/projects/luajson/
local dec = json.decode(body)
local url = 'http://s5.travian.ru'
-- NOTE(review): dec[2] is presumably a path on the game server -- confirm
-- against the reply format of the bot container.
local htmlf = assert(http.request(url..dec[2]))
local html = require 'lib/html' -- http://luaforge.net/projects/html/
local parsed_html = html.parsestr(htmlf)
-- serialise the first child of the parsed document so expat can read it
local xml = to_html(parsed_html[1])
local lom = require 'lxp.lom' -- http://www.keplerproject.org/luaexpat/
local parsed = lom.parse(xml)
local xpath = require 'lib/xpath' -- http://luaxpath.luaforge.net/
local found = xpath.selectNodes(parsed, "//form//input[@name='login']")
print(to_string(found))
print(#found)
-- Describe the first match only when there is one; indexing found[1]
-- unconditionally raised an error whenever the query matched nothing.
local first = found[1]
if first then
  print(first.tag)
  print(first.attr.value)
  print(first.attr.name)
  print(first.attr.id)
end
-- $Id: html.lua,v 1.2 2007/05/12 04:37:20 tclua Exp $
module(..., package.seeall)
-- Character entity names mapped to the text they stand for.
-- A name missing from the table is handed back in its original
-- "&name;" form, so unknown entities pass through unchanged.
entity = setmetatable(
  {
    nbsp = " ",
    lt = "<",
    gt = ">",
    quot = "\"",
    amp = "&",
  },
  {
    __index = function (_, name)
      return "&" .. name .. ";"
    end,
  }
)
-- Element names grouped under the name "block"; the child lists in the
-- tags table refer to this list as a whole.
block = {
  "address", "blockquote", "center",
  "dir", "div", "dl",
  "fieldset", "form",
  "h1", "h2", "h3", "h4", "h5", "h6",
  "hr", "isindex", "menu", "noframes",
  "ol", "p", "pre", "table", "ul",
}
-- Element names grouped under the name "inline"; the child lists in the
-- tags table refer to this list as a whole.
inline = {
  "a", "abbr", "acronym", "applet",
  "b", "basefont", "bdo", "big", "br", "button",
  "cite", "code", "dfn", "em", "font",
  "i", "iframe", "img", "input",
  "kbd", "label", "map", "object", "q",
  "s", "samp", "select", "small", "span",
  "strike", "strong", "sub", "sup",
  "textarea", "tt", "u", "var",
}
-- Per-element parsing rules, keyed by tag name. Fields used by the parser:
--   empty        - when true the tree builder does not push the element on
--                  its stack, so it never receives children
--   optional_end - when set, an open element of this kind may be closed
--                  implicitly by a start tag that is not a valid child
--   child        - list of permitted child tag names; entries may themselves
--                  be lists (such as inline and block), which valid_child_p
--                  flattens. Without this field every child is accepted.
tags = {
a = { empty = false },
abbr = {empty = false} ,
acronym = {empty = false} ,
address = {empty = false} ,
applet = {empty = false} ,
area = {empty = true} ,
b = {empty = false} ,
base = {empty = true} ,
basefont = {empty = true} ,
bdo = {empty = false} ,
big = {empty = false} ,
blockquote = {empty = false} ,
body = { empty = false, },
br = {empty = true} ,
button = {empty = false} ,
caption = {empty = false} ,
center = {empty = false} ,
cite = {empty = false} ,
code = {empty = false} ,
col = {empty = true} ,
colgroup = {
empty = false,
optional_end = true,
child = {"col",},
},
dd = {empty = false} ,
del = {empty = false} ,
dfn = {empty = false} ,
dir = {empty = false} ,
div = {empty = false} ,
dl = {empty = false} ,
dt = {
empty = false,
optional_end = true,
child = {
inline,
"del",
"ins",
"noscript",
"script",
},
},
em = {empty = false} ,
fieldset = {empty = false} ,
font = {empty = false} ,
form = {empty = false} ,
frame = {empty = true} ,
frameset = {empty = false} ,
h1 = {empty = false} ,
h2 = {empty = false} ,
h3 = {empty = false} ,
h4 = {empty = false} ,
h5 = {empty = false} ,
h6 = {empty = false} ,
head = {empty = false} ,
hr = {empty = true} ,
html = {empty = false} ,
i = {empty = false} ,
iframe = {empty = false} ,
img = {empty = true} ,
input = {empty = true} ,
ins = {empty = false} ,
isindex = {empty = true} ,
kbd = {empty = false} ,
label = {empty = false} ,
legend = {empty = false} ,
li = {
empty = false,
optional_end = true,
child = {
inline,
block,
"del",
"ins",
"noscript",
"script",
},
},
link = {empty = true} ,
map = {empty = false} ,
menu = {empty = false} ,
meta = {empty = true} ,
noframes = {empty = false} ,
noscript = {empty = false} ,
object = {empty = false} ,
ol = {empty = false} ,
optgroup = {empty = false} ,
-- an empty child list means no start tag is a valid child of option
option = {
empty = false,
optional_end = true,
child = {},
},
p = {
empty = false,
optional_end = true,
child = {
inline,
"del",
"ins",
"noscript",
"script",
},
} ,
param = {empty = true} ,
pre = {empty = false} ,
q = {empty = false} ,
s = {empty = false} ,
samp = {empty = false} ,
script = {empty = false} ,
select = {empty = false} ,
small = {empty = false} ,
span = {empty = false} ,
strike = {empty = false} ,
strong = {empty = false} ,
style = {empty = false} ,
sub = {empty = false} ,
sup = {empty = false} ,
table = {empty = false} ,
tbody = {empty = false} ,
td = {
empty = false,
optional_end = true,
child = {
inline,
block,
"del",
"ins",
"noscript",
"script",
},
},
textarea = {empty = false} ,
tfoot = {
empty = false,
optional_end = true,
child = {"tr",},
},
th = {
empty = false,
optional_end = true,
child = {
inline,
block,
"del",
"ins",
"noscript",
"script",
},
},
thead = {
empty = false,
optional_end = true,
child = {"tr",},
},
title = {empty = false} ,
tr = {
empty = false,
optional_end = true,
child = {
"td", "th",
},
},
tt = {empty = false} ,
u = {empty = false} ,
ul = {empty = false} ,
var = {empty = false} ,
}
-- Tag names that are not listed above get a fresh default rule set:
-- not empty, no optional end tag, no restriction on children.
setmetatable(tags, {
__index = function (t, key)
return {empty = false}
end
})
-- string buffer implementation
-- Pieces are collected in the _buf list and joined only when content() is
-- called. clear, append and set return the buffer so calls can be chained.
function newbuf ()
  local buffer = { _buf = {} }

  -- forget everything collected so far
  function buffer.clear (self)
    self._buf = {}
    return self
  end

  -- the collected pieces joined into one string
  function buffer.content (self)
    return table.concat(self._buf)
  end

  -- add one piece at the end
  function buffer.append (self, s)
    local pieces = self._buf
    pieces[#pieces + 1] = s
    return self
  end

  -- replace the whole content with a single piece
  function buffer.set (self, s)
    self._buf = { s }
    return self
  end

  return buffer
end
-- unescape character entities
-- Every "&name;" or "&#digits;" reference in s is looked up in the entity
-- table (the leading "&" and trailing ";" are stripped; a "#" is kept as part
-- of the key) and replaced by the value found there.
-- Returns the converted string followed by the number of references
-- processed, exactly as string.gsub reports them.
function unescape (s)
  -- Declared local: the previous version (re)defined this helper as a
  -- global each time unescape ran.
  local function entity2string (e)
    return entity[e]
  end
  return string.gsub(s, "&(#?%w+);", entity2string)
end
-- iterator factory
-- Runs f as a coroutine and returns a function that hands out, one call at a
-- time, the values f passes to coroutine.yield. Once f has finished the
-- function returns nil, which ends a generic for loop.
-- An error raised inside f is re-raised to the caller with its original
-- message; previously it came back as an ordinary value and every later call
-- returned the "cannot resume dead coroutine" text instead of nil.
function makeiter (f)
  local co = coroutine.create(f)
  return function ()
    -- a finished (or failed) coroutine cannot be resumed: report exhaustion
    if coroutine.status(co) == "dead" then
      return nil
    end
    local ok, res = coroutine.resume(co)
    if not ok then
      -- level 0 keeps the message exactly as the coroutine produced it
      error(res, 0)
    end
    return res
  end
end
-- constructors for token
-- Tag: wrap raw markup in a token; markup starting with "</" becomes an
-- "End" token, anything else a "Start" token.
function Tag (s)
  local kind = "Start"
  if string.find(s, "^</") then
    kind = "End"
  end
  return {type = kind, value = s}
end
-- Text: wrap character data in a "Text" token after passing it through
-- unescape (only the first value unescape returns is kept).
function Text (s)
  return {type = "Text", value = unescape(s)}
end
-- lexer: text mode
-- Consumes one character from f per step. Ordinary characters are collected
-- in buf; a "<" flushes the collected text as a Text token and switches to
-- tag mode; end of input flushes the collected text and stops.
-- Must run inside a coroutine because tokens are delivered with yield.
function text (f, buf)
  local ch = f:read(1)
  if not ch then
    -- end of input: emit whatever text is still pending
    if buf:content() ~= "" then coroutine.yield(Text(buf:content())) end
    return
  end
  if ch ~= "<" then
    buf:append(ch)
    return text(f, buf)
  end
  -- start of markup: emit pending text, then let tag mode collect the markup
  if buf:content() ~= "" then coroutine.yield(Text(buf:content())) end
  buf:set(ch)
  return tag(f, buf)
end
-- lexer: tag mode
-- Consumes one character from f per step and collects it in buf. A ">"
-- completes the markup, which is emitted as a token built by Tag before the
-- lexer returns to text mode with an emptied buffer. At end of input any
-- unfinished markup is still emitted.
-- Must run inside a coroutine because tokens are delivered with yield.
function tag (f, buf)
  local ch = f:read(1)
  if not ch then
    -- input ended in the middle of a tag
    if buf:content() ~= "" then coroutine.yield(Tag(buf:content())) end
    return
  end
  if ch ~= ">" then
    buf:append(ch)
    return tag(f, buf)
  end
  buf:append(ch)
  coroutine.yield(Tag(buf:content()))
  buf:clear()
  return text(f, buf)
end
-- Build an element table from the raw text of a start tag.
-- Returns {_tag = name, _attr = {key = value, ...}}; _tag is nil when no
-- alphanumeric name follows the "<". Only attributes whose value is wrapped
-- in matching single or double quotes are recorded, and each value is passed
-- through unescape.
function parse_starttag(tag)
  local elem = {
    _tag = string.match(tag, "<%s*(%w+)"),
    _attr = {},
  }
  -- Attribute names may contain "-", "_" and ":" (http-equiv, data-id,
  -- xml:lang). Matching them with %w+ alone kept just the part after the
  -- last such character, e.g. "equiv" for http-equiv.
  for key, _, val in string.gmatch(tag, "([%w_:%-]+)%s*=%s*([\"'])(.-)%2") do
    -- parentheses keep only the first value unescape returns
    elem._attr[key] = (unescape(val))
  end
  return elem
end
-- Extract the element name from the raw text of an end tag such as "</div>".
-- Whitespace around the slash is tolerated; returns nil when the text does
-- not have the shape of an end tag.
function parse_endtag(tag)
  return (string.match(tag, "<%s*/%s*(%w+)"))
end
-- find last element that satisfies given predicate
-- Scans t from its last index down to 1 and returns the index and the
-- element of the first hit; returns nothing when no element qualifies.
function rfind(t, pred)
  local i = #t
  while i >= 1 do
    local item = t[i]
    if pred(item) then
      return i, item
    end
    i = i - 1
  end
end
-- Collect the non-table items of a possibly nested list, depth first and in
-- order, into acc (a new list when acc is omitted). Returns acc.
function flatten(t, acc)
  if not acc then
    acc = {}
  end
  for _, item in ipairs(t) do
    if type(item) ~= "table" then
      acc[#acc + 1] = item
    else
      flatten(item, acc)
    end
  end
  return acc
end
-- True when the rules in tags mark elem's tag as having an optional end
-- tag; always returns a real boolean.
function optional_end_p(elem)
  return tags[elem._tag].optional_end and true or false
end
-- Decide whether child may appear directly inside parent according to the
-- child list of parent's entry in tags. A parent without a child list
-- accepts anything; otherwise child's tag must occur in the flattened list.
function valid_child_p(child, parent)
  local allowed = tags[parent._tag].child
  if not allowed then
    return true
  end
  local names = flatten(allowed)
  for i = 1, #names do
    if names[i] == child._tag then
      return true
    end
  end
  return false
end
-- tree builder
-- Reads tokens produced by the text/tag lexers from f (an object with a
-- read(n) method) and assembles them into a tree. Each element is a table
-- with _tag and _attr plus its children (elements or text strings) in the
-- array part. Returns the "#document" root.
function parse(f)
  local root = {_tag = "#document", _attr = {}}
  local stack = {root} -- chain of currently open elements, innermost last
  local next_token = makeiter(function () return text(f, newbuf()) end)
  local tok = next_token()
  while tok ~= nil do
    if tok.type == "Start" then
      local elem = parse_starttag(tok.value)
      local top = stack[#stack]
      -- implicitly close open elements whose end tag is optional and that
      -- cannot contain the new element
      while top._tag ~= "#document"
        and optional_end_p(top)
        and not valid_child_p(elem, top)
      do
        stack[#stack] = nil
        top = stack[#stack]
      end
      top[#top + 1] = elem -- appendchild
      if not tags[elem._tag].empty then
        stack[#stack + 1] = elem -- push
      end
    elseif tok.type == "End" then
      local name = parse_endtag(tok.value)
      local openingpos = rfind(stack, function (v) return v._tag == name end)
      -- an end tag without a matching open element is ignored; otherwise
      -- everything from the matching element upwards is closed
      if openingpos then
        for _ = #stack, openingpos, -1 do
          table.remove(stack)
        end
      end
    else -- Text
      local top = stack[#stack]
      top[#top + 1] = tok.value
    end
    tok = next_token()
  end
  return root
end
-- Parse HTML held in the string s. The string is wrapped in a small reader
-- object whose read(n) method returns the next n characters (fewer at the
-- end) and nothing once the string is used up; that reader is handed to parse.
function parsestr(s)
  local handle = { _content = s, _pos = 1 }

  function handle.read (self, length)
    local start = self._pos
    if start > string.len(self._content) then
      return
    end
    self._pos = start + length
    return string.sub(self._content, start, start + length - 1)
  end

  return parse(handle)
end
-- Strip leading and trailing whitespace from s; a string without any
-- non-whitespace character becomes "".
function trim(s)
  local first = s:find("%S")
  if not first then
    return ""
  end
  -- greedy match from the first visible character to the last one
  return (s:match(".*%S", first))
end
-- Serialise a parsed node as markup text.
--   h      - an element table (_tag, _attr, children in the array part) or
--            a text string
--   indent - prefix for the tag lines of this node; defaults to ''
-- Elements become "<tag attrs>" and "</tag>" lines around their children,
-- which are indented one level deeper. A table without _tag contributes only
-- its children. Text is trimmed, skipped when empty, and written after a
-- single space.
-- Text is now passed through encode, like attribute values already were:
-- writing "<", ">" or "&" from character data verbatim produced output that
-- an XML parser rejects.
function to_html(h, indent)
  indent = indent or ''
  if type(h) == 'string' then
    local s = trim(h)
    if #s > 0 then
      return ' '..encode(s)
    end
    return ''
  end
  -- collect the pieces and join once instead of growing a string repeatedly
  local out = {}
  local tagname = h._tag
  if tagname then
    out[#out + 1] = indent..'<'..tagname..attrs(h._attr)..'>\n'
  end
  if type(h) == 'table' then
    local child_indent = tagname and indent..' ' or indent
    for _, v in ipairs(h) do
      out[#out + 1] = to_html(v, child_indent)
    end
  end
  if tagname then
    out[#out + 1] = indent..'</'..tagname..'>\n'
  end
  return table.concat(out)
end
-- Render an attribute table as ' key="value"' pairs, with every value passed
-- through encode. Returns '' for nil. Pairs appear in the order pairs()
-- visits them.
function attrs(attrl)
  if attrl == nil then
    return ''
  end
  local parts = {}
  for key, value in pairs(attrl) do
    parts[#parts + 1] = ' '..key..'="'..encode(value)..'"'
  end
  return table.concat(parts)
end
-- Escape the characters < > " & ' as the corresponding named entities.
-- Values that are not strings are converted with tostring first.
-- Returns the escaped string only.
function encode(str)
  local text = str
  if type(text) ~= "string" then
    text = tostring(text)
  end
  local escaped = string.gsub(text, "[<>\"&']", {
    ["&"] = "&amp;",
    ["<"] = "&lt;",
    [">"] = "&gt;",
    ["\""] = "&quot;",
    ["'"] = "&apos;",
  })
  return escaped
end
-- debug
require 'io'
-- saves a Lua var as xml file
-- Writes an XML declaration, a comment naming the file, and then the text in
-- xml to filename, replacing any existing file.
-- Does nothing when filename is nil or empty.
-- Raises an error carrying the message from io.open when the file cannot be
-- opened; before, that case surfaced as an "attempt to index a nil value"
-- error on the following line.
function save(xml, filename)
  if not filename or #filename == 0 then return end
  local file = assert(io.open(filename, "w"))
  file:write("<?xml version=\"1.0\"?>\n<!-- file \"",filename, "\", generated by LuaXML -->\n\n")
  file:write(xml)
  file:close()
end
-- Render a value as indented text for debugging.
--   tt     - value to render
--   indent - number of spaces placed before each line (default 0)
--   done   - set of tables already rendered, used to stop at cycles
-- For a table: every table-valued entry not rendered before becomes a
-- "{ ... }," group indented two further spaces (its key is not printed);
-- remaining entries with a non-numeric key become 'key = "value",' lines;
-- remaining entries with a numeric key are skipped.
-- Any other value is rendered as its text followed by a newline.
function table_print (tt, indent, done)
  done = done or {}
  indent = indent or 0
  if type(tt) ~= "table" then
    -- tostring makes this branch safe for booleans and nil, which cannot be
    -- concatenated directly; strings and numbers render as before
    return tostring(tt) .. "\n"
  end
  local sb = {}
  for key, value in pairs (tt) do
    if type (value) == "table" and not done [value] then
      table.insert(sb, string.rep (" ", indent)) -- indent it
      done [value] = true
      table.insert(sb, "{\n");
      table.insert(sb, table_print (value, indent + 2, done))
      table.insert(sb, string.rep (" ", indent)) -- indent it
      table.insert(sb, "},\n");
    elseif "number" == type(key) then
      -- array-style entries are deliberately left out of the dump
    else
      table.insert(sb, string.rep (" ", indent)) -- indent it
      table.insert(sb, string.format(
        "%s = \"%s\",\n", tostring (key), tostring(value)))
    end
  end
  return table.concat(sb)
end
-- Convert any value to text: tables go through table_print, strings are
-- returned unchanged, everything else (nil included) through tostring.
function to_string( tbl )
  local kind = type(tbl)
  if kind == "table" then
    return table_print(tbl)
  end
  if kind == "string" then
    return tbl
  end
  return tostring(tbl)
end
-----------------------------------------------------------------------------
-- XPath module based on LuaExpat
-- Description: Module that provides xpath capabilities to xmls.
-- Author: Gal Dubitski
-- Version: 0.1
-- Date: 2008-01-15
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-- Declare module and import dependencies
-----------------------------------------------------------------------------
module(..., package.seeall)
-- State shared by the functions below during one selectNodes call:
--   resultTable - list the matches are appended to; replaced by a fresh
--                 table at the start of every selectNodes call
--   option      - trailing "text()", "node()" or "@attr" step of the path,
--                 or nil when whole nodes are to be collected
local resultTable,option = {},nil
-----------------------------------------------------------------------------
-- Supported functions
-----------------------------------------------------------------------------
-- Append the part of a matched node selected by option to resultTable:
-- the node itself (option nil), its first child ("text()"), its tag name
-- ("node()") or one attribute value ("@name"). Values that are not tables
-- and unrecognised options are ignored.
local function insertToTable(leaf)
  if type(leaf) ~= "table" then
    return
  end
  if option == nil then
    table.insert(resultTable,leaf)
    return
  end
  if option == "text()" then
    table.insert(resultTable,leaf[1])
    return
  end
  if option == "node()" then
    table.insert(resultTable,leaf.tag)
    return
  end
  if option:find("@") == 1 then
    local attrName = option:sub(2)
    table.insert(resultTable,leaf.attr[attrName])
  end
end
-- Test one node against one step of the path.
--   tag, tagAttr - name and attribute table of the node
--   tagExpr      - the step: "*", "" (descendant marker), "name",
--                  "name[@attr='value']" or "name[@attr]"
--   nextTag      - the step that follows tagExpr, if any
-- Returns true when the node satisfies tagExpr. For the empty step it
-- returns false plus a second value: 1 when the node satisfies nextTag,
-- 0 otherwise. All other failures return false or nil without a second value.
--
-- Two defects are fixed here. The empty step used to compare the node name
-- with the raw text of nextTag, so a following step with a predicate or "*"
-- could never match; nextTag is now evaluated as a full step. A predicate
-- without "=" ("[@attr]") used to fail with arithmetic on nil; it is now an
-- attribute-presence test.
local function match(tag,tagAttr,tagExpr,nextTag)
  -- check if its a wild card
  if tagExpr == "*" then
    return true
  end
  -- check if its empty
  if tagExpr == "" then
    if nextTag ~= nil and nextTag ~= "" and match(tag,tagAttr,nextTag) then
      return false,1
    end
    return false,0
  end
  -- check if there is an expression to evaluate
  local open = tagExpr:find("[", 1, true)
  local close = tagExpr:find("]", 1, true)
  if open == nil or close == nil then
    return (tag == tagExpr)
  end
  if tagExpr:sub(1, open - 1) ~= tag then
    return false
  end
  local expression = tagExpr:sub(open + 1, close - 1)
  -- check if the expression is an attribute
  local at = expression:find("@", 1, true)
  if at == nil then
    -- predicates other than attribute tests are not supported
    return nil
  end
  local eq = expression:find("=", 1, true)
  if eq == nil then
    return tagAttr[expression:sub(at + 1)] ~= nil
  end
  local evalAttr = expression:sub(at + 1, eq - 1)
  -- drop the quotes around the expected value
  local evalValue = expression:sub(eq + 1):gsub("['\"]", "")
  return tagAttr[evalAttr] == evalValue
end
-- Walk the children of xmlTable and apply step number counter of the path
-- (tags is the list of steps). Nodes satisfying the final step are handed to
-- insertToTable; nodes satisfying an earlier step are searched with the next
-- step. For the empty step, match's second result decides: 1 means the node
-- already satisfies the following step (collect it, or continue two steps
-- on); 0 means keep looking deeper with the same step.
-- Only children that are tables with both tag and attr fields are examined.
local function parseNodes(tags,xmlTable,counter)
  local stepCount = #tags
  if counter > stepCount then
    return nil
  end
  local currentTag = tags[counter]
  local nextTag
  if counter < stepCount then
    nextTag = tags[counter+1]
  end
  for _,node in ipairs(xmlTable) do
    if type(node) == "table" and node.tag ~= nil and node.attr ~= nil then
      local matched,skip = match(node.tag,node.attr,currentTag,nextTag)
      if matched then
        if counter == stepCount then
          insertToTable(node)
        else
          parseNodes(tags,node,counter+1)
        end
      elseif skip == 1 then
        if counter+1 == stepCount then
          insertToTable(node)
        else
          parseNodes(tags,node,counter+2)
        end
      elseif skip ~= nil then
        parseNodes(tags,node,counter)
      end
    end
  end
end
-- Evaluate a path expression against a parsed XML tree.
--   xml   - root node table; children are read from its array part and
--           nodes are recognised by their tag and attr fields
--   xpath - steps separated by "/" or "\"; a leading "//" searches at any
--           depth; a final "text()", "node()" or "@attr" step selects what
--           is collected from each match
-- Returns a new list with the collected values (empty when nothing matches).
-- Raises an assertion error when xml is not a table or xpath is not a string.
function selectNodes(xml,xpath)
  assert(type(xml) == "table")
  assert(type(xpath) == "string")
  resultTable = {}
  -- the root is wrapped in a list so it is examined like any other child
  local xmlTree = {xml}
  -- local: assigning the step list to a global named "tags" leaked it out
  -- of this function
  local tags = split(xpath,'[\\/]+')
  local lastTag = tags[#tags]
  -- lastTag is nil when the path contains no steps (for example "")
  if lastTag ~= nil and
     (lastTag == "text()" or lastTag == "node()" or lastTag:find("@") == 1) then
    option = lastTag
    table.remove(tags,#tags)
  else
    option = nil
  end
  if xpath:find("//") == 1 then
    -- an empty first step marks a search at any depth
    table.insert(tags,1,"")
  end
  parseNodes(tags,xmlTree,1)
  return resultTable
end
-- Split str at every match of the Lua pattern pat and return the pieces as
-- a list. An empty piece at the very start of str is dropped and nothing is
-- added for a separator at the very end; empty pieces between two
-- separators are kept.
function split(str, pat)
  local pieces = {} -- NOTE: use {n = 0} in Lua-5.0
  local piecePattern = "(.-)" .. pat
  local cursor = 1
  while true do
    local first, last, chunk = str:find(piecePattern, cursor)
    if not first then
      break
    end
    if first ~= 1 or chunk ~= "" then
      table.insert(pieces, chunk)
    end
    cursor = last + 1
  end
  -- whatever follows the last separator is the final piece
  if cursor <= #str then
    table.insert(pieces, str:sub(cursor))
  end
  return pieces
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment