Skip to content

Instantly share code, notes, and snippets.

@dexterous
Created June 21, 2011 09:31
Show Gist options
  • Save dexterous/1037526 to your computer and use it in GitHub Desktop.
Save dexterous/1037526 to your computer and use it in GitHub Desktop.
A not-so-quick script, hacked together to scrape the product info from Lenovo's product listing page.
// Fetches Lenovo's (en_IN) series listing page and builds `models`:
// a map of model number -> [name, price, specs].
// The TagSoup parser is handed to XmlSlurper so the HTML page can be slurped.
@Grab(group='org.ccil.cowan.tagsoup', module='tagsoup', version='1.2.1')
def slurper = new XmlSlurper(new org.ccil.cowan.tagsoup.Parser())
def page = slurper.parse('http://shopap.lenovo.com/SEUILibrary/controller/e/inweb/LenovoPortal/en_IN/catalog.workflow:category.details?current-catalog-id=3634951826AE4D3881BFFF1AC5FCD957&current-category-id=41AB4B1B55F74FF8833753D7713BB6D6&tab=1&runfacets=1&altercrumb=0&initpage=seriespage&filter=&page-size=200')

// Descend through the page's nested layout tables, one landmark at a time,
// until we reach the rows that carry the 'td-bkg-nograd' class.
def bodyWrap = page.body.div.find { it.@id == 'bodywrap' }
def container = bodyWrap.div.table.find { it.@id == 'container' }
def resultsCell = container.tr[1].td.table.tr[1].td.form.table.tr[1].td.find { it.@id == 'series_results_table' }
def modelRows = resultsCell.form.table.tr[1].td.table.tr.findAll { it.@class == 'td-bkg-nograd' }

def models = modelRows.inject([:]) { catalog, row ->
    // The third cell holds the model number, the product name and the price table.
    def summaryCell = row.td[2]
    def modelNum = summaryCell.div.text()

    // Drop the thousands separators and the 'Rs.' prefix before converting the price.
    def priceText = summaryCell.table[0].tr[1].td[1].text().replaceAll(',', '') - 'Rs.'

    // Cells 4..6 hold the spec text; split each on '· ', trim, and discard empty pieces.
    def specPieces = row.td[4..6]*.text().collect { cellText -> cellText.split(/· /)*.trim() }
    def specLine = specPieces.flatten().findAll { it }

    catalog[modelNum] = [
        name: summaryCell.p.text().trim(),
        price: priceText as BigDecimal,
        specs: parseSpecs(specLine)
    ]
    catalog
}
/**
 * Turns the flattened spec strings of one product row into a structured map.
 *
 * @param specLine list of spec strings, read positionally:
 *        [0] processor, [1] operating system, [2] display, [3] memory, [4] disk
 * @return map with the keys
 *         processor ([code, speed]), os (result of OS.parseText),
 *         display ([size, type, resolution]), ram ([size, speed] as Integers)
 *         and disk (Integer)
 *
 * Each regex result is indexed with [0], so a spec string that does not match its
 * pattern makes this method fail with an exception rather than return partial data.
 */
def parseSpecs(specLine) {
    return [
        // \S+ (instead of a fixed 7-character code) accepts model codes of any length,
        // e.g. both 'i5-480M' and 'i5-2520M'.
        processor: (specLine[0] =~ /Intel® Core™ (\S+) Processor \( (\S+)GHz .* \)/)[0].with { [code: it[1], speed: it[2]] },
        os: OS.parseText(specLine[1]),
        // Group 2 is the resolution name and group 3 the panel type.
        display: (specLine[2] =~ /(.{4}) " (\S+) (\S+)/)[0].with { [size: it[1], type: it[3], resolution: Resolution.parseText(it[2])] },
        // \d+ so multi-digit sizes are captured whole; with a single \d the find-style
        // match would read '16 GB' as 6.
        ram: (specLine[3] =~ /(\d+) GB .+ (\d+)MHz/)[0].with { [size: it[1] as Integer, speed: it[2] as Integer] },
        // Strips the ' SSD SATA' and 'GB' markers; whatever is left must be a plain integer.
        disk: (specLine[4] - ' SSD SATA' - 'GB') as Integer
    ]
}
// Print every model as "<model number>: <details>", separating consecutive entries
// with a ruler of 80 '=' characters padded by one newline on each side.
def ruler = ('=' * 80).center(82, "\n")
def entries = models.collect { modelNum, details -> "$modelNum: $details" }
println entries.join(ruler)
/**
 * Operating systems recognised in the listing, each keyed by the exact
 * text it is matched against.
 */
enum OS {
    DOS('PC DOS 2000 License'),
    WIN('Genuine Windows 7 Professional 32');

    /** Text that identifies this operating system. */
    private final String text

    private OS(text) {
        this.text = text
    }

    /**
     * Looks up the constant whose text equals the given text.
     *
     * @param text text to look up
     * @return the matching constant, or null when none matches
     */
    public static parseText(text) {
        for (candidate in OS.values()) {
            if (candidate.text == text) {
                return candidate
            }
        }
        return null
    }
}
/**
 * Named display resolutions with their pixel dimensions.
 * A '+' in the displayed name corresponds to a '_PLUS' suffix on the constant.
 */
enum Resolution {
    HD_PLUS(1600, 900),
    HD(1366, 768),
    WXGA_PLUS(1440, 900),
    WXGA(1280, 800);

    private final int width, height

    private Resolution(width, height) {
        this.width = width
        this.height = height
    }

    /** Renders as "<name> (<width> x <height>)", e.g. "HD+ (1600 x 900)". */
    public String toString() {
        def label = name().replace('_PLUS', '+')
        return "$label ($width x $height)"
    }

    /**
     * Resolves a displayed name such as 'HD+' or 'WXGA' to its constant.
     *
     * @param text displayed resolution name
     * @return the matching constant; Enum.valueOf rejects unknown names
     */
    public static parseText(text) {
        def constantName = text.replace('+', '_PLUS')
        return Enum.valueOf(Resolution, constantName)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment