Skip to content

Instantly share code, notes, and snippets.

@Glavo
Created July 5, 2017 12:16
Show Gist options
  • Save Glavo/0fb35b447a14f50c4fb18d83ed584236 to your computer and use it in GitHub Desktop.
Save Glavo/0fb35b447a14f50c4fb18d83ed584236 to your computer and use it in GitHub Desktop.
package org.glavo.oj.ojs
import org.glavo.oj.OJ
import org.glavo.oj.Problem
import org.jsoup.*
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
import java.io.ObjectInputStream
import java.io.ObjectOutputStream
import java.io.PrintWriter
import java.net.URL
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.Paths
import java.util.regex.Pattern
import java.util.stream.Stream
import kotlin.streams.toList
/**
 * Work-in-progress parser for a single problem page (presumably a Luogu
 * problem page - confirm against the caller).
 *
 * Walks every element matched by `div.lg-content-left div.am-g *`. An `h2`
 * element empties the buffer of pending elements; every other element is
 * appended to it. The buffered elements are not consumed yet: the function
 * always ends in [TODO] and therefore throws [NotImplementedError] instead of
 * returning a [Problem].
 *
 * Created by Glavo on 17-7-2.
 *
 * @param document the parsed HTML page to extract the problem from
 * @author Glavo
 * @since 0.1.0
 */
fun parsePage(document: Document): Problem {
    val section = Elements()
    for (element in document.select("div.lg-content-left div.am-g *")) {
        // Each <h2> starts a new section, so whatever was collected before it is dropped.
        if (element.nodeName() == "h2") section.clear() else section.add(element)
    }
    TODO()
}
/**
 * Crawler front-end for the Luogu online judge (`https://www.luogu.org`).
 *
 * Every [Stream] member is delegated to the companion's [stream]. That
 * property still ends in `TODO("")`, so constructing a `Luogu` throws
 * [NotImplementedError] until problem parsing is implemented.
 */
class Luogu : Stream<Problem> by stream, OJ {
    companion object {
        private const val BASE_URL = "https://www.luogu.org"

        /** Directory `luogu` below [path]; created on first access when nothing exists at that location. */
        val tem: Path by lazy {
            val dir = path / "luogu"
            if (!Files.exists(dir)) Files.createDirectories(dir)
            dir
        }

        /**
         * Absolute URLs collected from the paginated problem list.
         *
         * When a non-directory file `pages` exists below [tem], its lines are
         * returned as-is. Otherwise the list is crawled page by page, following
         * the `a[rel=next]` link until none is left, and the result is written
         * to that file, one URL per line.
         *
         * If the crawl fails part-way, the links gathered so far are still
         * returned and the failure is reported on stderr, but nothing is
         * written to the cache, so the next run crawls again instead of
         * reusing a truncated list forever.
         */
        internal val pages: List<String> by lazy {
            val cache = tem / "pages"
            if (Files.exists(cache) && !Files.isDirectory(cache)) {
                val cached: List<String> = Files.readAllLines(cache)
                cached
            } else {
                val found = mutableListOf<String>()
                var now = "$BASE_URL/problem/lists?name=&orderitem=&order=&tag=&page=1"
                val complete = try {
                    while (true) {
                        val html: Document = now.toDocument()
                        html
                            .select("div.lg-content-table-left div[class=am-g lg-table-row lg-table-bg0] a:not(a[target=_blank])")
                            .mapTo(found) { link -> "$BASE_URL${link.attr("href")}" }
                        // No "next" link means the last list page has been reached.
                        val next = html.select("a[rel=next]").firstOrNull() ?: break
                        now = "$BASE_URL${next.attr("href")}"
                    }
                    true
                } catch (e: Exception) {
                    System.err.println(
                        "Luogu: crawl stopped at $now; keeping ${found.size} links without caching them: $e"
                    )
                    false
                }
                // Files.write creates or truncates the file itself, so no separate createFile call is needed.
                if (complete) Files.write(cache, found)
                found
            }
        }

        // Matches "P<digits> <title>": group 1 is the numeric id, group 2 the title.
        private val p = Pattern.compile("""P([0-9]+) +(.+)""")

        /**
         * Unfinished: the initializer ends in `TODO("")`, so the first access
         * throws [NotImplementedError]. The mapping below is scaffolding whose
         * result is discarded.
         *
         * NOTE(review): the pattern is applied to the entries of [pages], which
         * are URLs; it presumably should be applied to a page title - confirm
         * before finishing this.
         */
        val stream: Stream<Problem> by lazy {
            pages.stream().map { page ->
                val match = p.matcher(page.trim())
                if (!match.find()) return@map null
                // group(0) is the whole match and is never a plain number; the id and title are groups 1 and 2.
                val index = match.group(1).toInt()
                val name = match.group(2)
                index to name
            }
            TODO("")
        }
    }
}
/**
 * Fetches the page at the URL held in this string and parses it with Jsoup.
 *
 * The fetch is attempted up to five times, each with a timeout argument of
 * 10000 passed to `Jsoup.parse`. The URL itself is built once up front, so a
 * malformed URL fails immediately instead of being retried.
 *
 * @return the document produced by the first successful attempt
 * @throws java.net.MalformedURLException if this string is not a valid URL
 * @throws Exception the failure of the last attempt when all five attempts fail
 */
fun String.toDocument(): Document {
    val url = URL(this)
    var lastFailure: Exception? = null
    repeat(5) {
        try {
            return Jsoup.parse(url, 10000)
        } catch (e: Exception) {
            // Remember the failure and retry; only the most recent one is reported.
            lastFailure = e
        }
    }
    // Reaching this point means every attempt threw, so a failure has been recorded.
    throw checkNotNull(lastFailure) { "no fetch attempt was made for $this" }
}
/**
 * Runs [f] once and returns the elapsed time in nanoseconds.
 *
 * The value produced by [f] is discarded. Timing is delegated to the standard
 * library's `measureNanoTime`, which is based on `System.nanoTime()`.
 *
 * @param f the action to time
 * @return elapsed time of the call in nanoseconds
 */
inline fun <T> timed(f: () -> T): Long = kotlin.system.measureNanoTime { f() }
/**
 * Root working directory of the crawler: `.crawler` inside the directory named
 * by the `user.home` system property.
 *
 * Every read checks whether the directory exists and creates it (including
 * missing parents) when it does not, so the returned path points at an
 * existing directory.
 */
val path: Path = Paths.get(System.getProperty("user.home"), ".crawler")
    get() {
        if (!Files.isDirectory(field)) {
            Files.createDirectories(field)
        }
        return field
    }
/**
 * Path-joining operator: `dir / "name"` is shorthand for `dir.resolve("name")`.
 *
 * @param other the path string to resolve against this path
 * @return the resolved path
 */
operator fun Path.div(other: String): Path = resolve(other)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment