Skip to content

Instantly share code, notes, and snippets.

@haze
Created January 1, 2017 16:24
Show Gist options
  • Save haze/86f2cdb88c7e8aeccfea9ca1f7e7fd93 to your computer and use it in GitHub Desktop.
Save haze/86f2cdb88c7e8aeccfea9ca1f7e7fd93 to your computer and use it in GitHub Desktop.
/**
 * Returns the text of [base] enclosed by its last [left]/[right] delimiter pair.
 *
 * The closing delimiter is located first (the last occurrence of [right]); the opening
 * delimiter is then the last [left] that precedes it. Searching in this order is what makes
 * the default `right = left` case work: looking for the last [left] first would land on the
 * closing delimiter itself and throw the enclosed text away.
 *
 * A delimiter that does not occur in the text being searched is ignored, so a [base]
 * containing neither delimiter is returned unchanged.
 *
 * @param base text to search.
 * @param left opening delimiter.
 * @param right closing delimiter; defaults to [left] (for example a pair of quotes).
 * @return the enclosed text, without the delimiters.
 */
fun capture(base: String, left: String, right: String = left): String =
    base.substringBeforeLast(right).substringAfterLast(left)
/**
 * Returns [base] with every character that is neither a letter nor a digit removed.
 *
 * Despite the name this drops punctuation and symbols as well as whitespace.
 */
fun unspacify(base: String): String {
    val kept = StringBuilder(base.length)
    for (ch in base) {
        if (ch.isLetterOrDigit()) {
            kept.append(ch)
        }
    }
    return kept.toString()
}
/**
 * Scraper for azlyrics.com built on jsoup: lists artists by index letter, an artist's albums
 * and songs, and the lyrics of a single song.
 *
 * Every query performs a blocking HTTP request through `Jsoup.connect(...).get()`; exceptions
 * thrown by that call are not caught here and reach the caller.
 */
class AZLyricsQuery() {
    /** An artist: [representation] is the link text, [slug] the last path segment of the artist link. */
    data class Artist(val representation: String, val slug: String)

    /** A song: [slug] is the last path segment of the song link, [title] the link text. */
    data class Song(val slug: String, val artist: Artist, val title: String)

    /**
     * An album and its songs.
     *
     * Note: [songs] is an array, so the generated `equals`/`hashCode` compare it by reference.
     */
    data class Album(val artist: Artist, var title: String, var songs: Array<Song>)

    /**
     * Lyrics of one song. Each element of [lyrics] is one run of consecutive non-blank text
     * lines, every line trimmed and terminated by `\n`.
     *
     * Note: [lyrics] is an array, so the generated `equals`/`hashCode` compare it by reference.
     */
    data class LyricsEntry(val song: Song, var lyrics: Array<String>)

    /**
     * Downloads the artist page and groups its song links under the album headers found in the
     * element with id `listAlbum`.
     *
     * Elements carrying the `album` class start a new album; every following element that has
     * an `href` and non-empty text becomes a [Song] of that album. Links that appear before the
     * first album header are discarded.
     *
     * @param artist artist whose page is fetched; [Artist.representation] must not be empty
     *   because its first character selects the index directory.
     * @return the albums in page order, or an empty array when the page has no `listAlbum` element.
     */
    fun getAlbumsFor(artist: Artist): Array<Album> {
        val url = "http://azlyrics.com/${Character.toLowerCase(artist.representation[0])}/${artist.slug}"
        println("Parsing albums for: $url")
        val albumDiv = Jsoup.connect(url).get().getElementById("listAlbum")
        if (albumDiv == null) {
            // getElementById yields null when the id is absent; report it instead of crashing.
            println("Couldn't find album list for: $url")
            return arrayOf<Album>()
        }

        val albums = mutableListOf<Album>()
        val pendingSongs = mutableListOf<Song>()
        var pendingTitle: String? = null

        // Closes the album currently being collected (if any) and resets the song buffer.
        fun flushPending() {
            pendingTitle?.let { title -> albums.add(Album(artist, title, pendingSongs.toTypedArray())) }
            pendingSongs.clear()
        }

        for (element in albumDiv.allElements) {
            if (element.hasClass("album")) {
                flushPending()
                pendingTitle = albumTitleOf(element.text().trim())
            } else if (element.hasAttr("href") && !element.text().isEmpty()) {
                pendingSongs.add(Song(element.attr("href").substringAfterLast('/'), artist, element.text()))
            }
        }
        // The last album has no following header to trigger a flush, so close it explicitly.
        flushPending()
        return albums.toTypedArray()
    }

    /**
     * Derives an album title from the trimmed text of an album header.
     *
     * The header "other songs:" (any case) maps to "Singles". Otherwise the title is the text
     * between the first and the last double quote, which drops anything trailing the closing
     * quote and keeps quotes that are part of the title. A header without quotes is returned
     * unchanged.
     */
    private fun albumTitleOf(heading: String): String =
        if (heading.equals("other songs:", ignoreCase = true)) {
            "Singles"
        } else {
            heading.substringAfter('"').substringBeforeLast('"')
        }

    /**
     * Fetches the artist index page of every character in [letters] and concatenates the results.
     *
     * @param letters index characters to query, one page request per character, in order.
     * @return all artists found, in page order; letters whose page could not be parsed add nothing.
     */
    fun queryArtists(letters: String): Array<Artist> =
        letters.flatMap { letter -> queryArtistPage(letter) }.toTypedArray()

    /**
     * Fetches `http://www.azlyrics.com/<letter>.html` and turns every link inside the second
     * element of class `row` into an [Artist].
     *
     * Anchors are collected once each: jsoup's `getElementsByTag` matches the element itself and
     * all of its descendants, so walking div -> a would visit an anchor once per enclosing div.
     * After a successful parse the thread sleeps for one second to throttle the next request.
     */
    private fun queryArtistPage(letter: Char): List<Artist> {
        val pageUrl = "http://www.azlyrics.com/$letter.html"
        println("Trying to connect to: $pageUrl")
        val rows = Jsoup.connect(pageUrl).get().getElementsByClass("row")
        if (rows.size <= 1) {
            println("Couldn't find artists for letter: $letter")
            return emptyList()
        }
        val artists = rows[1].getElementsByTag("a").map { link ->
            println("Adding artist: ${link.text()}")
            Artist(link.text(), link.attr("href").substringAfterLast('/'))
        }
        println("Sleeping for 1 seconds before getting next artist list.")
        Thread.sleep(1000)
        return artists
    }

    /**
     * Downloads the page of [song] and extracts its lyrics.
     *
     * The lyrics container is identified as the parent of the HTML comment that starts with
     * `<!-- Usage of azlyrics.com`. Its direct text nodes are grouped into blocks: blank text
     * nodes separate blocks, non-blank ones are trimmed and appended with a trailing newline.
     *
     * @param song song to look up; the part of the artist slug before the first `.` and the
     *   song slug form the request path.
     * @return the lyrics entry; [LyricsEntry.lyrics] is empty when the marker comment (or an
     *   element parent for it) cannot be found, in which case a message is printed.
     */
    fun querySongLyrics(song: Song): LyricsEntry {
        val url = "http://azlyrics.com/lyrics/${song.artist.slug.substringBefore('.')}/${song.slug}"
        val doc = Jsoup.connect(url).get()
        val container = findUsageComment(doc)?.parentNode() as? Element
        if (container == null) {
            println("Failed to find comment object... $song")
            return LyricsEntry(song, arrayOf())
        }

        val blocks = mutableListOf<String>()
        val block = StringBuilder()
        for (textNode in container.textNodes()) {
            if (!textNode.isBlank) {
                block.append(textNode.wholeText.trim()).append("\n")
            } else if (block.isNotEmpty()) {
                // A blank text node ends the block collected so far.
                blocks.add(block.toString())
                block.setLength(0)
            }
        }
        if (block.isNotEmpty()) {
            blocks.add(block.toString())
        }
        return LyricsEntry(song, blocks.toTypedArray())
    }

    /**
     * Depth-first, pre-order search below (and including) [node] for the first comment node
     * whose HTML starts with `<!-- Usage of azlyrics.com`; returns null when there is none.
     */
    private fun findUsageComment(node: Node): Node? {
        val isMarker = node.nodeName() == "#comment" &&
            node.outerHtml().trimStart().startsWith("<!-- Usage of azlyrics.com")
        if (isMarker) {
            return node
        }
        for (child in node.childNodes()) {
            findUsageComment(child)?.let { return it }
        }
        return null
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment