Created
January 1, 2017 16:24
-
-
Save haze/86f2cdb88c7e8aeccfea9ca1f7e7fd93 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Returns the part of [base] enclosed by the first [left] delimiter and the last [right] delimiter.
 *
 * [right] defaults to [left], so `capture("album: \"Title\" (2010)", "\"")` yields `Title`.
 * A delimiter that does not occur in [base] is ignored: that side of the text is left uncut.
 */
fun capture(base: String, left: String, right: String = left): String =
    // Cut after the FIRST left delimiter: cutting after the last one would, when both
    // delimiters are the same string, discard the enclosed text and keep the trailing remainder.
    base.substringAfter(left).substringBeforeLast(right)
/**
 * Returns [base] with every character that is neither a letter nor a digit removed
 * (whitespace, punctuation and symbols are all dropped).
 */
fun unspacify(base: String): String {
    val kept = StringBuilder(base.length)
    for (ch in base) {
        if (Character.isLetterOrDigit(ch)) {
            kept.append(ch)
        }
    }
    return kept.toString()
}
/**
 * Scraper for azlyrics.com: lists the artists on an index page, the albums and songs of an
 * artist, and the lyrics of a song. Every query performs blocking HTTP requests through Jsoup.
 */
class AZLyricsQuery() {
    /** An artist link: [representation] is the link text, [slug] the last path segment of its href. */
    data class Artist(val representation: String, val slug: String)

    /** A song link: [slug] is the last path segment of its href, [title] the link text. */
    data class Song(val slug: String, val artist: Artist, val title: String)

    // NOTE(review): Array properties make the generated equals/hashCode compare the arrays by
    // reference; they are kept as arrays so existing callers keep compiling.
    /** An album heading together with the song links that follow it on the artist page. */
    data class Album(val artist: Artist, var title: String, var songs: Array<Song>)

    /** The lyrics of [song], one array entry per group of consecutive non-blank text nodes. */
    data class LyricsEntry(val song: Song, var lyrics: Array<String>)

    /**
     * Downloads the page of [artist] and groups its song links under the album heading that
     * precedes them. The heading "other songs:" (case-insensitive) becomes an album titled
     * "Singles"; other headings use the text between their double quotes as the title.
     *
     * @return the albums in page order, or an empty array when the page has no `listAlbum` element.
     */
    fun getAlbumsFor(artist: Artist): Array<Album> {
        val url = "http://azlyrics.com/${Character.toLowerCase(artist.representation[0])}/${artist.slug}"
        println("Parsing albums for: $url")
        val doc = Jsoup.connect(url).get()

        // getElementById returns null when the id is absent; bail out instead of crashing.
        val albumDiv = doc.getElementById("listAlbum")
        if (albumDiv == null) {
            println("Couldn't find album list for: $url")
            return arrayOf()
        }

        val albums = mutableListOf<Album>()
        val curAlbumSongs = mutableListOf<Song>()
        var curAlbum: Album? = null

        for (currentTag in albumDiv.allElements) {
            if (currentTag.hasClass("album")) {
                // A new heading closes the album collected so far.
                curAlbum?.let { finished ->
                    finished.songs = curAlbumSongs.toTypedArray()
                    albums.add(finished)
                }
                curAlbumSongs.clear()

                val heading = currentTag.text().trim()
                val albumTitle =
                    if (heading.equals("other songs:", ignoreCase = true)) "Singles"
                    else capture(heading, "\"")
                curAlbum = Album(artist, albumTitle, arrayOf())
            } else if (currentTag.hasAttr("href") && currentTag.text().isNotEmpty()) {
                val songSlug = currentTag.attr("href").split("/").last()
                curAlbumSongs.add(Song(songSlug, artist, currentTag.text()))
            }
        }

        // The last album is not followed by another heading, so it has to be closed here;
        // without this its songs would be silently dropped.
        curAlbum?.let { finished ->
            finished.songs = curAlbumSongs.toTypedArray()
            albums.add(finished)
        }
        return albums.toTypedArray()
    }

    /**
     * Downloads the index page `http://www.azlyrics.com/<letter>.html` for every character of
     * [letters], in order, and collects the artist links found in the second element of class
     * `row`. Sleeps one second after each page that yielded a row to throttle the requests.
     *
     * @return all artists found, in page order; letters whose page has no such row contribute nothing.
     */
    fun queryArtists(letters: String): Array<Artist> {
        fun queryArtistPage(letter: Char): List<Artist> {
            val pageUrl = "http://www.azlyrics.com/$letter.html"
            println("Trying to connect to: $pageUrl")
            val doc = Jsoup.connect(pageUrl).get()

            val rows = doc.getElementsByClass("row")
            if (rows.size <= 1) {
                println("Couldn't find artists for letter: $letter")
                return emptyList()
            }

            // getElementsByTag already searches the element and all of its descendants, so
            // the links are collected in one pass; walking every nested div first would
            // report each link once per enclosing div.
            val letterArtists = mutableListOf<Artist>()
            for (link in rows[1].getElementsByTag("a")) {
                println("Adding artist: ${link.text()}")
                letterArtists.add(Artist(link.text(), link.attr("href").split("/").last()))
            }

            println("Sleeping for 1 seconds before getting next artist list.")
            Thread.sleep(1000)
            return letterArtists
        }

        // flatMap visits the letters sequentially and avoids re-copying the whole result per letter.
        return letters.flatMap { queryArtistPage(it) }.toTypedArray()
    }

    /**
     * Downloads the lyrics page of [song] and extracts the text nodes of the element that
     * contains the "Usage of azlyrics.com" HTML comment. Consecutive non-blank text nodes are
     * joined (trimmed, each followed by a newline) into one entry; a blank text node ends the entry.
     *
     * @return an entry for [song]; its lyrics are empty when the marker comment is not found.
     */
    fun querySongLyrics(song: Song): LyricsEntry {
        val url = "http://azlyrics.com/lyrics/${song.artist.slug.split(".")[0]}/${song.slug}"
        val doc = Jsoup.connect(url).get()

        // Depth-first search for the marker comment.
        fun findUsageComment(node: Node): Node? {
            if (node.nodeName() == "#comment" &&
                node.outerHtml().trimStart().startsWith("<!-- Usage of azlyrics.com")
            ) {
                return node
            }
            for (childNode in node.childNodes()) {
                val hit = findUsageComment(childNode)
                if (hit != null) {
                    return hit
                }
            }
            return null
        }

        // Safe cast: a missing comment or a non-element parent is reported instead of throwing.
        val lyricsDiv = findUsageComment(doc)?.parentNode() as? Element
        if (lyricsDiv == null) {
            println("Failed to find comment object... $song")
            return LyricsEntry(song, arrayOf())
        }

        val stanzas = mutableListOf<String>()
        val current = StringBuilder()
        for (textNode in lyricsDiv.textNodes()) {
            if (!textNode.isBlank) {
                current.append(textNode.wholeText.trim()).append("\n")
            } else if (current.isNotEmpty()) {
                // A blank text node closes the entry that is being built.
                stanzas.add(current.toString())
                current.setLength(0)
            }
        }
        if (current.isNotEmpty()) {
            stanzas.add(current.toString())
        }
        return LyricsEntry(song, stanzas.toTypedArray())
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment