Last active
December 3, 2016 15:05
-
-
Save wiomoc/a34a129f3ecf6ff40e84f8b19b0a5d76 to your computer and use it in GitHub Desktop.
A small program that reads embedded lyrics from several audio formats: MP3 (ID3), AAC/M4A/MP4, and Ogg Vorbis.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package de.wiomoc.Lyrics;

import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Array;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Locale;
import org.xml.sax.InputSource;

/**
 * Reads embedded, unsynchronised song lyrics from audio files.
 *
 * <p>Supported containers, selected by file extension:
 * <ul>
 *   <li>{@code mp3} - ID3v2.2 ({@code ULT}) and ID3v2.3/2.4 ({@code USLT}) frames</li>
 *   <li>{@code mp4}, {@code m4a}, {@code aac} - the iTunes-style
 *       {@code moov/udta/meta/ilst/(c)lyr/data} atom</li>
 *   <li>{@code ogg}, {@code oga} - the {@code LYRICS} / {@code UNSYNCEDLYRICS} Vorbis comment</li>
 * </ul>
 *
 * <p>Known limitations: ID3 unsynchronisation and compressed or encrypted ID3 frames are not
 * decoded, and an Ogg file is treated as a single logical stream (pages are not demultiplexed
 * by serial number).
 */
public class LyricsExtractor {

    /** Directory scanned by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_DIRECTORY = "/home/christoph/Schreibtisch/test";

    private static final byte[] OGG_CAPTURE_PATTERN = {'O', 'g', 'g', 'S'};
    /** Total size of an Ogg page header up to and including the segment count byte. */
    private static final int OGG_PAGE_HEADER_LENGTH = 27;

    private static final byte[] VORBIS_IDENTIFICATION_HEADER = {1, 'v', 'o', 'r', 'b', 'i', 's'};
    private static final byte[] VORBIS_COMMENT_HEADER = {3, 'v', 'o', 'r', 'b', 'i', 's'};
    /** Bytes of the 30-byte identification header that follow its 7-byte signature. */
    private static final int VORBIS_IDENTIFICATION_REST = 23;
    /** Upper-case comment field prefixes (including '=') that carry lyrics. */
    private static final byte[][] VORBIS_LYRICS_FIELDS = {
        "LYRICS=".getBytes(StandardCharsets.US_ASCII),
        "UNSYNCEDLYRICS=".getBytes(StandardCharsets.US_ASCII)
    };
    /** Length of the longest entry of {@link #VORBIS_LYRICS_FIELDS}. */
    private static final int VORBIS_LONGEST_FIELD = "UNSYNCEDLYRICS=".length();

    private static final byte[] MP4_FILE_TYPE = {'f', 't', 'y', 'p'};
    /** Atom path to the lyrics; 0xA9 is the copyright sign, written numerically so the
     *  constant does not depend on the encoding this source file is compiled with. */
    private static final byte[][] MP4_LYRICS_PATH = {
        {'m', 'o', 'o', 'v'}, {'u', 'd', 't', 'a'}, {'m', 'e', 't', 'a'},
        {'i', 'l', 's', 't'}, {(byte) 0xA9, 'l', 'y', 'r'}, {'d', 'a', 't', 'a'}
    };

    private static final byte[] ID3_LYRICS_FRAME = {'U', 'S', 'L', 'T'};
    private static final byte[] ID3_V22_LYRICS_FRAME = {'U', 'L', 'T'};
    /** ID3v2.3/2.4 header flag announcing an extended header (bit 6). */
    private static final int ID3_FLAG_EXTENDED_HEADER = 0x40;

    /**
     * Prints the name and the lyrics of every file in a directory.
     *
     * @param args optional; {@code args[0]} is the directory to scan. Without arguments the
     *             historical default directory is used.
     * @throws Exception never thrown by the current implementation; kept for compatibility
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DIRECTORY;
        File directory = new File(path);
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() yields null for a missing path, a non-directory or an I/O error.
            System.err.println("Cannot list directory: " + directory);
            return;
        }
        for (File file : files) {
            System.out.println(file.getName());
            System.out.println(getLyrics(file));
        }
    }

    /**
     * Extracts the embedded lyrics of an audio file.
     *
     * @param file the audio file; the format is chosen from its extension (case-insensitive)
     * @return the lyrics, or {@code null} when the extension is unsupported, the file holds no
     *         lyrics, or it cannot be read or parsed
     */
    public static String getLyrics(File file) {
        if (file == null) {
            return null;
        }
        String filename = file.getName();
        String extension = filename.substring(filename.lastIndexOf('.') + 1).toLowerCase(Locale.ROOT);
        try {
            switch (extension) {
                case "mp3":
                    return getLyricsID3(file);
                case "mp4":
                case "m4a":
                case "aac":
                    return getLyricsMP4(file);
                case "ogg":
                case "oga":
                    return getLyricsVorbis(file);
                default:
                    return null;
            }
        } catch (IOException | RuntimeException ignored) {
            // Best-effort API: an unreadable or malformed file simply has no lyrics.
            return null;
        }
    }

    // ------------------------------------------------------------------ Ogg / Vorbis

    /**
     * Presents the payload of consecutive Ogg pages as one continuous byte sequence,
     * transparently consuming the page headers in between.
     */
    private static final class OggReader {
        private final InputStream in;
        /** Payload bytes of the current page that have not been consumed yet. */
        private int leftInPage;

        OggReader(InputStream in) {
            this.in = in;
        }

        /** Fills {@code dst[off .. off+len)} with the next payload bytes. */
        void read(byte[] dst, int off, int len) throws IOException {
            transfer(dst, off, len);
        }

        /** Discards the next {@code count} payload bytes. */
        void skip(int count) throws IOException {
            transfer(null, 0, count);
        }

        private void transfer(byte[] dst, int off, int count) throws IOException {
            int position = off;
            int left = count;
            while (left > 0) {
                if (leftInPage == 0) {
                    // May yield 0 again for an empty page; the loop then reads the next header.
                    leftInPage = readPageHeader();
                    continue;
                }
                int chunk = Math.min(left, leftInPage);
                if (dst != null) {
                    readFully(in, dst, position, chunk);
                } else {
                    skipFully(in, chunk);
                }
                position += chunk;
                left -= chunk;
                leftInPage -= chunk;
            }
        }

        /** Consumes one page header plus segment table and returns the page's payload size. */
        private int readPageHeader() throws IOException {
            byte[] header = new byte[OGG_PAGE_HEADER_LENGTH];
            readFully(in, header, 0, header.length);
            if (!startsWith(header, OGG_CAPTURE_PATTERN)) {
                throw new IOException("Missing OggS capture pattern");
            }
            byte[] lacing = new byte[header[OGG_PAGE_HEADER_LENGTH - 1] & 0xFF];
            readFully(in, lacing, 0, lacing.length);
            int payload = 0;
            for (byte segment : lacing) {
                payload += segment & 0xFF;
            }
            return payload;
        }
    }

    /**
     * Reads the lyrics comment from the Vorbis comment header of an Ogg Vorbis file.
     *
     * @return the value of the first non-empty lyrics field, or {@code null} if the file is
     *         not Vorbis or carries no such field
     * @throws IOException if the file is truncated or structurally invalid
     */
    private static String getLyricsVorbis(File file) throws IOException {
        long fileSize = file.length();
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            OggReader ogg = new OggReader(in);
            byte[] signature = new byte[7];
            ogg.read(signature, 0, signature.length);
            if (!Arrays.equals(signature, VORBIS_IDENTIFICATION_HEADER)) {
                return null;
            }
            ogg.skip(VORBIS_IDENTIFICATION_REST);
            ogg.read(signature, 0, signature.length);
            if (!Arrays.equals(signature, VORBIS_COMMENT_HEADER)) {
                return null;
            }

            byte[] word = new byte[4];
            // All lengths in the comment header are little-endian, the vendor length included.
            ogg.read(word, 0, 4);
            ogg.skip(checkedLength(byteArrayToIntLE(word), fileSize));
            ogg.read(word, 0, 4);
            int commentCount = byteArrayToIntLE(word);

            for (int i = 0; i < commentCount; i++) {
                ogg.read(word, 0, 4);
                int commentLength = checkedLength(byteArrayToIntLE(word), fileSize);
                // Read only as much as is needed to recognise the field name, so that large
                // unrelated comments (e.g. embedded pictures) are skipped, not buffered.
                byte[] probe = new byte[Math.min(commentLength, VORBIS_LONGEST_FIELD)];
                ogg.read(probe, 0, probe.length);
                int unread = commentLength - probe.length;
                int prefix = lyricsFieldPrefixLength(probe);
                if (prefix < 0 || prefix == commentLength) {
                    // Not a lyrics field, or a lyrics field with an empty value.
                    ogg.skip(unread);
                    continue;
                }
                byte[] lyrics = new byte[commentLength - prefix];
                int alreadyRead = probe.length - prefix;
                System.arraycopy(probe, prefix, lyrics, 0, alreadyRead);
                ogg.read(lyrics, alreadyRead, unread);
                return new String(lyrics, StandardCharsets.UTF_8);
            }
            return null;
        }
    }

    /**
     * Returns the length of the lyrics field prefix (name plus '=') that {@code probe} starts
     * with, compared ASCII case-insensitively, or -1 if it starts with none.
     */
    private static int lyricsFieldPrefixLength(byte[] probe) {
        for (byte[] field : VORBIS_LYRICS_FIELDS) {
            if (probe.length < field.length) {
                continue;
            }
            boolean matches = true;
            for (int i = 0; i < field.length && matches; i++) {
                int c = probe[i];
                if (c >= 'a' && c <= 'z') {
                    c -= 'a' - 'A';
                }
                matches = c == field[i];
            }
            if (matches) {
                return field.length;
            }
        }
        return -1;
    }

    // ------------------------------------------------------------------ MP4

    /**
     * Reads the lyrics atom of an MP4 / M4A file.
     *
     * @return the lyrics text, or {@code null} if the file does not start with an
     *         {@code ftyp} atom or has no lyrics atom
     * @throws IOException if the file is truncated
     */
    private static String getLyricsMP4(File file) throws IOException {
        long fileSize = file.length();
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            byte[] word = new byte[4];
            readFully(in, word, 0, 4);
            long fileTypeSize = byteArrayToInt(word) & 0xFFFFFFFFL;
            readFully(in, word, 0, 4);
            if (!Arrays.equals(word, MP4_FILE_TYPE) || fileTypeSize < 8) {
                return null;
            }
            skipFully(in, fileTypeSize - 8);

            // Bytes available inside the container that is currently being searched.
            long remaining = fileSize - fileTypeSize;
            for (byte[] atom : MP4_LYRICS_PATH) {
                remaining = enterAtom(in, atom, remaining);
                if (remaining < 0) {
                    return null;
                }
            }

            // The payload of 'data' starts with a 4-byte type indicator and a 4-byte locale.
            long textLength = remaining - 8;
            if (textLength < 0 || textLength > Integer.MAX_VALUE) {
                return null;
            }
            skipFully(in, 8);
            byte[] text = new byte[(int) textLength];
            readFully(in, text, 0, text.length);
            return new String(text, StandardCharsets.UTF_8);
        }
    }

    /**
     * Scans the sibling atoms inside the next {@code available} bytes for {@code wanted} and
     * leaves the stream positioned at the start of that atom's payload.
     *
     * @return the payload size of the atom found, or -1 if it is absent or an atom size is
     *         inconsistent with its container
     */
    private static long enterAtom(InputStream in, byte[] wanted, long available) throws IOException {
        byte[] word = new byte[4];
        long left = available;
        while (left >= 8) {
            readFully(in, word, 0, 4);
            long size = byteArrayToInt(word) & 0xFFFFFFFFL;
            if (size == 0) {
                // An all-zero word here is the version/flags field that follows the header of
                // a full atom such as 'meta'; step over it and resynchronise on the next atom.
                left -= 4;
                continue;
            }
            readFully(in, word, 0, 4);
            long headerLength = 8;
            if (size == 1) {
                // A size field of 1 announces a 64-bit size stored right after the type.
                byte[] large = new byte[8];
                readFully(in, large, 0, 8);
                size = 0;
                for (byte b : large) {
                    size = (size << 8) | (b & 0xFF);
                }
                headerLength = 16;
            }
            if (size < headerLength || size > left) {
                return -1;
            }
            if (Arrays.equals(word, wanted)) {
                return size - headerLength;
            }
            skipFully(in, size - headerLength);
            left -= size;
        }
        return -1;
    }

    // ------------------------------------------------------------------ ID3v2

    /**
     * Reads the unsynchronised lyrics frame of an ID3v2 tag at the start of an MP3 file.
     *
     * @return the text of the first decodable lyrics frame, or {@code null} if there is no
     *         ID3v2.2-2.4 tag or no such frame
     * @throws IOException if the file is truncated
     */
    private static String getLyricsID3(File file) throws IOException {
        long fileSize = file.length();
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            byte[] header = new byte[10];
            readFully(in, header, 0, header.length);
            if (header[0] != 'I' || header[1] != 'D' || header[2] != '3') {
                return null;
            }
            int majorVersion = header[3] & 0xFF;
            int flags = header[5] & 0xFF;
            if (majorVersion < 2 || majorVersion > 4) {
                return null;
            }
            boolean v22 = majorVersion == 2;
            long remaining = syncsafeToInt(header, 6);

            if (!v22 && (flags & ID3_FLAG_EXTENDED_HEADER) != 0) {
                byte[] word = new byte[4];
                readFully(in, word, 0, 4);
                // v2.4 stores a syncsafe size that includes these four bytes;
                // v2.3 stores a plain size that excludes them.
                long rest = majorVersion == 4
                        ? syncsafeToInt(word, 0) - 4L
                        : byteArrayToInt(word) & 0xFFFFFFFFL;
                if (rest < 0 || rest > remaining) {
                    return null;
                }
                skipFully(in, rest);
                remaining -= 4 + rest;
            }

            byte[] wantedId = v22 ? ID3_V22_LYRICS_FRAME : ID3_LYRICS_FRAME;
            byte[] frameHeader = new byte[v22 ? 6 : 10];
            while (remaining >= frameHeader.length) {
                readFully(in, frameHeader, 0, frameHeader.length);
                remaining -= frameHeader.length;
                if (frameHeader[0] == 0) {
                    break; // reached the padding that follows the last frame
                }
                long frameSize;
                if (v22) {
                    frameSize = (frameHeader[3] & 0xFF) << 16
                            | (frameHeader[4] & 0xFF) << 8
                            | (frameHeader[5] & 0xFF);
                } else if (majorVersion == 4) {
                    frameSize = syncsafeToInt(frameHeader, 4);
                } else {
                    frameSize = byteArrayToInt(frameHeader, 4) & 0xFFFFFFFFL;
                }
                if (frameSize > fileSize || frameSize > Integer.MAX_VALUE) {
                    return null; // corrupt size; also guards the allocation below
                }
                remaining -= frameSize;
                if (!startsWith(frameHeader, wantedId)) {
                    skipFully(in, frameSize);
                    continue;
                }
                byte[] body = new byte[(int) frameSize];
                readFully(in, body, 0, body.length);
                String lyrics = decodeUnsyncedLyrics(body);
                if (lyrics != null) {
                    return lyrics;
                }
            }
            return null;
        }
    }

    /**
     * Decodes the body of a USLT/ULT frame: one encoding byte, a three-byte language code,
     * a terminated content descriptor, then the lyrics text.
     *
     * @return the lyrics without trailing NUL characters, or {@code null} if the encoding
     *         byte is unknown or the descriptor is not terminated
     */
    private static String decodeUnsyncedLyrics(byte[] body) {
        if (body.length < 4) {
            return null;
        }
        Charset charset;
        boolean twoByteUnits;
        switch (body[0]) {
            case 0:
                charset = StandardCharsets.ISO_8859_1;
                twoByteUnits = false;
                break;
            case 1:
                charset = StandardCharsets.UTF_16; // honours the byte order mark
                twoByteUnits = true;
                break;
            case 2:
                charset = StandardCharsets.UTF_16BE;
                twoByteUnits = true;
                break;
            case 3:
                charset = StandardCharsets.UTF_8;
                twoByteUnits = false;
                break;
            default:
                return null;
        }

        // Find the end of the content descriptor. UTF-16 strings end with a two-byte
        // terminator, so the search must advance in whole code units.
        int textStart = -1;
        int step = twoByteUnits ? 2 : 1;
        for (int i = 4; i + step <= body.length; i += step) {
            if (body[i] == 0 && (!twoByteUnits || body[i + 1] == 0)) {
                textStart = i + step;
                break;
            }
        }
        if (textStart < 0) {
            return null;
        }

        String text = new String(body, textStart, body.length - textStart, charset);
        int end = text.length();
        while (end > 0 && text.charAt(end - 1) == '\0') {
            end--; // some taggers append a string terminator
        }
        return text.substring(0, end);
    }

    // ------------------------------------------------------------------ shared helpers

    /** Reads exactly {@code len} bytes or throws {@link EOFException}. */
    private static void readFully(InputStream in, byte[] buf, int off, int len) throws IOException {
        int done = 0;
        while (done < len) {
            int n = in.read(buf, off + done, len - done);
            if (n < 0) {
                throw new EOFException("Unexpected end of file");
            }
            done += n;
        }
    }

    /** Skips exactly {@code count} bytes, falling back to reading when skip() makes no progress. */
    private static void skipFully(InputStream in, long count) throws IOException {
        long left = count;
        while (left > 0) {
            long skipped = in.skip(left);
            if (skipped <= 0) {
                if (in.read() < 0) {
                    throw new EOFException("Unexpected end of file");
                }
                skipped = 1;
            }
            left -= skipped;
        }
    }

    /** Rejects length fields that are negative or larger than the file they were read from. */
    private static int checkedLength(int length, long limit) throws IOException {
        if (length < 0 || length > limit) {
            throw new IOException("Implausible length field: " + length);
        }
        return length;
    }

    /** Tells whether {@code data} begins with all bytes of {@code prefix}. */
    private static boolean startsWith(byte[] data, byte[] prefix) {
        if (data.length < prefix.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if (data[i] != prefix[i]) {
                return false;
            }
        }
        return true;
    }

    /** Decodes a 28-bit syncsafe integer (7 significant bits per byte) at {@code off}. */
    private static int syncsafeToInt(byte[] b, int off) {
        return (b[off] & 0x7F) << 21
                | (b[off + 1] & 0x7F) << 14
                | (b[off + 2] & 0x7F) << 7
                | (b[off + 3] & 0x7F);
    }

    /** Decodes the first four bytes of {@code b} as a big-endian 32-bit integer. */
    private static int byteArrayToInt(byte[] b) {
        return byteArrayToInt(b, 0);
    }

    /** Decodes four bytes starting at {@code off} as a big-endian 32-bit integer. */
    private static int byteArrayToInt(byte[] b, int off) {
        return (b[off] & 0xFF) << 24
                | (b[off + 1] & 0xFF) << 16
                | (b[off + 2] & 0xFF) << 8
                | (b[off + 3] & 0xFF);
    }

    /** Decodes the first four bytes of {@code b} as a little-endian 32-bit integer. */
    private static int byteArrayToIntLE(byte[] b) {
        return (b[0] & 0xFF)
                | (b[1] & 0xFF) << 8
                | (b[2] & 0xFF) << 16
                | (b[3] & 0xFF) << 24;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment