Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
One-Time Retrieve Parser (Just for experimental purpose)
<?php
/* ONE-TIME SCRIPT TO RETRIEVE MUSIC PIECE INFORMATION FROM PETRUCCI-DB */
/* DEV: Tarek Alakmeh, Dec 2016 */
/* ! ! ! ! ! ! ! ! ! ! ! ! ! ! !
! ! ! ! ! ATTENTION: ! ! ! ! ! !
! ! This Code isn't made for ! !
! ! production: Just experi- ! !
! ! mental purpose! Thx :) ! ! !
! ! ! ! ! ! ! ! ! ! ! ! ! ! ! */
// Show every notice/warning on screen; this is a one-off experimental script.
// Enabled first so that problems while loading the library below are visible too.
error_reporting(E_ALL);
ini_set('display_errors', 1);
// Simple HTML DOM supplies str_get_html(), used further down to walk the page markup.
// require_once (rather than include) stops the script right here when the library
// is missing, instead of failing later at the first str_get_html() call.
require_once('simple_html_dom.php');
// Composer IDs whose pieces are processed (shortened list; mostly famous and important composers).
$composer_id = array(1200,730,9491,2828,1944,5969,13165); // etc.
// Position inside $composer_id. Starts at -1 because the loop head pre-increments it.
$i = -1;
// Create DB-Connection
$db = new mysqli("localhost", "username", "password", "table-name");
// Verify the connection BEFORE issuing any call on it; set_charset() on a failed
// connection would only add a second, misleading error.
if ($db->connect_errno) { die("Verbindung fehlgeschlagen: " . $db->connect_error); }
// Enable special characters (piece titles are international).
$db->set_charset("utf8");
// Loop head: the script jumps back to this label once per composer.
data_retrieve:
// Advance to the next composer.
++$i;
// Stop once every configured composer has been handled. Without this check the index
// runs past the end of $composer_id, the SELECT below still succeeds, and the
// backward goto repeats forever.
if (!isset($composer_id[$i])) {
    goto exitPoint;
}
// Composer whose pieces are processed in this pass.
$current_composer_id = $composer_id[$i];
// Retrieve all pieces from the defined composer. No duplicates allowed!
if ($result = $db->query("SELECT DISTINCT `title`, `permlink` FROM `petrucci_piece` WHERE `composer_id` = ('$current_composer_id')")) {
while ($row = $result->fetch_row()) {
// Start every piece from a clean slate so nothing leaks over from the previous row.
// Scalar fields: null means "not found on the page".
$piece_title = $piece_petrucci_permlink = $piece_opus = null;
$piece_movement_title = $piece_movement_number = $piece_movement_title_status = null;
$piece_key = $piece_key_number = null;
$piece_duration = $piece_style = $piece_langauge = null;
$piece_year = $piece_year_type = null;
$piece_instrumentation = $piece_dedication = null;
$piece_related_work_link = $piece_external_link = null;
$piece_query = null;
// Accumulators are reset to EMPTY ARRAYS instead of being unset: the parsing code
// appends to them and then calls implode()/count(), which must never receive an
// undefined variable when a row turns out to contain no matching elements.
$liValues = $liValues2 = array();
$ddValues = $ddValues2 = array();
$brValues = $brValues2 = array();
$aValues = $aExtValues = array();
$tdValues = $yearValues = array();
// Piece title from the DB row: one copy escaped for SQL, one URL-encoded for the API call.
$piece_title = $db->real_escape_string($row[0]);
$piece_title_url = urlencode($row[0]);
// Piece Petrucci permalink, escaped for SQL.
$piece_petrucci_permlink = $db->real_escape_string($row[1]);
// Petrucci MediaWiki API: request the parsed page for this title as JSON.
$petrucci_url = "http://imslp.org/api.php?action=parse&page=$piece_title_url&redirects&format=json";
$petrucci = file_get_contents($petrucci_url); // Loading Content
if ($petrucci === false) {
    // Network/HTTP failure: skip this piece instead of crashing on the decode below.
    trigger_error("Could not load $petrucci_url", E_USER_WARNING);
    continue;
}
$petrucci_output = json_decode($petrucci);
if (!isset($petrucci_output->parse->text->{'*'})) {
    // Invalid JSON or a response without parsed page text: nothing to process.
    trigger_error("No parsed page text in API response for $petrucci_url", E_USER_WARNING);
    continue;
}
$data = $petrucci_output->parse->text->{'*'};
// Cut off everything in front of the work-info body; keep the whole text when the marker is absent.
$body_start = strpos($data, '<div class="wi_body">');
if ($body_start === false) {
    $body_start = 0;
}
// Normalise characters that would get in the way of the string handling below
// (non-breaking space, dashes, flat sign), then build the DOM.
$html = str_get_html(str_replace(array("&#160;","&#8211;", "&#x266d;", "—","–"), array(" ", "-", "-flat", "-", "-"), substr($data, $body_start)));
if (!$html) {
    // No DOM could be built from the text; skip the piece rather than
    // calling ->find() on a non-object.
    trigger_error("Could not build a DOM for $petrucci_url", E_USER_WARNING);
    continue;
}
/* ////////////////////////////////////////////////////////////
/////// Start Processing (Filtering, Replacement etc.) //////
//////////////////////////////////////////////////////////// */
foreach($html->find('tr') as $element){
// Detecting which informations are available & removing further unnecessary text
switch(TRUE) {
// OPUS / WORK NUMBER
// Row whose markup mentions "Opus/Catalogue": keep the text that follows the label.
case(strpos($element->innertext, "Opus/Catalogue")):
    $opus_row_text = $element->plaintext;
    $opus_label_length = strlen(" Opus/Catalogue NumberOp./Cat. No. ");
    $opus_value = substr($opus_row_text, $opus_label_length, strlen($opus_row_text)-1);
    $piece_opus = $db->real_escape_string(trim($opus_value));
    break;
// MOVEMENTS (NUMBER & IF AVAILABLE PIECES)
// Sets $piece_movement_number (leading integer of the row text) and, when the row
// lists titles as <li>, <dd> or <br />-separated lines, $piece_movement_title as a
// "|"-joined string. $piece_movement_title_status records whether titles were found.
case(strpos($element->plaintext, "Movements/")):
    $piece_movement_title_status = true;
    // Skip the first 90 bytes of the row markup.
    // NOTE(review): presumably the label cell - confirm against a live page.
    $raw_movement = substr($element->innertext, 90);
    // The count is read the same way whatever markup the titles use, so compute it once.
    // NOTE(review): inside double quotes \' stays backslash + quote, so this label is
    // two bytes longer than the visible text - confirm the offset is intended.
    $piece_movement_number = $db->real_escape_string((int)substr($element->plaintext, strlen("Movements/SectionsMov\'ts/Sec\'s ")));
    if(strpos($element->innertext, "<li>")){
        // Fresh list: never append to values left behind by an earlier row.
        $liValues = array();
        $html_li = str_get_html($raw_movement);
        foreach($html_li->find('li') as $liList) {
            $liValues[] = trim($liList->plaintext);
        }
        $piece_movement_title = $db->real_escape_string(implode("|",$liValues));
    } elseif(strpos($element->innertext, "<dd>")) {
        // Fresh list: the Key case stores its own values in $ddValues as well.
        $ddValues = array();
        $html_dd = str_get_html($raw_movement);
        foreach($html_dd->find('dd') as $ddList) {
            $ddValues[] = trim($ddList->plaintext);
        }
        $piece_movement_title = $db->real_escape_string(implode("|",$ddValues));
    } elseif(strpos($element->innertext, "<br />")) {
        $brValues2 = array_map('trim', explode("<br />", $raw_movement));
        $piece_movement_title = $db->real_escape_string(strip_tags(implode("|",$brValues2)));
    } else {
        // Only a count is available, no titles.
        $piece_movement_title_status = false;
    }
    break;
// KEY
// Sets $piece_key ("|"-joined when several keys are listed) and $piece_key_number.
case(strpos($element->innertext, "Key")):
    // Skip the first 15 bytes of the row markup.
    // NOTE(review): presumably the label cell - confirm against a live page.
    $raw_key = substr($element->innertext, 15);
    if(strpos($element->plaintext, "see below")) {
        // Keys are given per movement: reuse the movement data.
        // NOTE(review): this copies whatever has been parsed so far; if the Key row
        // comes before the Movements row both values are still null - confirm row order.
        $piece_key = $piece_movement_title;
        $piece_key_number = $piece_movement_number;
    } elseif(strpos($element->innertext, "<br />")) {
        $brValues = array_map('trim', explode(" <br />", $raw_key));
        $piece_key_number = count($brValues);
        $piece_key = $db->real_escape_string(strip_tags(implode("|",$brValues)));
    } else {
        // Pick the first list-like tag present, checked in the order dd, td, li.
        $key_tag = null;
        foreach(array("dd", "td", "li") as $candidate_tag) {
            if(strpos($element->innertext, "<" . $candidate_tag . ">")) {
                $key_tag = $candidate_tag;
                break;
            }
        }
        if($key_tag === null) {
            // Plain text: a single key follows the label.
            $piece_key_number = 1;
            $piece_key = $db->real_escape_string(trim(substr($element->plaintext, strlen(" Key "), strlen($element->plaintext)-1)));
        } elseif(strpos($element->innertext, ", ") && substr_count($element->innertext, "<" . $key_tag . ">") == 1) {
            // One element holding a comma-separated list of keys.
            $key_values = array_map('trim', explode(", ", $raw_key));
            $piece_key_number = count($key_values);
            $piece_key = $db->real_escape_string(strip_tags(implode("|",$key_values)));
        } else {
            // One key per element. Start from an empty list so count()/implode()
            // always receive an array, even when find() matches nothing.
            $key_values = array();
            $html_key = str_get_html($raw_key);
            foreach($html_key->find($key_tag) as $key_node) {
                $key_values[] = trim($key_node->plaintext);
            }
            $piece_key_number = count($key_values);
            $piece_key = $db->real_escape_string(implode("|",$key_values));
        }
    }
    break;
// AVERAGE DURATION
// Stored in minutes. "minute" is tested first, so a text containing both units is
// read as minutes; an "hour" value is multiplied by 60. Other rows leave it untouched.
case(strpos($element->innertext, "Average Duration")):
    $duration_markup = $element->innertext;
    if(strpos($duration_markup, "minute")){
        $duration_number = (int)substr($element->plaintext, 31);
        $piece_duration = $db->real_escape_string($duration_number);
    } elseif(strpos($duration_markup, "hour")){
        $duration_number = (int)substr($element->plaintext, 31);
        $piece_duration = $db->real_escape_string($duration_number * 60);
    }
    break;
// PIECE STYLE
// Keep the text that follows the " Piece Style " label.
case(strpos($element->innertext, "Piece Style")):
    $style_row_text = $element->plaintext;
    $style_value = substr($style_row_text, strlen(" Piece Style "), strlen($style_row_text)-1);
    $piece_style = $db->real_escape_string(trim($style_value));
    break;
// PIECE COMPOSITION YEAR/DATE (ex. 1930 or 1928-31)
// $piece_year_type codes set here: 1 = single year, 2 = year range,
// 3 = three dash-separated parts (4-digit year plus two 2-digit parts),
// 30 = free text / could not be parsed; 20 is added when the text is marked doubtful.
case(strpos($element->innertext, "Year/Date")):
    // Text after the label. Only "-" is handled as a separator here, so this relies
    // on long dashes having been normalised to "-" earlier in the script.
    $raw_year = trim(substr($element->plaintext, strlen(" Year/Date of CompositionY/D of Comp. ")));
    if(strlen($raw_year)==4 && is_numeric($raw_year)){
        // EXAMPLE: 1912
        $piece_year = $db->real_escape_string($raw_year);
        $piece_year_type = 1;
    } elseif(strpos($raw_year, "-")){
        // A dash at position 0 is deliberately not treated as a range (strpos gives 0).
        $yearValues = array_map('trim', explode("-", $raw_year));
        $starts_with_year = strlen($yearValues[0]) == 4 && is_numeric($yearValues[0]);
        if(count($yearValues) == 2){
            if($starts_with_year && strlen($yearValues[1]) == 4 && is_numeric($yearValues[1])){
                // EXAMPLE: 1912-1913
                $piece_year = $yearValues[0] . "-" . $yearValues[1];
                $piece_year_type = 2;
            } elseif($starts_with_year && strlen($yearValues[1]) == 2 && is_numeric($yearValues[1])){
                // EXAMPLE: 1912-13 TO 1912-1913 (century copied from the first year)
                $year_first_two_numbers = substr($yearValues[0], 0, 2);
                $piece_year = $yearValues[0]."-".$year_first_two_numbers.$yearValues[1];
                $piece_year_type = 2;
            }
        } elseif(count($yearValues) == 3){
            if($starts_with_year && strlen($yearValues[1]) == 2 && is_numeric($yearValues[1]) && strlen($yearValues[2]) == 2 && is_numeric($yearValues[2])){
                // EXAMPLE: 1912-01-12 OR 1912-12-01 (order of the last two parts is not checked)
                $piece_year = $yearValues[0]."-".$yearValues[1]."-".$yearValues[2];
                $piece_year_type = 3;
            }
        }
    } else {
        // Anything else is stored verbatim as free text.
        $piece_year = $db->real_escape_string($raw_year);
        $piece_year_type = 30;
    }
    // Declare Data-Information Status if data is doubted.
    // Compare against false explicitly: strpos() returns 0 when the marker is the very
    // first character (e.g. "supposedly 1820", "?1820"), which a plain truth test misses.
    if(strpos($raw_year, "?") !== false || strpos($raw_year, "supposedly") !== false) {
        // IF DATA IS DOUBTED
        $piece_year_type = $piece_year_type + 20;
    } elseif($piece_year_type === null) {
        // IF DATA COULDN'T BE PARSED
        $piece_year_type = 30;
    }
    break;
// PIECE LANGUAGE
// Matches rows mentioning "Language" but not the "Language=" form;
// keeps the text that follows the " Language " label.
case(strpos($element->innertext, "Language") && !strpos($element->innertext, "Language=")):
    $language_row_text = $element->plaintext;
    $language_value = substr($language_row_text, strlen(" Language "), strlen($language_row_text)-1);
    $piece_langauge = $db->real_escape_string(trim($language_value));
    break;
// PIECE INSTRUMENTATION
// An "unspecified" instrumentation is stored as an empty string; otherwise the
// text following the " Instrumentation " label is kept.
case(strpos($element->innertext, "Instrumentation")):
    $instrumentation_row_text = $element->plaintext;
    if(!strpos($instrumentation_row_text, "unspecified")){
        $instrumentation_value = substr($instrumentation_row_text, strlen(" Instrumentation "), strlen($instrumentation_row_text)-1);
        $piece_instrumentation = $db->real_escape_string(trim($instrumentation_value));
    } else {
        $piece_instrumentation = "";
    }
    break;
// PIECE DEDICATION
// Keep the text that follows the " Dedication " label.
case(strpos($element->innertext, "Dedication")):
    $dedication_row_text = $element->plaintext;
    $dedication_value = substr($dedication_row_text, strlen(" Dedication "), strlen($dedication_row_text)-1);
    $piece_dedication = $db->real_escape_string(trim($dedication_value));
    break;
// RELATED WORKS
// Stores every link of the row as alternating "text|href|text|href|..." entries.
case(strpos($element->innertext, "Related Works")):
    // Start from an empty list so implode() always receives an array, even when the
    // row contains no links, and so links from an earlier row are not carried over.
    $aValues = array();
    $html_a = str_get_html($element->innertext);
    foreach($html_a->find('a') as $a) {
        $aValues[] = trim($a->plaintext);
        $aValues[] = trim($a->href);
    }
    $piece_related_work_link = $db->real_escape_string(implode("|",$aValues));
    break;
// EXTERNAL LINKS
// Stores the href of every link in the row as a "|"-joined string.
case(strpos($element->innertext, "External Links")):
    // Start from an empty list so implode() always receives an array, even when the
    // row contains no links, and so links from an earlier row are not carried over.
    $aExtValues = array();
    $html_a_ext = str_get_html($element->innertext);
    foreach($html_a_ext->find('a') as $a) {
        $aExtValues[] = trim($a->href);
    }
    $piece_external_link = $db->real_escape_string(implode("|",$aExtValues));
    break;
}
}
// Fall back to 0 for every numeric field the page did not provide.
$piece_duration = ($piece_duration == null) ? 0 : $piece_duration;
$piece_key_number = ($piece_key_number == null) ? 0 : $piece_key_number;
$piece_year_type = ($piece_year_type == null) ? 0 : $piece_year_type;
// The movement count must additionally be numeric; anything else becomes 0 too.
$piece_movement_number = ($piece_movement_number == null || !is_numeric($piece_movement_number)) ? 0 : $piece_movement_number;
// GENERATE SEARCH QUERY DATA (to enable easy, reliable and fast MySQL search).
// $piece_query is the title followed by whichever of key / movement titles exist,
// separated by " | ".
/*
IMPORTANT: SEARCH QUERY WORK-NUMBER INTEGRATION IS MISSING IN THIS GENERATOR!
*/
if($piece_movement_title_status === TRUE && $piece_key != null) {
    $piece_query = $piece_title . " | " . $piece_key . " | " . $piece_movement_title;
} elseif($piece_movement_title_status === TRUE) {
    // Movement titles but no key. This branch used to append $piece_key, which is
    // known to be empty here, so the movement titles never reached the search text.
    $piece_query = $piece_title . " | " . $piece_movement_title;
} elseif($piece_key != null) {
    $piece_query = $piece_title . " | " . $piece_key;
} else {
    $piece_query = $piece_title . " | ";
}
// No movement titles, but as many keys as movements: use the keys as movement titles.
if($piece_movement_number == $piece_key_number && $piece_movement_title == null) {
    $piece_movement_title = $piece_key;
}
// Date stamp for the record, escaped like every other value bound for SQL.
$record_date = $db->real_escape_string(date("Y-m-d"));
// Write the retrieved information to the database (the statement is a placeholder).
// $current_piece_id is the new row's id, or 0 when the insert failed.
$insert_succeeded = $db->query("INSERT into XXX (XXX, XXX, XXX) VALUES (XXX, XXX, XXX)");
$current_piece_id = $insert_succeeded ? $db->insert_id : 0;
}
// TRANSFER STATUS UPDATE (for the status interface).
// $current_piece_id is only assigned inside the row loop; when no row has been
// processed yet it does not exist, so report 0 instead of reading an undefined variable.
$status_piece_id = isset($current_piece_id) ? $current_piece_id : 0;
$db->query("INSERT into transfer_status (current_piece_id) VALUES ('$status_piece_id')");
// Continue with the next composer.
goto data_retrieve;
}
// End of the script. Control arrives here once it leaves the composer loop above,
// for instance when the SELECT for the current composer fails and the `if` block
// holding the backward goto is skipped. Note that "done" is printed in that case
// too, with no indication that a query failed.
exitPoint:
echo "done";
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.