Last active
June 28, 2021 20:42
-
-
Save dubsnipe/0732577c68b41277614d0f3800ca755f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Bulk-export Appropedia infobox parameters to a CSV file.
 *
 * Input:  $_POST["the_text"] - page titles, one per line.
 * Output: "output.csv" in the current working directory, one row per title,
 *         plus a short status message echoed to the client.
 *
 * For every title the script asks the Appropedia MediaWiki API for the latest
 * revision, locates the first {{Infobox device|project|medical device ...}}
 * template in the page wikitext and reads each known parameter from it.
 * Parameters that are absent (or pages that cannot be fetched / have no
 * infobox) are exported as empty cells.
 */

// Many pages may be requested sequentially, so lift the execution time limit.
set_time_limit(0);

/**
 * Infobox parameter names, in CSV column order.
 *
 * This list is the single source of truth for both the extraction loop and
 * the CSV header, so the two cannot drift apart.
 *
 * @return string[]
 */
function appropedia_infobox_fields()
{
    return [
        'affiliations',
        'authors',
        'bom',
        'caption',
        'cost',
        'cost-currency',
        'date-completed',
        'date-published',
        'date-updated',
        'derivative-of',
        'design-files',
        'designed-in',
        'disposal-instructions',
        'health-classification',
        'health-topic',
        'image',
        'instance-of',
        'keywords',
        'language',
        'license-documentation',
        'license-hardware',
        'license-software',
        'licensor',
        'location',
        'made',
        'made-in',
        'maintenance-instructions',
        'making-instructions',
        'manufacturing-files',
        'materials',
        'operating-instructions',
        'ported-from',
        'project-link',
        'quality-instructions',
        'replicated',
        'replicated-in',
        'risk-assessment',
        'schematics',
        'sdg',
        'software',
        'status',
        'tool-settings',
        'tools',
        'translators',
        'uses',
    ];
}

/**
 * CSV header row: "name" followed by every infobox parameter name.
 *
 * @return string[]
 */
function appropedia_csv_headers()
{
    return array_merge(['name'], appropedia_infobox_fields());
}

/**
 * Turn the raw submitted text into a list of page titles.
 *
 * Lines are split on any line-ending style (form submissions normally use
 * CRLF), surrounding whitespace is trimmed, inner whitespace runs become a
 * single underscore and blank lines are dropped.
 *
 * @param mixed $raw Submitted text; anything that is not a string yields [].
 * @return string[] Sequentially indexed titles.
 */
function appropedia_parse_titles($raw)
{
    if (!is_string($raw)) {
        return [];
    }

    $titles = [];
    foreach (preg_split('/\R/', $raw) as $line) {
        $line = trim($line);
        if ($line === '') {
            continue;
        }
        $titles[] = preg_replace('/\s+/', '_', $line);
    }

    return $titles;
}

/**
 * Fetch the wikitext of the latest revision of a page.
 *
 * @param string $title Page title (underscores instead of spaces).
 * @return string|null Wikitext, or null when the request fails, the response
 *                     is not valid JSON, or it carries no revision content
 *                     (e.g. the page does not exist).
 */
function appropedia_fetch_wikitext($title)
{
    // The title is user input: encode it so characters such as "&", "#" or
    // "+" cannot break or alter the query string.
    $url = 'https://www.appropedia.org/w/api.php?action=query&prop=revisions&titles='
        . rawurlencode($title)
        . '&rvslots=*&rvprop=content|timestamp&formatversion=latest&format=json';

    $response = file_get_contents($url);
    if ($response === false) {
        return null;
    }

    $data = json_decode($response, true);
    if (!isset($data['query']['pages'][0]['revisions'][0]['slots']['main']['content'])) {
        return null;
    }

    // Work on the decoded content so that values are free of JSON escapes.
    $content = $data['query']['pages'][0]['revisions'][0]['slots']['main']['content'];

    return is_string($content) ? $content : null;
}

/**
 * Return the parameter section of the first supported infobox in the text.
 *
 * NOTE: the match ends at the first "}" after the template name, so an
 * infobox whose values embed other templates is truncated at that point.
 *
 * @param string $wikitext Page wikitext.
 * @return string|null Text between the template name and the closing braces,
 *                     or null when no supported infobox is present.
 */
function appropedia_extract_infobox($wikitext)
{
    $pattern = '/\{\{Infobox (?:device|project|medical device)([^}]*)\}\}/';
    if (preg_match($pattern, $wikitext, $m) !== 1) {
        return null;
    }

    return $m[1];
}

/**
 * Build one output row for a page.
 *
 * Each parameter is looked up as "|name=value" (optional blanks around the
 * name); the value runs to the end of the line and is trimmed. Anchoring on
 * the leading "|" keeps e.g. "software" from matching "license-software".
 * Output keys use underscores instead of hyphens.
 *
 * @param string      $title   Page title, stored under "name".
 * @param string|null $infobox Infobox parameter text, or null if unavailable.
 * @return array Map of "name" plus one key per parameter (string or null).
 */
function appropedia_extract_fields($title, $infobox)
{
    $row = ['name' => $title];

    foreach (appropedia_infobox_fields() as $field) {
        $key = str_replace('-', '_', $field);
        $row[$key] = null;

        if ($infobox === null) {
            continue;
        }

        $pattern = '/\|[ \t]*' . preg_quote($field, '/') . '[ \t]*=(.*)/';
        if (preg_match($pattern, $infobox, $m) === 1) {
            $row[$key] = trim($m[1]);
        }
    }

    return $row;
}

$pages_separated = appropedia_parse_titles(isset($_POST["the_text"]) ? $_POST["the_text"] : '');

$output = [];
foreach ($pages_separated as $key => $page_name) {
    // Read from the API; a page that cannot be read still gets a row of nulls.
    $wikitext = appropedia_fetch_wikitext($page_name);
    $infobox = ($wikitext === null) ? null : appropedia_extract_infobox($wikitext);
    $output[$key] = appropedia_extract_fields($page_name, $infobox);
}

// Write the CSV. Rows are written in key order, which matches the header
// because both come from appropedia_infobox_fields().
$csv = 'output.csv';
$file_pointer = fopen($csv, 'w');
if ($file_pointer === false) {
    echo("Could not open " . $csv . " for writing.");
} else {
    fputcsv($file_pointer, appropedia_csv_headers());
    foreach ($output as $row) {
        fputcsv($file_pointer, $row);
    }
    fclose($file_pointer);
    echo("Success!");
}

// Export as YAML (draft, disabled).
// NOTE: this draft reads per-field variables ($affiliations, $authors, ...)
// and $timestamp, none of which this script defines; it would have to read
// from the row arrays in $output (and from revisions[0]["timestamp"] in the
// API response) before it can be enabled.
// header ('Content-Type: application/x-yaml');
// header('Content-Disposition: attachment;filename="export.yaml"');
// echo(
// "# Open know-how manifest v0.1\n---\n"
// ."date-created=" . $timestamp."\n\n"
// ."#Properties\n"
// ."title=".$page_name."\n"
// ."affiliations = ".$affiliations."\n"
// ."authors = ".$authors."\n"
// ."bom = ".$bom."\n"
// ."caption = ".$caption."\n"
// ."cost = ".$cost."\n"
// ."cost-currency = ".$cost_currency."\n"
// ."date-completed = ".$date_completed."\n"
// ."date-published = ".$date_published."\n"
// ."date-updated = ".$date_updated."\n"
// ."derivative-of = ".$derivative_of."\n"
// ."design-files = ".$design_files."\n"
// ."designed-in = ".$designed_in."\n"
// ."disposal-instructions = ".$disposal_instructions."\n"
// ."health-classification = ".$health_classification."\n"
// ."health-topic = ".$health_topic."\n"
// ."image = ".$image."\n"
// ."instance-of = ".$instance_of."\n"
// ."keywords = ".$keywords."\n"
// ."language = ".$language."\n"
// ."license\n"
// ." documentation = ".$license_documentation."\n"
// ." hardware = ".$license_hardware."\n"
// ." software = ".$license_software."\n"
// ."licensor\nname = ".$licensor."\n"
// ."location = ".$location."\n"
// ."made = ".$made."\n"
// ."made-in = ".$made_in."\n"
// ."maintenance-instructions = ".$maintenance_instructions."\n"
// ."making-instructions = ".$making_instructions."\n"
// ."manufacturing-files = ".$manufacturing_files."\n"
// ."materials = ".$materials."\n"
// ."operating-instructions = ".$operating_instructions."\n"
// ."ported-from = ".$ported_from."\n"
// ."quality-instructions = ".$quality_instructions."\n"
// ."replicated = ".$replicated."\n"
// ."replicated-in = ".$replicated_in."\n"
// ."risk-assessment = ".$risk_assessment."\n"
// ."schematics = ".$schematics."\n"
// ."sdg = ".$sdg."\n"
// ."software = ".$software."\n"
// ."status = ".$status."\n"
// ."tool-settings = ".$tool_settings."\n"
// ."tools = ".$tools."\n"
// ."translators = ".$translators."\n"
// ."uses = ".$uses."\n"
// );
// Example API request:
// https://www.appropedia.org/w/api.php?action=query&prop=revisions&titles=3D_printed_2_liter_winnower&rvslots=*&rvprop=content|timestamp&formatversion=2&curtimestamp=true
// API help for prop=revisions:
// https://www.w3.org/wiki/api.php?action=help&modules=query%2Brevisions
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment