Last active
December 12, 2021 10:33
-
-
Save yosun/d1ef6ef56943bd2417b07f4970ff7447 to your computer and use it in GitHub Desktop.
Tools for working with large JSON files (PHP-specific)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Streams the objects of a large JSON file one at a time with pcrov/JsonReader,
// so the whole document never has to be decoded into memory at once.
ini_set('memory_limit','512M');
set_time_limit(0);

// Surface every error while the script runs.
ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);
error_reporting(E_ALL);

require_once( 'vendor/autoload.php' );

// JsonMachine (JsonMachine::fromFile('metadata_unique_all.json')) was tried first
// but could not be made to work for the poly_dump json (250M, with verbose objects),
// hence JsonReader below.
use pcrov\JsonReader\JsonReader;

$sourceFile = __DIR__.'/metadata_unique_all.json';

$reader = new JsonReader();
$reader->open($sourceFile);

// Advance two nodes before looping.
// NOTE(review): presumably the first read() lands on the outer array and the
// second on its first element - confirm against the shape of the input file.
$reader->read();
$reader->read();

// Visit nodes for as long as the current one is an object; this takes the place
// of the former "foreach ($data as $datum)" loop.
for (; $reader->type() === JsonReader::OBJECT; $reader->next()) {
    $datum = $reader->value();
    // do stuff!
}

$reader->close();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// check if links are valid ... for example before piping to s3 | |
function url_exists($url) {
    // Returns 1 when the status code reported for $url is 200, otherwise 0.
    // The comparison is intentionally loose: the helper hands back the code as a
    // string (or false when none was found).
    return getHttpResponseCode_using_getheaders($url) == 200 ? 1 : 0;
}
function getHttpResponseCode_using_curl($url, $followredirects = true){
    // Asks cURL for the headers of $url (no body) and reports the HTTP status.
    // Returns: int response code, or false when $url is empty / not a string,
    //          the cURL handle cannot be created, or cURL reports an error.
    // $followredirects == true  : redirects are followed (at most 10), giving the LAST known httpcode
    // $followredirects == false : redirects are not followed, giving the FIRST known httpcode
    // NOTE: neither a connect timeout, an overall timeout nor a user agent is set here,
    //       so the call can block for as long as the local/default timeouts allow.
    if (!is_string($url) || !$url) {
        return false;
    }

    $handle = @curl_init($url);
    if ($handle === false) {
        return false;
    }

    @curl_setopt($handle, CURLOPT_HEADER, true);         // include the headers in the output
    @curl_setopt($handle, CURLOPT_NOBODY, true);         // skip the body
    @curl_setopt($handle, CURLOPT_RETURNTRANSFER, true); // capture the output instead of printing it
    @curl_setopt($handle, CURLOPT_FOLLOWLOCATION, $followredirects ? true : false);
    if ($followredirects) {
        // cap the redirect chain so followlocation cannot loop endlessly
        @curl_setopt($handle, CURLOPT_MAXREDIRS, 10);
    }

    @curl_exec($handle);

    // A non-zero errno means the transfer failed; otherwise read the status code
    // (an int in practice, despite what the manual suggests).
    $status = @curl_errno($handle) ? false : @curl_getinfo($handle, CURLINFO_HTTP_CODE);
    @curl_close($handle);

    return $status;
}
function getHttpResponseCode_using_getheaders($url, $followredirects = true){
    // Fetches the response headers of $url with get_headers() and extracts the HTTP status code.
    // Returns: string response code (e.g. "200"), or false when $url is empty / not a string,
    //          no headers could be fetched, or no status line was found in them.
    // NOTE: could potentially block for a while (depends on connection, target site and local timeout settings).
    // $followredirects == false : return the FIRST status line found in the header list
    // $followredirects == true  : return the LAST status line found in the header list (when redirected)
    if (!is_string($url) || !$url) {
        return false;
    }

    $headers = @get_headers($url);
    if (!$headers || !is_array($headers)) {
        // no headers
        return false;
    }

    if ($followredirects) {
        // we want the last status line, so scan from the end
        $headers = array_reverse($headers);
    }

    foreach ($headers as $hline) {
        // Matches status lines such as "HTTP/1.1 200 OK", "HTTP/1.0 301 Moved Permanently"
        // and also "HTTP/1.1 200": the reason phrase is optional, so the code may be
        // followed by whitespace OR by the end of the line.
        if (preg_match('/^HTTP\/\S+\s+([1-9][0-9][0-9])(?:\s|$)/', $hline, $matches)) {
            return $matches[1];
        }
    }

    // no HTTP/xxx status line found in the headers
    return false;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Copies the remote file at $filepath into an S3 bucket under the key $filenamewithextension.
// The surrounding code is expected to provide (none of these are defined in this file):
//   $filepath               - URL of the file to copy
//   $filenamewithextension  - object key to store the file under
//   $bucketName (optional)  - target bucket; defaults to 'polydumpcurated'
//   url_exists()            - reachability check for $filepath
//   awsAccessKey / awsSecretKey - constants holding the AWS credentials
// testing
/*
$filepath = 'https://poly.googleapis.com/downloads/fp/1602064148752848/4vds6twPsb7/c8Ksvo0_VjG/C:/Users/PC/Desktop/pom/pimSurface_Color.png';
$filenamewithextension = 'C:/Users/PC/Desktop/pom/pimSurface_Color.png';
*/
use Aws\S3\S3Client;

if(!isset($filepath))die('filepath?');
if(!isset($filenamewithextension))die('filenamewithextension not set');

// Removes every backslash+slash pair from the key.
// NOTE(review): this deletes the separator outright instead of turning "\/" into "/";
// confirm that flattening the key like this is intended.
$filenamewithextension = str_replace('\/','',$filenamewithextension);

require_once( 'vendor/autoload.php' );

if(url_exists($filepath)){
    $credentials = new Aws\Credentials\Credentials(awsAccessKey, awsSecretKey);
    $s3 = new S3Client([
        'version' => 'latest',
        'region' => 'us-east-1',
        'credentials'=>$credentials]);
    if(!isset($bucketName))$bucketName = 'polydumpcurated';

    // Download before uploading: file_get_contents() returns false on failure, and
    // passing that straight to putObject() would store a bogus object instead of the file.
    $body = @file_get_contents($filepath);
    if($body === false){
        error_log('S3 upload skipped, could not download: '.$filepath);
    }else{
        @$s3->putObject([
            'Key'=>$filenamewithextension,
            'Bucket'=>$bucketName,
            'Body'=>$body
        ]);
    }
}
//unset($s3);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment