Skip to content

Instantly share code, notes, and snippets.

@paunin
Last active August 29, 2015 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save paunin/7507c9c5989d36034faa to your computer and use it in GitHub Desktop.
Save paunin/7507c9c5989d36034faa to your computer and use it in GitHub Desktop.
Comparer 2 files
<?php
namespace Compare;
/**
 * Task: write a program that compares two (terabyte-sized) text files and writes
 * to a third file the lines that are present in the first file but not in the second.
 *
 * Neither input is loaded into memory: every line of the first file is compared,
 * chunk by chunk, against every line of the second file.
 *
 * To run this utility just run `php ./Compare.php input_file1.txt input_file2.txt output_file.txt`
 */
class Compare
{
    /**
     * @var int Length argument handed to fgets(); a single read returns at most this many bytes minus one
     */
    private $readLength = 1048576; // 1024 * 1024

    /**
     * @var resource first input file handler
     */
    private $inputFile1;

    /**
     * @var resource second input file handler
     */
    private $inputFile2;

    /**
     * @var resource output file handler
     */
    private $outputFile;

    /**
     * Open both inputs for reading and the output for writing.
     *
     * The output file is created if missing and emptied if it already exists.
     *
     * @param string $inputFile1 Path of the file whose lines are filtered
     * @param string $inputFile2 Path of the file holding the lines to exclude
     * @param string $outputFile Path of the file receiving the surviving lines
     * @throws \Exception When any of the three files cannot be opened
     */
    public function __construct($inputFile1, $inputFile2, $outputFile) // here you can add configs for redefine params
    {
        $this->inputFile1 = $this->open($inputFile1, 'r', 'Can\'t open first input file');
        $this->inputFile2 = $this->open($inputFile2, 'r', 'Can\'t open second input file');
        // 'w' creates the file or truncates it to zero length in a single step.
        $this->outputFile = $this->open($outputFile, 'w', 'Can\'t open output file');
    }

    /**
     * Close open resources
     */
    public function __destruct()
    {
        foreach (array($this->inputFile1, $this->inputFile2, $this->outputFile) as $handle) {
            if (is_resource($handle)) {
                fclose($handle);
            }
        }
    }

    /**
     * Main runner: walk the first file line by line and copy to the output
     * every line that has no equal line in the second file.
     *
     * @throws \Exception When writing to the output file fails
     */
    public function run()
    {
        $fpos = 0;
        // Stop as soon as no byte is left, so the position right after the final
        // newline is not treated as one more (empty) line that would trigger a
        // useless full scan of the second file.
        while ($this->hasLineAt($fpos)) {
            if ($this->existString($fpos)) {
                $this->skipString($fpos);
            } else {
                $this->writeString($fpos);
            }
            // Both helpers leave the pointer just past the line they handled.
            $fpos = ftell($this->inputFile1);
        }
    }

    /**
     * Function to find string (start in $pos) from first file in second
     *
     * Lines are compared without their terminator, so an unterminated last
     * line equals the same text followed by a line break in the other file.
     * The position of the first file is not preserved; callers must seek.
     *
     * @param int $pos Byte offset in the first file where the line starts
     * @return bool True when the second file contains an equal line
     */
    public function existString($pos)
    {
        rewind($this->inputFile2);
        while (true) {
            $firstChunk = fgets($this->inputFile2, $this->readLength);
            if ($firstChunk === false) {
                return false; // second file exhausted without a match
            }
            // Restart the first file's line for every candidate line.
            fseek($this->inputFile1, $pos);
            if ($this->lineMatches($firstChunk)) {
                return true;
            }
        }
    }

    /**
     * Write string from input file
     *
     * Copies the line verbatim, terminator included, one chunk at a time.
     *
     * @param int $pos String position for write
     * @throws \Exception When the output file does not accept the data
     */
    public function writeString($pos)
    {
        fseek($this->inputFile1, $pos);
        while (($block = fgets($this->inputFile1, $this->readLength)) !== false) {
            $written = fwrite($this->outputFile, $block);
            // A short or failed write (e.g. disk full) must not pass silently.
            if ($written === false || $written < strlen($block)) {
                throw new \Exception('Can\'t write to output file');
            }
            if ($this->eol($block)) {
                break;
            }
        }
    }

    /**
     * Skip string from input file
     *
     * Advances the first file's pointer to just past the line at $pos.
     *
     * @param int $pos String position for skip
     */
    public function skipString($pos)
    {
        fseek($this->inputFile1, $pos);
        while (($block = fgets($this->inputFile1, $this->readLength)) !== false) {
            if ($this->eol($block)) {
                break;
            }
        }
    }

    /**
     * Check if block has end of line
     *
     * NOTE(review): any chunk whose last byte is "\r" counts as a line end,
     * including a chunk that was merely cut right after a "\r" - confirm this
     * is intended for lines longer than one chunk.
     *
     * @param string|false $block Chunk as returned by fgets()
     * @return bool True when the last byte of the chunk is "\n" or "\r"
     */
    public function eol($block)
    {
        $last = substr((string) $block, -1);

        return $last === "\n" || $last === "\r";
    }

    /**
     * Open a file or fail with the given message.
     *
     * @param string $path
     * @param string $mode fopen() mode
     * @param string $error Message of the exception thrown on failure
     * @return resource
     * @throws \Exception
     */
    private function open($path, $mode, $error)
    {
        // The warning is silenced because the failure is reported as an exception.
        $handle = @fopen($path, $mode);
        if ($handle === false) {
            throw new \Exception($error);
        }

        return $handle;
    }

    /**
     * Tell whether at least one byte of the first file remains at $pos.
     *
     * Moves the file pointer; callers must seek before reading.
     *
     * @param int $pos
     * @return bool
     */
    private function hasLineAt($pos)
    {
        fseek($this->inputFile1, $pos);

        return fgetc($this->inputFile1) !== false;
    }

    /**
     * Compare the line at the first file's current position with the line of
     * the second file whose first chunk has already been read.
     *
     * The second file's line is always consumed completely, so the next read
     * from it starts at the following line.
     *
     * @param string $rightRaw First chunk of the second file's line
     * @return bool True when both lines hold the same bytes (terminator ignored)
     */
    private function lineMatches($rightRaw)
    {
        list($right, $rightDone) = $this->splitChunk($rightRaw, $this->inputFile2);
        list($left, $leftDone) = $this->readChunk($this->inputFile1);

        while (true) {
            // Compare the part both buffers have in common and drop it.
            $common = min(strlen($left), strlen($right));
            if ($common > 0) {
                if (strncmp($left, $right, $common) !== 0) {
                    $same = false;
                    break;
                }
                $left = (string) substr($left, $common);
                $right = (string) substr($right, $common);
            }
            // Refill whichever side ran dry while its line is still going.
            if ($left === '' && !$leftDone) {
                list($left, $leftDone) = $this->readChunk($this->inputFile1);
                continue;
            }
            if ($right === '' && !$rightDone) {
                list($right, $rightDone) = $this->readChunk($this->inputFile2);
                continue;
            }
            // At least one line is finished: equal only if nothing is left over.
            $same = ($left === '' && $right === '');
            break;
        }

        // Skip the remainder of the second file's line after a mismatch.
        while (!$rightDone) {
            list(, $rightDone) = $this->readChunk($this->inputFile2);
        }

        return $same;
    }

    /**
     * Read the next chunk of the current line from $handle.
     *
     * @param resource $handle
     * @return array Pair of (chunk without terminator, whether the line is finished)
     */
    private function readChunk($handle)
    {
        return $this->splitChunk(fgets($handle, $this->readLength), $handle);
    }

    /**
     * Strip the terminator from a chunk and tell whether its line is finished.
     *
     * Must be called right after the fgets() that produced $raw, because the
     * end-of-file state of $handle is part of the answer.
     *
     * @param string|false $raw Result of fgets() on $handle
     * @param resource $handle
     * @return array Pair of (chunk without terminator, whether the line is finished)
     */
    private function splitChunk($raw, $handle)
    {
        if ($raw === false) {
            return array('', true); // nothing left: the line ended at end of file
        }
        if ($this->eol($raw)) {
            return array((string) substr($raw, 0, -1), true);
        }

        return array($raw, feof($handle));
    }
}
//-------------------------------- <<< PROGRAM ------------------------------------
// Command-line entry point: check that the three path arguments were given,
// then run the comparison. Every failure is reported on STDERR and ends the
// process with exit status 1 so that calling scripts can detect it.
$missingArgumentMessages = array(
    1 => "Please show me first input file\n",
    2 => "Please show me second input file\n",
    3 => "Please show me output file\n",
);
foreach ($missingArgumentMessages as $argIndex => $argMessage) {
    // isset() plus an empty-string check, because empty() would also reject
    // the perfectly valid file name "0".
    if (!isset($argv[$argIndex]) || $argv[$argIndex] === '') {
        fwrite(STDERR, $argMessage);
        exit(1);
    }
}
$input1 = $argv[1];
$input2 = $argv[2];
$output = $argv[3];
try {
    $comparer = new Compare($input1, $input2, $output);
    $comparer->run();
} catch (\Exception $e) { //use native Exceptions
    fwrite(STDERR, "Error: {$e->getMessage()}\n");
    exit(1);
}
//-------------------------------- >>> PROGRAM ------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment