Last active
December 4, 2019 01:42
-
-
Save num8er/7f60ca9e540888e5ac0e4b843742baa7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@ | |
Немного текста abc_New_New_New ! другой текст ghi_jkl и другой текст:;. # {} () [] abc_New ! @ | |
Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@ | |
Lego_New New_Friends_New Lego_New | |
--------- | |
Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@ | |
Du texte abc_New_New_New ! autre texte ghi_jkl et autre texte:;. # {} () [] abc_New ! @ | |
--------- | |
Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@ | |
Qualche testo abc_New_New_New ! altro testo ghi_jkl e altro testo:;. # {} () [] abc_New ! @ | |
--------- | |
N_Lego_New_N N_New_Friends_New_N N_Lego_New_N | |
Lego_New New_Friends_New Lego_New | |
--------- | |
Some N_Mercedes-Benz_N, N_Chick-fil-A_N text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_McDonald's*!&HP_"hey"_"A"_N.!@ N_"Actions Speak Louder Than Words"_N | |
بعض Mercedes-Benz و Chick-fil-A نص abc_New_New_New نص آخر ghi_jkl ونص آخر: ؛. # {} () [] McDonald's*!&HP_"hey"_"A" ! @ "Actions Speak Louder Than Words" | |
--------- | |
Some N_ free escaped text "string" which has @ny S!mB0l _N | |
بعض free escaped text "string" which has @ny S!mB0l | |
--------- | |
this is not working and like I'm like, you know, three years later, and I'm like sort of a king in this, in this world that I exist in. N_Ξ_N N_Я_N N_$_N I N_❤_N NY | |
これは働いておらず、私はあなたが3年後に、知っている、と私は私が存在すЯ $ I ❤ NY | |
--------- | |
some text N_Ξ_N N_Я_N N_₱_N N_$_N N_£_N N_₩_N I N_❤_N NY N_/_N N_\_N N_$_N | |
一部のテキストΞ Я ₱ $ £ ₩ I ❤ NY / \ $ | |
--------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require_once('vendor/autoload.php'); | |
use Google\Cloud\Translate\V2\TranslateClient; | |
/**
 * Locates phrases enclosed between an opening and a closing marker.
 *
 * A wrapped phrase begins at an opening marker and extends to the LAST closing
 * marker found before the next opening marker (or before the end of the text).
 * With the markers "N_" / "_N" the text "N_abc_New_N" is therefore one phrase,
 * even though "_N" also occurs inside "_New".
 *
 * Scanning is done on byte offsets with byte-wise comparisons, so the offsets
 * handed to substr() are always consistent with how they were found.
 */
class WrapperMatcher {
    /** @var string Opening marker. */
    private $startPattern = 'N_';

    /** @var string Closing marker. */
    private $endPattern = '_N';

    /**
     * WrapperMatcher constructor.
     *
     * Both markers are trimmed; a marker that is empty after trimming is rejected.
     *
     * @param string $startPattern Left pattern (opening wrapper). Example: "N_ some string _N" $startPattern = 'N_'
     * @param string $endPattern Right pattern (closing wrapper). Example: "N_ some string _N" $endPattern = '_N'
     *
     * @throws Error When either marker is empty after trimming.
     */
    public function __construct($startPattern, $endPattern) {
        // Compare against '' explicitly: a truthiness test would also reject the marker '0'.
        $startPattern = trim((string) $startPattern);
        if ($startPattern === '') {
            throw new Error('Invalid wrapper start pattern');
        }

        $endPattern = trim((string) $endPattern);
        if ($endPattern === '') {
            throw new Error('Invalid wrapper end pattern');
        }

        $this->startPattern = $startPattern;
        $this->endPattern = $endPattern;
    }

    /**
     * Finds every wrapped phrase in the given text.
     *
     * @param string $searchString Text to scan.
     *
     * @return array Two parallel lists: index 0 holds each phrase including its
     *               markers, index 1 holds the same phrases without the markers.
     */
    public function matchForString($searchString) {
        $searchString = (string) $searchString;

        if ($this->startPattern === $this->endPattern) {
            return $this->matchSymmetric($searchString);
        }

        return $this->getMatches($searchString, $this->lookupPositions($searchString));
    }

    /**
     * Handles the case where the opening and closing marker are the same string
     * by pairing consecutive occurrences with a non-greedy regular expression.
     *
     * @param string $searchString Text to scan.
     *
     * @return array Same shape as matchForString().
     */
    private function matchSymmetric(string $searchString): array {
        // Pass the delimiter to preg_quote() so a marker containing "/" cannot end the pattern early.
        $marker = preg_quote($this->startPattern, '/');
        $found = preg_match_all('/' . $marker . '(.*?)' . $marker . '/u', $searchString, $matches);

        if ($found === false) {
            // The regex engine failed (e.g. the subject is not valid UTF-8): report "no matches".
            return [[], []];
        }

        $matches[0] = array_map('trim', $matches[0]);

        return $matches;
    }

    /**
     * Collects every occurrence of the opening and closing marker, in text order.
     *
     * @param string $searchString Text to scan.
     *
     * @return array List of ['type' => 'start'|'end', 'pos' => byte offset].
     */
    private function findMarkers(string $searchString): array {
        $startLength = strlen($this->startPattern);
        $endLength = strlen($this->endPattern);
        $minLength = min($startLength, $endLength);

        // Try the longer marker first so that, when one marker is a prefix of
        // the other, the more specific one wins at a given offset.
        $candidates = [
            ['type' => 'start', 'pattern' => $this->startPattern, 'length' => $startLength],
            ['type' => 'end', 'pattern' => $this->endPattern, 'length' => $endLength],
        ];
        if ($endLength > $startLength) {
            $candidates = array_reverse($candidates);
        }

        $markers = [];
        $total = strlen($searchString);
        $cursor = 0;

        while ($cursor < $total) {
            $step = 1;

            foreach ($candidates as $candidate) {
                if (substr_compare($searchString, $candidate['pattern'], $cursor, $candidate['length']) === 0) {
                    $markers[] = ['type' => $candidate['type'], 'pos' => $cursor];
                    // Skip only bytes that belong to the marker just matched.
                    // Equal-length markers still advance one byte at a time,
                    // so "_N_" keeps yielding an end marker followed by a start marker.
                    $step = $candidate['length'] - $minLength + 1;
                    break;
                }
            }

            $cursor += $step;
        }

        return $markers;
    }

    /**
     * Pairs each opening marker with the last closing marker that precedes the
     * next opening marker.
     *
     * @param string $searchString Text to scan.
     *
     * @return array List of [start offset, end offset] pairs (byte offsets of
     *               the opening and of the closing marker).
     */
    private function lookupPositions(string $searchString): array {
        $startLength = strlen($this->startPattern);
        $positions = [];
        $openAt = null;
        $closeAt = null;

        foreach ($this->findMarkers($searchString) as $marker) {
            if ($marker['type'] === 'start') {
                if ($openAt !== null && $closeAt !== null) {
                    $positions[] = [$openAt, $closeAt];
                }
                $openAt = $marker['pos'];
                $closeAt = null;
                continue;
            }

            // A closing marker that overlaps the opening marker (the "_N" inside
            // "N_N") cannot close it; accepting it would produce a negative
            // length for the inner phrase.
            if ($openAt !== null && $marker['pos'] >= $openAt + $startLength) {
                $closeAt = $marker['pos'];
            }
        }

        if ($openAt !== null && $closeAt !== null) {
            $positions[] = [$openAt, $closeAt];
        }

        return $positions;
    }

    /**
     * Cuts the wrapped phrases out of the text for the given offset pairs.
     *
     * @param string $searchString Text the offsets refer to.
     * @param array $positions Pairs produced by lookupPositions().
     *
     * @return array Same shape as matchForString().
     */
    private function getMatches(string $searchString, array $positions): array {
        $startLength = strlen($this->startPattern);
        $endLength = strlen($this->endPattern);
        $matches = [[], []];

        foreach ($positions as $position) {
            list($from, $to) = $position;
            $matches[0][] = substr($searchString, $from, $to - $from + $endLength);
            $matches[1][] = substr($searchString, $from + $startLength, $to - $from - $startLength);
        }

        return $matches;
    }
}
/**
 * Translates texts while keeping marker-wrapped phrases untranslated.
 *
 * Each wrapped phrase is swapped for a numbered <span translate="no"> placeholder
 * before the text is sent to the translation service, and swapped back (without
 * its markers) in the translated result.
 */
class WrappedTranslator {
    /** @var string Opening marker of a phrase that must not be translated. */
    private $startPattern;

    /** @var string Closing marker of a phrase that must not be translated. */
    private $endPattern;

    /** @var string|null API key supplied by the caller, or null to use the built-in fallback. */
    private $apiKey;

    private $placeholderLeftWrapper = '<span translate="no">';
    private $placeholderRightWrapper = '</span>';

    // Optional padding expected around the placeholder number when looking for
    // it in the translated text; both are empty here.
    private $whitespaceLeft = '';
    private $whitespaceRight = '';

    /**
     * @param string $startPattern Opening marker, e.g. 'N_'.
     * @param string $endPattern Closing marker, e.g. '_N'.
     * @param string|null $apiKey Optional translation API key; when omitted the
     *                            key embedded in googleTranslateIt() is used.
     */
    public function __construct($startPattern, $endPattern, $apiKey = null) {
        $this->startPattern = $startPattern;
        $this->endPattern = $endPattern;
        $this->apiKey = $apiKey;
    }

    /**
     * Translates a batch of texts, leaving wrapped phrases untouched.
     *
     * @param iterable $originalTexts Texts to translate.
     * @param string $targetLanguage Target language code, e.g. 'ru'.
     *
     * @return array Translated texts in input order, indexed from 0.
     */
    public function translate($originalTexts, $targetLanguage) {
        // Re-index from 0: the translated batch is matched back to the stored
        // phrases by position, so string or sparse input keys must not leak in.
        $originalTexts = is_array($originalTexts)
            ? array_values($originalTexts)
            : iterator_to_array($originalTexts, false);

        if ($originalTexts === []) {
            // Nothing to translate; avoid a pointless remote call.
            return [];
        }

        $preservedTexts = [];
        $allMatches = [];
        foreach ($originalTexts as $index => $originalText) {
            $matches = $this->matchWrappedPhrases($originalText);
            $preservedTexts[$index] = $this->preserveWrappedPhrases($originalText, $matches);
            $allMatches[$index] = $matches;
        }

        $translatedTexts = $this->googleTranslateIt($preservedTexts, $targetLanguage);

        $result = [];
        foreach ($translatedTexts as $index => $translatedText) {
            $result[$index] = $this->putWrappedPhrasesBack($translatedText, $allMatches[$index]);
        }

        return $result;
    }

    /**
     * Replaces the first occurrence of $search in $subject with $replace.
     *
     * @return string $subject unchanged when $search does not occur.
     */
    private function replaceFirst($search, $replace, $subject) {
        $pos = strpos($subject, $search);
        if ($pos === false) {
            return $subject;
        }

        return substr($subject, 0, $pos) . $replace . substr($subject, $pos + strlen($search));
    }

    /**
     * Swaps every wrapped phrase for a numbered placeholder.
     *
     * @param string $text Original text.
     * @param array $matches Matcher result; receives index 2 with the
     *                       placeholder number assigned to each phrase.
     *
     * @return string Text with placeholders in place of the wrapped phrases.
     */
    private function preserveWrappedPhrases($text, &$matches) {
        $matches[2] = [];
        $placeholdingElement = 0;

        foreach ($matches[0] as $i => $wrapped) {
            $placeholdingElement++;
            $placeholder = $this->placeholderLeftWrapper . $placeholdingElement . $this->placeholderRightWrapper;
            $matches[2][$i] = $placeholdingElement;
            $text = $this->replaceFirst($wrapped, $placeholder, $text);
        }

        return $text;
    }

    /**
     * Swaps every placeholder back for its phrase, without the markers.
     *
     * @param string $text Translated text containing placeholders.
     * @param array $matches Matcher result extended by preserveWrappedPhrases().
     *
     * @return string Translated text with the original phrases restored.
     */
    private function putWrappedPhrasesBack($text, $matches) {
        foreach ($matches[2] as $i => $placeholdingElement) {
            $placeholder = $this->placeholderLeftWrapper
                . $this->whitespaceLeft
                . $placeholdingElement
                . $this->whitespaceRight
                . $this->placeholderRightWrapper;
            $text = $this->replaceFirst($placeholder, $matches[1][$i], $text);
        }

        return $text;
    }

    /**
     * Finds the wrapped phrases of one text using the configured markers.
     *
     * @return array Matcher result: full phrases at index 0, inner phrases at index 1.
     */
    private function matchWrappedPhrases($text) {
        return (new WrapperMatcher($this->startPattern, $this->endPattern))->matchForString($text);
    }

    /**
     * Sends the texts to Google Translate as HTML and returns the translated strings.
     *
     * @param array $texts Texts to translate.
     * @param string $target Target language code.
     *
     * @return array The 'text' entry of each batch result, in batch order.
     */
    private function googleTranslateIt($texts, $target) {
        // ToDo: REPLACE THIS WITH YOUR OWN TRANSLATE METHOD THAT RETURNS TRANSLATED STRING
        // NOTE(review): the fallback key below is committed to source and therefore
        // public; it should be revoked and supplied through the constructor instead.
        $translate = new TranslateClient([
            'key' => $this->apiKey ?? 'AIzaSyCqy1m_REtoq5aZb30gKpHnRPTvhmM3_-M'
        ]);
        $result = $translate->translateBatch($texts, ['target' => $target, 'format' => 'html']);

        return array_map(function ($entry) {
            return $entry['text'];
        }, $result);
    }
}
// Demo: translate a few sample texts and print each original followed by its
// translation. Every case is a pair of [texts to translate, target language].
$wrappedSample = 'Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@';
$legoSample = "N_Lego_New_N N_New_Friends_New_N N_Lego_New_N ";

$demoCases = [
    [[$wrappedSample, $legoSample], 'ru'],
    [[$wrappedSample], 'fr'],
    [[$wrappedSample], 'it'],
    [[$legoSample], 'ar'],
    [['Some N_Mercedes-Benz_N, N_Chick-fil-A_N text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_McDonald\'s*!&HP_"hey"_"A"_N.!@ N_"Actions Speak Louder Than Words"_N'], 'ar'],
    [['Some N_ free escaped text "string" which has @ny S!mB0l _N'], 'ar'],
    [['this is not working and like I\'m like, you know, three years later, and I\'m like sort of a king in this, in this world that I exist in. N_Ξ_N N_Я_N N_$_N I N_❤_N NY'], 'ja'],
    [['some text N_Ξ_N N_Я_N N_₱_N N_$_N N_£_N N_₩_N I N_❤_N NY N_/_N N_\_N N_$_N'], 'ja'],
];

$translator = new WrappedTranslator('N_', '_N');

foreach ($demoCases as $demoCase) {
    list($texts, $language) = $demoCase;
    $translated = $translator->translate($texts, $language);

    // Print each original next to ITS OWN translation (the first case used to
    // print the first text twice instead of the second text).
    foreach ($texts as $i => $text) {
        echo $text."\n";
        echo $translated[$i]."\n";
    }

    echo "\n---------\n";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment