Skip to content

Instantly share code, notes, and snippets.

@num8er
Last active December 4, 2019 01:42
Show Gist options
  • Save num8er/7f60ca9e540888e5ac0e4b843742baa7 to your computer and use it in GitHub Desktop.
Save num8er/7f60ca9e540888e5ac0e4b843742baa7 to your computer and use it in GitHub Desktop.
Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@
Немного текста abc_New_New_New ! другой текст ghi_jkl и другой текст:;. # {} () [] abc_New ! @
Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@
Lego_New New_Friends_New Lego_New
---------
Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@
Du texte abc_New_New_New ! autre texte ghi_jkl et autre texte:;. # {} () [] abc_New ! @
---------
Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@
Qualche testo abc_New_New_New ! altro testo ghi_jkl e altro testo:;. # {} () [] abc_New ! @
---------
N_Lego_New_N N_New_Friends_New_N N_Lego_New_N
Lego_New New_Friends_New Lego_New
---------
Some N_Mercedes-Benz_N, N_Chick-fil-A_N text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_McDonald's*!&HP_"hey"_"A"_N.!@ N_"Actions Speak Louder Than Words"_N
بعض Mercedes-Benz و Chick-fil-A نص abc_New_New_New نص آخر ghi_jkl ونص آخر: ؛. # {} () [] McDonald's*!&HP_"hey"_"A" ! @ "Actions Speak Louder Than Words"
---------
Some N_ free escaped text "string" which has @ny S!mB0l _N
بعض free escaped text "string" which has @ny S!mB0l
---------
this is not working and like I'm like, you know, three years later, and I'm like sort of a king in this, in this world that I exist in. N_Ξ_N N_Я_N N_$_N I N_❤_N NY
これは働いておらず、私はあなたが3年後に、知っている、と私は私が存在すЯ $ I ❤ NY
---------
some text N_Ξ_N N_Я_N N_₱_N N_$_N N_£_N N_₩_N I N_❤_N NY N_/_N N_\_N N_$_N
一部のテキストΞ Я ₱ $ £ ₩ I ❤ NY / \ $
---------
<?php
require_once('vendor/autoload.php');
use Google\Cloud\Translate\V2\TranslateClient;
/**
 * Finds phrases enclosed between an opening and a closing wrapper,
 * e.g. "N_some phrase_N" for the wrappers 'N_' and '_N'.
 *
 * All positions handled by this class are byte offsets (strlen/substr), so
 * multi-byte content between the wrappers is carried through untouched.
 */
class WrapperMatcher {
    private $startPattern = 'N_';
    private $endPattern = '_N';

    /**
     * WrapperMatcher constructor.
     *
     * @param string $startPattern Left pattern (opening wrapper). Example: "N_ some string _N" $startPattern = 'N_'
     * @param string $endPattern Right pattern (closing wrapper). Example: "N_ some string _N" $endPattern = '_N'
     *
     * @throws Error When either pattern is empty after trimming whitespace.
     */
    public function __construct($startPattern, $endPattern) {
        $startPattern = trim($startPattern);
        // Compare against '' explicitly: a truthiness test would reject the valid pattern "0".
        if ($startPattern === '') throw new Error('Invalid wrapper start pattern');

        $endPattern = trim($endPattern);
        if ($endPattern === '') throw new Error('Invalid wrapper end pattern');

        $this->startPattern = $startPattern;
        $this->endPattern = $endPattern;
    }

    /**
     * Scans the string for wrapper occurrences and pairs them up.
     *
     * A phrase begins at a start wrapper and ends at the last end wrapper found
     * before the next start wrapper (or at the last end wrapper of the string
     * for the final phrase). Start wrappers without any end wrapper are dropped.
     *
     * @param string $searchString Text to scan.
     *
     * @return array List of [startOffset, endOffset] pairs; both are byte offsets
     *               pointing at the first byte of the respective wrapper.
     */
    private function lookupPositions($searchString) {
        $startLength = strlen($this->startPattern);
        $endLength = strlen($this->endPattern);
        $chunkSize = max($startLength, $endLength);
        // Extra cursor advance applied after a hit; zero when both wrappers have equal length.
        $skip = $chunkSize - min($startLength, $endLength);
        $searchLength = strlen($searchString);

        $foundPositions = [];
        $cursor = 0;
        do {
            $chunk = (string) substr($searchString, $cursor, $chunkSize);
            // Byte-wise prefix tests: the chunk is cut by byte offset and may split a
            // multi-byte character, so character-aware mb_* functions do not belong here.
            if (strncmp($chunk, $this->startPattern, $startLength) === 0) {
                $foundPositions[] = ['type' => 'start', 'pos' => $cursor];
                $cursor += $skip;
            }
            if (strncmp($chunk, $this->endPattern, $endLength) === 0) {
                $foundPositions[] = ['type' => 'end', 'pos' => $cursor];
                $cursor += $skip;
            }
            $cursor++;
        } while ($cursor < $searchLength);

        $total = count($foundPositions);
        $positions = [];
        $nextPosition = null;
        foreach ($foundPositions as $i => $foundPosition) {
            if ($foundPosition['type'] !== 'start') {
                continue;
            }

            $previousPosition = $foundPosition;
            $nextPosition = null;
            // Walk forward until the next start wrapper or the final marker is reached.
            for ($c = $i + 1; $c < $total; $c++) {
                if ($nextPosition !== null) {
                    $previousPosition = $nextPosition;
                }
                $nextPosition = $foundPositions[$c];
                if ($nextPosition['type'] === 'start' || $c === $total - 1) {
                    $positions[] = [$foundPosition['pos'], $previousPosition['pos']];
                    break;
                }
            }
        }

        // The scan for the last start wrapper stops ON the final marker, so the pair
        // recorded above still points one marker too early; move it to the final one.
        if ($nextPosition !== null) {
            $positions[count($positions) - 1][1] = $nextPosition['pos'];
        }

        // A pair whose start and end coincide means no end wrapper followed that start.
        return array_values(array_filter($positions, function($position) {
            return $position[0] !== $position[1];
        }));
    }

    /**
     * Cuts the matched phrases out of the string.
     *
     * @param string $searchString Text the positions refer to.
     * @param array $positions Pairs as returned by lookupPositions().
     *
     * @return array [0] => phrases including their wrappers, [1] => phrases without wrappers.
     */
    private function getMatches($searchString, $positions) {
        $startLength = strlen($this->startPattern);
        $endLength = strlen($this->endPattern);

        $matches = [[], []];
        foreach ($positions as $position) {
            $span = $position[1] - $position[0];
            $matches[0][] = substr($searchString, $position[0], $span + $endLength);
            $matches[1][] = substr($searchString, $position[0] + $startLength, $span - $startLength);
        }
        return $matches;
    }

    /**
     * Returns every wrapped phrase found in the string.
     *
     * @param string $searchString Text to search.
     *
     * @return array Same layout as preg_match_all(): [0] => phrases with wrappers,
     *               [1] => phrases without wrappers.
     */
    public function matchForString($searchString) {
        if ($this->startPattern === $this->endPattern) {
            // Identical wrappers cannot nest, so a lazy regular expression is enough.
            // The delimiter must be passed to preg_quote() or a "/" inside the
            // wrapper would terminate the expression early.
            $pattern = preg_quote($this->startPattern, '/');
            $regexp = '/'.$pattern.'(.*?)'.$pattern.'/u';
            preg_match_all($regexp, $searchString, $matches);
            array_walk($matches[0], function(&$item) {
                $item = trim($item);
            });
            return $matches;
        }

        $positions = $this->lookupPositions($searchString);
        return $this->getMatches($searchString, $positions);
    }
}
/**
 * Translates texts through Google Translate while keeping wrapped phrases
 * (e.g. "N_Brand Name_N") out of the translation.
 *
 * Each wrapped phrase is swapped for a numbered placeholder inside a
 * <span translate="no"> element before the request and swapped back,
 * without its wrappers, in the translated text.
 */
class WrappedTranslator {
    private $startPattern;
    private $endPattern;
    private $apiKey;

    private $placeholderLeftWrapper = '<span translate="no">';
    private $placeholderRightWrapper = '</span>';
    private $whitespaceLeft = '';
    private $whitespaceRight = '';

    /**
     * @param string $startPattern Opening wrapper, e.g. 'N_'.
     * @param string $endPattern Closing wrapper, e.g. '_N'.
     * @param string|null $apiKey Google Translate API key. When omitted, the
     *                            GOOGLE_TRANSLATE_API_KEY environment variable is used.
     */
    public function __construct($startPattern, $endPattern, $apiKey = null) {
        $this->startPattern = $startPattern;
        $this->endPattern = $endPattern;
        $this->apiKey = $apiKey;
    }

    /**
     * Replaces only the first occurrence of $search in $subject.
     *
     * @return string $subject unchanged when $search does not occur.
     */
    private function replaceFirst($search, $replace, $subject) {
        $pos = strpos($subject, $search);
        if ($pos === false) {
            return $subject;
        }
        return substr($subject, 0, $pos).$replace.substr($subject, $pos + strlen($search));
    }

    /**
     * Swaps every wrapped phrase in $text for a numbered placeholder.
     *
     * @param string $text Original text.
     * @param array $matches Matcher result; receives the placeholder numbers in $matches[2].
     *
     * @return string Text with placeholders in place of the wrapped phrases.
     */
    private function preserveWrappedPhrases($text, &$matches) {
        $matches[2] = [];
        $placeholdingElement = 0;
        foreach ($matches[0] as $i => $wrapped) {
            $placeholdingElement++;
            $placeholder = $this->placeholderLeftWrapper . $placeholdingElement . $this->placeholderRightWrapper;
            $matches[2][$i] = $placeholdingElement;
            $text = $this->replaceFirst($wrapped, $placeholder, $text);
        }
        return $text;
    }

    /**
     * Swaps the numbered placeholders back for the phrases without their wrappers.
     *
     * @param string $text Translated text containing placeholders.
     * @param array $matches Matches as filled in by preserveWrappedPhrases().
     *
     * @return string Translated text with the original phrases restored.
     */
    private function putWrappedPhrasesBack($text, $matches) {
        foreach ($matches[2] as $i => $placeholdingElement) {
            $placeholder = $this->placeholderLeftWrapper . $this->whitespaceLeft . $placeholdingElement . $this->whitespaceRight . $this->placeholderRightWrapper;
            $text = $this->replaceFirst($placeholder, $matches[1][$i], $text);
        }
        return $text;
    }

    /**
     * Translates the given texts, leaving wrapped phrases untranslated.
     *
     * @param array $originalTexts Texts to translate.
     * @param string $targetLanguage Target language code, e.g. 'ru'.
     *
     * @return array Translated texts under the same keys as $originalTexts.
     *
     * @throws RuntimeException When no API key is configured.
     */
    public function translate($originalTexts, $targetLanguage) {
        $keys = [];
        $preservedTexts = [];
        $allMatches = [];
        // Everything is tracked positionally so that texts, matches and results stay
        // aligned even when the caller's array has string or non-sequential keys.
        foreach ($originalTexts as $key => $originalText) {
            $matches = $this->matchWrappedPhrases($originalText);
            $keys[] = $key;
            $preservedTexts[] = $this->preserveWrappedPhrases($originalText, $matches);
            $allMatches[] = $matches;
        }

        // Nothing to translate: skip the remote call entirely.
        if ($preservedTexts === []) {
            return [];
        }

        $translatedTexts = array_values($this->googleTranslateIt($preservedTexts, $targetLanguage));

        $result = [];
        foreach ($translatedTexts as $index => $translatedText) {
            $result[$keys[$index]] = $this->putWrappedPhrasesBack($translatedText, $allMatches[$index]);
        }
        return $result;
    }

    /**
     * Finds the wrapped phrases of a single text.
     *
     * @return array [0] => phrases with wrappers, [1] => phrases without wrappers.
     */
    private function matchWrappedPhrases($text) {
        return (new WrapperMatcher($this->startPattern, $this->endPattern))->matchForString($text);
    }

    /**
     * Sends the texts to Google Translate as HTML and returns the translated strings.
     *
     * @param array $texts Texts with placeholders already applied.
     * @param string $target Target language code.
     *
     * @return array Translated strings, in the order returned by the client.
     *
     * @throws RuntimeException When no API key is configured.
     */
    private function googleTranslateIt($texts, $target) {
        // ToDo: REPLACE THIS WITH YOUR OWN TRANSLATE METHOD THAT RETURNS TRANSLATED STRING
        // The key is never kept in source code: credentials committed to a
        // repository leak to everyone who can read it.
        $apiKey = $this->apiKey !== null ? $this->apiKey : getenv('GOOGLE_TRANSLATE_API_KEY');
        if (!is_string($apiKey) || $apiKey === '') {
            throw new RuntimeException('Google Translate API key is not configured: pass it to the constructor or set GOOGLE_TRANSLATE_API_KEY');
        }

        $translate = new TranslateClient([
            'key' => $apiKey
        ]);
        $results = $translate->translateBatch($texts, ['target' => $target, 'format' => 'html']);
        return array_map(function($result) {return $result['text'];}, $results);
    }
}
// Demo: translate a set of sample texts and print every original text followed
// by its translation. Each case is one translate() call (one or more texts)
// and is followed by a separator line.
$plainSample = 'Some text N_abc_New_New_New_N! other text N_ghi_jkl_N and other text:;.#{}()[]N_abc_New_N.!@';
$brandSample = "N_Lego_New_N N_New_Friends_New_N N_Lego_New_N ";

$demoCases = [
    ['target' => 'ru', 'texts' => [$plainSample, $brandSample]],
    ['target' => 'fr', 'texts' => [$plainSample]],
    ['target' => 'it', 'texts' => [$plainSample]],
    ['target' => 'ar', 'texts' => [$brandSample]],
    ['target' => 'ar', 'texts' => ['Some N_Mercedes-Benz_N, N_Chick-fil-A_N text N_abc_New_New_New_N other text N_ghi_jkl_N and other text:;.#{}()[]N_McDonald\'s*!&HP_"hey"_&quot;A&quot;_N.!@ N_"Actions Speak Louder Than Words"_N']],
    ['target' => 'ar', 'texts' => ['Some N_ free escaped text "string" which has @ny S!mB0l _N']],
    ['target' => 'ja', 'texts' => ['this is not working and like I\'m like, you know, three years later, and I\'m like sort of a king in this, in this world that I exist in. N_Ξ_N N_Я_N N_$_N I N_❤_N NY']],
    ['target' => 'ja', 'texts' => ['some text N_Ξ_N N_Я_N N_₱_N N_$_N N_£_N N_₩_N I N_❤_N NY N_/_N N_\_N N_$_N']],
];

foreach ($demoCases as $demoCase) {
    $translatedTexts = (new WrappedTranslator('N_', '_N'))->translate($demoCase['texts'], $demoCase['target']);
    // Pair every translation with the text it was produced from; the first
    // case previously printed the first original text twice.
    foreach ($demoCase['texts'] as $index => $originalText) {
        echo $originalText."\n";
        echo $translatedTexts[$index]."\n";
    }
    echo "\n---------\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment