Skip to content

Instantly share code, notes, and snippets.

@aquaminer
Created August 27, 2018 08:35
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aquaminer/e34bc82a09fd8c61eb0eb183ef7dbade to your computer and use it in GitHub Desktop.
Save aquaminer/e34bc82a09fd8c61eb0eb183ef7dbade to your computer and use it in GitHub Desktop.
Парсинг текста по шаблону, вырезано из https://github.com/aymanrb/php-unstructured-text-parser
<?php
/**
 * Extracts named values from free-form text by matching it against a template
 * in which the variable parts are marked with {%name%} placeholders.
 *
 * Example: the template "Hello {%name%}, you are {%age%} years" applied to
 * "Hello Bob, you are 42 years old" yields ['name' => 'Bob', 'age' => '42'].
 */
class TextParser
{
    /**
     * Parses $string according to $template.
     *
     * Whitespace runs in both arguments are collapsed to a single space before
     * matching, so line breaks and indentation differences do not matter.
     *
     * @param string $string   Text to extract values from.
     * @param string $template Template containing {%name%} placeholders. Each
     *                         name becomes a regex group name, so it has to be
     *                         a valid PCRE group name and must not repeat.
     *
     * @return array|false Map of placeholder name => extracted value (tags
     *                     stripped, trimmed), or false when the text does not
     *                     match, the template has no placeholders, or the
     *                     template cannot be turned into a valid pattern.
     */
    public function parse(string $string, string $template)
    {
        $text = $this->prepareText($string);
        $template = $this->prepareTemplate($template);

        return $this->extractData($text, $template);
    }

    /**
     * Collapses every run of whitespace into one space and trims the result.
     *
     * @param string $txt Raw input text.
     *
     * @return string Normalised text.
     */
    protected function prepareText($txt)
    {
        $txt = preg_replace('/\s+/', ' ', $txt);

        return trim($txt);
    }

    /**
     * Converts a template into the body of a regular expression.
     *
     * The template is first escaped with preg_quote(), which turns each
     * placeholder {%Var%} into \{%Var%\}; that escaped form is what the first
     * pattern below looks for.
     *
     * @param string $template Template with {%name%} placeholders.
     *
     * @return string Regex body (without delimiters or modifiers).
     */
    protected function prepareTemplate($template)
    {
        $patterns = [
            '/\\\{%(.*)%\\\}/U', // 1 Replace all {%Var%}...
            '/\s+/',             // 2 Replace all white-spaces...
        ];
        $replacements = [
            '(?<$1>.*)',         // 1 ...with (?<Var>.*)
            ' ',                 // 2 ...with a single space
        ];
        $template = preg_replace($patterns, $replacements, preg_quote($template, '/'));

        return trim($template);
    }

    /**
     * Matches the prepared text against the prepared template and returns the
     * cleaned named captures.
     *
     * @param string $text     Text as returned by prepareText().
     * @param string $template Regex body as returned by prepareTemplate().
     *
     * @return array|false Cleaned named captures, or false when nothing was
     *                     extracted.
     */
    protected function extractData($text, $template)
    {
        // Initialise explicitly: when the pattern fails to compile, preg_match()
        // returns false without ever assigning $matches.
        $matches = [];

        // The "s" modifier lets "." match newlines as well.
        if (preg_match('/' . $template . '/s', $text, $matches) !== 1) {
            // No match (0) or an unusable pattern (false), e.g. a placeholder
            // name that is not a valid group name. PHP has already raised its
            // own warning in the latter case.
            return false;
        }

        // preg_match() reports every named group twice, once by name and once
        // by position; keep only the entries keyed by name.
        $named = array_filter($matches, 'is_string', ARRAY_FILTER_USE_KEY);
        if ($named === []) {
            return false;
        }

        return $this->cleanExtractedData($named);
    }

    /**
     * Applies cleanElement() to every extracted value, preserving keys.
     *
     * @param array $matches Map of placeholder name => raw captured string.
     *
     * @return array Map of placeholder name => cleaned string.
     */
    protected function cleanExtractedData($matches)
    {
        return array_map([$this, 'cleanElement'], $matches);
    }

    /**
     * Strips HTML/PHP tags from a captured value and trims surrounding
     * whitespace.
     *
     * @param string $value Raw captured value.
     *
     * @return string Cleaned value.
     */
    protected function cleanElement($value)
    {
        return trim(strip_tags($value));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment