Created
August 27, 2018 08:35
-
-
Save aquaminer/e34bc82a09fd8c61eb0eb183ef7dbade to your computer and use it in GitHub Desktop.
Парсинг текста по шаблону, вырезано из https://github.com/aymanrb/php-unstructured-text-parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Extracts named values from free-form text by matching it against a template.
 *
 * A template is ordinary text in which every value to capture is written as a
 * {%name%} placeholder. The template is turned into a regular expression with
 * one named capture group per placeholder and matched against the text, after
 * whitespace has been normalized in both.
 */
class TextParser
{
    /**
     * Parses $string according to $template.
     *
     * @param string $string   Raw text to extract values from.
     * @param string $template Template containing {%name%} placeholders.
     *
     * @return array|false Map of placeholder name => extracted value (tags
     *                     stripped, trimmed), or false when the text does not
     *                     match or the template yields no named values.
     */
    public function parse(string $string, string $template)
    {
        $text = $this->prepareText($string);
        $template = $this->prepareTemplate($template);

        return $this->extractData($text, $template);
    }

    /**
     * Collapses every run of whitespace into a single space and trims the ends.
     *
     * The UTF-8 aware pattern is tried first so that multi-byte characters are
     * never split and Unicode spaces (e.g. U+00A0) are collapsed as well. If
     * the input is not valid UTF-8 that call yields null, and the byte-wise
     * pattern is used instead.
     *
     * @param string $txt
     *
     * @return string
     */
    protected function prepareText($txt)
    {
        $normalized = preg_replace('/\s+/u', ' ', $txt);
        if ($normalized === null) {
            // Not valid UTF-8: fall back to byte-wise matching.
            $normalized = preg_replace('/\s+/', ' ', $txt);
        }

        return trim($normalized ?? '');
    }

    /**
     * Converts a template into the body of a regular expression.
     *
     * The template is quoted first so that its literal text cannot act as
     * regex syntax; quoting turns "{%Var%}" into "\{%Var%\}", which is what
     * the first pattern looks for.
     *
     * @param string $template
     *
     * @return string Regex body without delimiters.
     */
    protected function prepareTemplate($template)
    {
        $quoted = preg_quote($template, '/');
        $replacements = [
            '(?<$1>.*)', // 1 ...becomes a named capture group (?<Var>.*)
            ' ',         // 2 ...becomes a single space
        ];

        $prepared = preg_replace(
            [
                '/\\\{%(.*)%\\\}/Uu', // 1 every quoted {%Var%} placeholder...
                '/\s+/u',             // 2 every run of whitespace...
            ],
            $replacements,
            $quoted
        );
        if ($prepared === null) {
            // Not valid UTF-8: fall back to byte-wise matching.
            $prepared = preg_replace(
                ['/\\\{%(.*)%\\\}/U', '/\s+/'],
                $replacements,
                $quoted
            );
        }

        return trim($prepared ?? '');
    }

    /**
     * Matches the prepared text against the prepared template.
     *
     * @param string $text     Output of prepareText().
     * @param string $template Output of prepareTemplate().
     *
     * @return array|false Cleaned named captures, or false when the text does
     *                     not match, the pattern is invalid, or the template
     *                     contains no placeholders.
     */
    protected function extractData($text, $template)
    {
        $matches = [];

        // preg_match() returns 0 for "no match" and false when the pattern
        // cannot be compiled or executed (for instance two placeholders that
        // share a name). Neither case produces usable captures.
        if (preg_match('/' . $template . '/s', $text, $matches) !== 1) {
            return false;
        }

        // Each named group is reported under its name and under a numeric
        // index; keep the named entries only.
        $named = array_filter($matches, 'is_string', ARRAY_FILTER_USE_KEY);
        if (empty($named)) {
            return false;
        }

        return $this->cleanExtractedData($named);
    }

    /**
     * Applies cleanElement() to every extracted value, preserving the keys.
     *
     * @param array $matches
     *
     * @return array
     */
    protected function cleanExtractedData($matches)
    {
        return array_map([$this, 'cleanElement'], $matches);
    }

    /**
     * Strips HTML/PHP tags from a value and trims surrounding whitespace.
     *
     * @param string $value
     *
     * @return string
     */
    protected function cleanElement($value)
    {
        return trim(strip_tags($value));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment