Skip to content

Instantly share code, notes, and snippets.

@ba0918
Last active April 30, 2017 15:36
Show Gist options
  • Save ba0918/dd86ecdc2a2fac245855ffc04a9df59c to your computer and use it in GitHub Desktop.
Save ba0918/dd86ecdc2a2fac245855ffc04a9df59c to your computer and use it in GitHub Desktop.
URLごにょごにょするやつ
<?php
// Usage example for UrlExtractor and UrlDomainValidator.
// Sample input: one URL on a whitelisted host and one on a host that is not whitelisted.
$text = <<< EOT
http://example.com/path/to?key=value#hash
http://invalid.example.com/path/to?key=value#hash
EOT;
$validator = new UrlDomainValidator(['example.com']); // register domain to whitelist
$extractor = new UrlExtractor($text);

// First URL found in the text.
$url = $extractor->first();
/* out
string(41) "http://example.com/path/to?key=value#hash"
*/

// Every URL found in the text, in order of appearance.
$urls = $extractor->all();
/* out
array(2) {
  [0]=>
  string(41) "http://example.com/path/to?key=value#hash"
  [1]=>
  string(49) "http://invalid.example.com/path/to?key=value#hash"
}
*/

// Turn URLs on whitelisted domains into links; leave every other URL untouched.
$text = $extractor->replace(function ($match) use ($validator) {
    if (!$validator->validate($match[0])) {
        return $match[0];
    }
    // Escape before embedding in markup: a query string may contain "&",
    // which must become "&amp;" inside an attribute value and in element text.
    $escaped = htmlspecialchars($match[0], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    return sprintf('<a href="%s">%s</a>', $escaped, $escaped);
});
/* out
<a href="http://example.com/path/to?key=value#hash">http://example.com/path/to?key=value#hash</a>
http://invalid.example.com/path/to?key=value#hash
*/

// Mask the scheme and host of every URL whose domain is not whitelisted.
// The extractor still holds the original text, so this runs on the sample input again.
$text = $extractor->replace(function ($match) use ($validator) {
    $subject = $match[0];
    if ($validator->validate($subject)) {
        return $subject;
    }

    $mask = function ($part) {
        return str_repeat('*', mb_strlen($part));
    };

    // $match[1] is "scheme:" (empty for scheme-relative URLs), $match[2] the bare
    // scheme and $match[3] the host part. The URL is rebuilt by position instead of
    // str_replace() so that only those two components are masked: a textual replace
    // would also hit the same text inside the path/query, and masking "http" first
    // would mangle a host such as "httpbin.org" so it is never fully masked.
    $prefix = $match[2] === '' ? '' : $mask($match[2]) . ':';
    $rest = substr($subject, strlen($match[1]) + 2 + strlen($match[3]));

    return $prefix . '//' . $mask($match[3]) . $rest;
});
/* out
http://example.com/path/to?key=value#hash
****://*******************/path/to?key=value#hash
*/
<?php
/**
 * Checks whether the host of a URL is one of an explicitly allowed set of domains.
 *
 * The host must equal a listed domain in full: a subdomain of a listed domain is
 * rejected unless it is listed itself. The comparison ignores ASCII letter case.
 */
class UrlDomainValidator
{
    /** @var array Allowed host names, stored exactly as passed to the constructor. */
    protected $domains = [];

    /**
     * @param array $domains Host names to accept, e.g. ['example.com'].
     */
    public function __construct(array $domains = [])
    {
        $this->domains = $domains;
    }

    /**
     * Tells whether the given URL points at one of the allowed domains.
     *
     * @param string $url Absolute or scheme-relative ("//host/...") URL.
     * @return bool True when the URL has a host and that host is allowed; false otherwise,
     *              including when the URL cannot be parsed or has no host.
     */
    public function validate(string $url): bool
    {
        return $this->contains($url);
    }

    /**
     * Looks the URL's host up in the allowed domain list.
     */
    private function contains(string $url): bool
    {
        // parse_url() yields false for a malformed URL and null when there is no host component.
        $host = parse_url($url, PHP_URL_HOST);
        if (!is_string($host)) {
            return false;
        }

        foreach ($this->domains as $domain) {
            // Host names are case-insensitive, so "EXAMPLE.com" must match "example.com".
            // Non-string entries can never equal a host and are skipped.
            if (is_string($domain) && strcasecmp($domain, $host) === 0) {
                return true;
            }
        }

        return false;
    }
}
<?php
use PHPUnit\Framework\TestCase;
/**
 * Tests UrlDomainValidator::validate() against a whitelist of
 * "example.com" and "sample.example.com".
 */
class UrlDomainValidatorTest extends TestCase
{
    /**
     * @dataProvider urlsProvider
     */
    public function test_validate(string $url, bool $expected): void
    {
        $validator = new UrlDomainValidator(['example.com', 'sample.example.com']);

        // assertSame so that only a real boolean of the expected value passes.
        $this->assertSame($expected, $validator->validate($url));
    }

    /**
     * Pairs of [url, expected validation result].
     *
     * Static because current PHPUnit versions require static data providers;
     * older versions accept static providers as well.
     */
    public static function urlsProvider(): array
    {
        return [
            'plain url' => ['http://example.com/', true],
            'scheme-relative url' => ['//example.com/', true],
            'with port' => ['http://example.com:8080/', true],
            'with credentials and port' => ['http://ba0918:pass@example.com:8080/', true],
            'whitelisted subdomain' => ['http://ba0918:pass@sample.example.com:8080/', true],
            'subdomain of a whitelisted subdomain' => ['http://ba0918:pass@lie.sample.example.com:8080/', false],
            // invalid domain
            'domain not whitelisted' => ['http://foo.bar.com/path/to', false],
            // invalid url
            'single slash after scheme' => ['http:/example.com/', false],
        ];
    }
}
<?php
/**
 * Finds URL-like substrings in a piece of text and lets callers read or rewrite them.
 *
 * A match is an optional "http:" / "https:" prefix, a mandatory "//", a host part
 * (which may carry credentials and a port), then an optional path, query string
 * and fragment. Capture groups: 1 = "scheme:", 2 = scheme, 3 = host part,
 * 4 = path, 5 = query, 6 = fragment.
 */
class UrlExtractor
{
    private $pattern = '/((http|https):)?(?:\/\/)([-\+;:&@=\$,\.\w_]+)(\/[-\+~%\/\.\w_]*)?(\?[-\+=&;%@\w_]*)?(#[\w]*)?/';
    private $subject;

    /**
     * @param string $subject Text to search for URLs.
     */
    public function __construct(string $subject)
    {
        $this->subject = $subject;
    }

    /**
     * Returns the first URL in the text, or null when the text contains none.
     */
    public function first(): ?string
    {
        $found = [];
        $hit = preg_match($this->pattern, $this->subject, $found);
        if (!$hit) {
            return null;
        }

        return $found[0];
    }

    /**
     * Returns every URL in the text in order of appearance; an empty array when there is none.
     */
    public function all(): array
    {
        $found = [];
        $count = preg_match_all($this->pattern, $this->subject, $found);
        if (!$count) {
            return [];
        }

        // Index 0 holds the full matches; the remaining indexes hold the capture groups.
        return $found[0];
    }

    /**
     * Returns the text with each URL replaced by the callback's return value.
     *
     * @param callable $callback Receives the match array of one URL (index 0 is the
     *                           whole URL, following indexes are the capture groups)
     *                           and returns the replacement string.
     */
    public function replace(callable $callback): string
    {
        $rewritten = preg_replace_callback($this->pattern, $callback, $this->subject);

        return $rewritten;
    }
}
<?php
use PHPUnit\Framework\TestCase;
/**
 * Tests UrlExtractor::first() on texts that do and do not contain a supported URL.
 */
class UrlExtractorTest extends TestCase
{
    /**
     * @dataProvider urlsProvider
     */
    public function testFirst(string $subject, ?string $expected): void
    {
        $extractor = new UrlExtractor($subject);

        // assertSame keeps "no match" (null) distinct from an empty-string match.
        $this->assertSame($expected, $extractor->first());
    }

    /**
     * Pairs of [text, first URL expected from it]; null means no URL is found.
     *
     * Static because current PHPUnit versions require static data providers;
     * older versions accept static providers as well.
     */
    public static function urlsProvider(): array
    {
        return [
            /* OK cases */
            ['//hoge.example.com', '//hoge.example.com'],
            ['asdf//hoge.example.com', '//hoge.example.com'],
            ['http://hoge.example.com', 'http://hoge.example.com'],
            ['http://hoge.example.com/', 'http://hoge.example.com/'],
            ['http://ba0918:pass@hoge.example.com', 'http://ba0918:pass@hoge.example.com'],
            ['http://hoge.example.com/path/to', 'http://hoge.example.com/path/to'],
            ['http://hoge.example.com/path/to/', 'http://hoge.example.com/path/to/'],
            ['http://hoge.example.com/path/to/?key=value', 'http://hoge.example.com/path/to/?key=value'],
            ['http://hoge.example.com/path/to/?key=value&key2=value2', 'http://hoge.example.com/path/to/?key=value&key2=value2'],
            ['http://hoge.example.com/path/to/?key=value&key2=value2#hash', 'http://hoge.example.com/path/to/?key=value&key2=value2#hash'],
            ['http://hoge.example.com/?key=value', 'http://hoge.example.com/?key=value'],
            ['http://hoge.example.com/#hash', 'http://hoge.example.com/#hash'],
            ['http://hoge.example.com/#hash<br>', 'http://hoge.example.com/#hash'],
            /* NG cases */
            // invalid scheme: a single slash never matches, so nothing is extracted
            ['http:/hoge.example.com/#hash', null],
            // unknown scheme: only the scheme-relative part is extracted
            ['invalid-scheme://hoge.example.com/#hash', '//hoge.example.com/#hash'],
            // invalid query string: the match stops before the stray "&"
            ['http://hoge.example.com/&key=value', 'http://hoge.example.com/'],
            // unsupported multibyte domain name: nothing is extracted
            ['http://日本語.jp/?key=value', null],
        ];
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment