Last active
April 30, 2017 15:36
-
-
Save ba0918/dd86ecdc2a2fac245855ffc04a9df59c to your computer and use it in GitHub Desktop.
URLごにょごにょするやつ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Usage sample for UrlExtractor and UrlDomainValidator.
// The sample text holds one URL on the allow-listed domain and one that is not.
$text = <<<EOT
http://example.com/path/to?key=value#hash
http://invalid.example.com/path/to?key=value#hash
EOT;

$validator = new UrlDomainValidator(['example.com']); // register the allowed domain
$extractor = new UrlExtractor($text);

// First URL found in the text.
$url = $extractor->first();
/* out
string(41) "http://example.com/path/to?key=value#hash"
*/

// Every URL found in the text, in order of appearance.
$urls = $extractor->all();
/* out
array(2) {
  [0]=>
  string(41) "http://example.com/path/to?key=value#hash"
  [1]=>
  string(49) "http://invalid.example.com/path/to?key=value#hash"
}
*/

// Turn URLs on an allowed domain into links; other URLs are left as they are.
// The extractor still holds the original text, so each replace() call below
// starts from the unmodified input.
$text = $extractor->replace(function (array $match) use ($validator): string {
    $url = $match[0];
    if (!$validator->validate($url)) {
        return $url;
    }

    // The URL pattern accepts "&" (among others), so the value must be escaped
    // before it is placed in an attribute or in element content. Only the
    // generated markup is escaped here: the text around the URL is passed
    // through untouched and must be escaped separately if it is plain text.
    $escaped = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');

    return sprintf('<a href="%s">%s</a>', $escaped, $escaped);
});
/* out
<a href="http://example.com/path/to?key=value#hash">http://example.com/path/to?key=value#hash</a>
http://invalid.example.com/path/to?key=value#hash
*/

// Mask the scheme and host of URLs whose domain is not on the allow-list.
$text = $extractor->replace(function (array $match) use ($validator): string {
    $url = $match[0];
    if ($validator->validate($url)) {
        return $url;
    }

    // Capture groups: [1] "scheme:" (empty for "//host" URLs), [2] the bare
    // scheme, [3] everything between "//" and the path (host, plus any
    // user-info/port characters).
    $scheme = $match[2];
    $authority = $match[3];

    // Rebuild the URL from its parts instead of str_replace()-ing the whole
    // string, so a scheme or host that happens to recur inside the path or
    // query is not masked as well.
    $maskedScheme = $scheme === '' ? '' : str_repeat('*', mb_strlen($scheme)) . ':';
    $remainder = substr($url, strlen($match[1]) + strlen('//') + strlen($authority));

    return $maskedScheme . '//' . str_repeat('*', mb_strlen($authority)) . $remainder;
});
/* out
http://example.com/path/to?key=value#hash
****://*******************/path/to?key=value#hash
*/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Checks whether the host of a URL is on an allow-list of domains.
 *
 * Matching is exact per host name: sub-domains of an allowed domain are not
 * accepted unless they are listed themselves. Host names are compared
 * case-insensitively, because DNS names do not distinguish case.
 */
class UrlDomainValidator
{
    /** @var string[] Allowed host names, stored as given to the constructor. */
    protected $domains = [];

    /**
     * @param string[] $domains Host names that validate() should accept.
     */
    public function __construct(array $domains = [])
    {
        $this->domains = $domains;
    }

    /**
     * Tells whether the URL's host is one of the allowed domains.
     *
     * @param string $url Absolute or scheme-relative ("//host/...") URL.
     *
     * @return bool False when the URL has no parsable host or the host is not allowed.
     */
    public function validate(string $url): bool
    {
        return $this->contains($url);
    }

    /**
     * Looks the URL's host up in the allow-list.
     */
    private function contains(string $url): bool
    {
        // parse_url() yields false for a malformed URL and null when the URL
        // has no host component; neither can match an allowed domain.
        $host = parse_url($url, PHP_URL_HOST);
        if (!is_string($host)) {
            return false;
        }

        $host = strtolower($host);
        foreach ($this->domains as $domain) {
            // Non-string entries never matched under the previous strict
            // in_array() lookup either, so they are skipped.
            if (is_string($domain) && strtolower($domain) === $host) {
                return true;
            }
        }

        return false;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use PHPUnit\Framework\TestCase; | |
/**
 * Tests UrlDomainValidator::validate() against allowed, disallowed and
 * malformed URLs.
 */
class UrlDomainValidatorTest extends TestCase
{
    /**
     * @dataProvider urlsProvider
     *
     * @param string $url      URL handed to the validator.
     * @param bool   $expected Expected validation result.
     */
    public function test_validate($url, $expected)
    {
        $validator = new UrlDomainValidator(['example.com', 'sample.example.com']);

        // assertSame: validate() must return a real boolean, not merely
        // something that compares loosely equal to one.
        $this->assertSame($expected, $validator->validate($url));
    }

    /**
     * Data sets of [url, expected result].
     *
     * Declared static because current PHPUnit versions require static data providers.
     *
     * @return array[]
     */
    public static function urlsProvider()
    {
        return [
            ['http://example.com/', true],
            ['//example.com/', true],
            ['http://example.com:8080/', true],
            ['http://ba0918:pass@example.com:8080/', true],
            ['http://ba0918:pass@sample.example.com:8080/', true],
            // a sub-domain of an allowed host is not itself allowed
            ['http://ba0918:pass@lie.sample.example.com:8080/', false],
            // invalid domain
            ['http://foo.bar.com/path/to', false],
            // invalid url
            ['http:/example.com/', false],
        ];
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Finds http(s) and scheme-relative ("//host/...") URLs in a piece of text
 * and lets the caller list or rewrite them.
 */
class UrlExtractor
{
    /**
     * URL pattern. Capture groups:
     *   1 scheme with colon ("http:" / "https:"), 2 bare scheme,
     *   3 the part between "//" and the path (host plus any user-info/port characters),
     *   4 path, 5 query string (with "?"), 6 fragment (with "#").
     * Groups 1 and 2 are empty for scheme-relative URLs.
     *
     * @var string
     */
    private $pattern = '/((http|https):)?(?:\/\/)([-\+;:&@=\$,\.\w_]+)(\/[-\+~%\/\.\w_]*)?(\?[-\+=&;%@\w_]*)?(#[\w]*)?/';

    /** @var string Text that is searched for URLs. */
    private $subject;

    /**
     * @param string $subject Text to search.
     */
    public function __construct(string $subject)
    {
        $this->subject = $subject;
    }

    /**
     * Returns the first URL in the text, or null when there is none.
     */
    public function first(): ?string
    {
        return $this->extract()[0] ?? null;
    }

    /**
     * Returns every URL in the text in order of appearance (empty array when there is none).
     *
     * @return string[]
     */
    public function all(): array
    {
        return $this->extractAll()[0] ?? [];
    }

    /**
     * Replaces each URL with the string returned by the callback.
     *
     * @param callable $callback Receives the match array (index 0 is the full
     *                           URL, further indexes are the capture groups of
     *                           the URL pattern) and returns the replacement.
     *
     * @return string The text with every URL replaced.
     *
     * @throws \RuntimeException When the regular-expression engine reports an error.
     */
    public function replace(callable $callback): string
    {
        $replaced = preg_replace_callback($this->pattern, $callback, $this->subject);

        // preg_replace_callback() returns null on a PCRE error; report that
        // explicitly instead of letting the string return type fail on null.
        if ($replaced === null) {
            throw new \RuntimeException(
                sprintf('URL replacement failed (PCRE error code %d).', preg_last_error())
            );
        }

        return $replaced;
    }

    /**
     * Runs the pattern once; returns the match array, or an empty array when
     * nothing matched (a PCRE failure is treated as "no match").
     */
    private function extract(): array
    {
        $matches = [];
        if (preg_match($this->pattern, $this->subject, $matches)) {
            return $matches;
        }

        return [];
    }

    /**
     * Runs the pattern over the whole text; returns the pattern-ordered match
     * arrays, or an empty array when nothing matched (a PCRE failure is
     * treated as "no match").
     */
    private function extractAll(): array
    {
        $matches = [];
        if (preg_match_all($this->pattern, $this->subject, $matches)) {
            return $matches;
        }

        return [];
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use PHPUnit\Framework\TestCase; | |
/**
 * Tests UrlExtractor::first() against well-formed URLs and inputs that must
 * match only partially or not at all.
 */
class UrlExtractorTest extends TestCase
{
    /**
     * @dataProvider urlsProvider
     *
     * @param string      $subject  Text handed to the extractor.
     * @param string|null $expected Expected first URL, or null when none should be found.
     */
    public function testFirst($subject, $expected)
    {
        $extractor = new UrlExtractor($subject);

        // assertSame: first() returns null (not '') when nothing matches, and
        // a loose comparison would hide that difference.
        $this->assertSame($expected, $extractor->first());
    }

    /**
     * Data sets of [subject, expected first URL].
     *
     * Declared static because current PHPUnit versions require static data providers.
     *
     * @return array[]
     */
    public static function urlsProvider()
    {
        return [
            /* OK cases */
            ['//hoge.example.com', '//hoge.example.com'],
            ['asdf//hoge.example.com', '//hoge.example.com'],
            ['http://hoge.example.com', 'http://hoge.example.com'],
            ['http://hoge.example.com/', 'http://hoge.example.com/'],
            ['http://ba0918:pass@hoge.example.com', 'http://ba0918:pass@hoge.example.com'],
            ['http://hoge.example.com/path/to', 'http://hoge.example.com/path/to'],
            ['http://hoge.example.com/path/to/', 'http://hoge.example.com/path/to/'],
            ['http://hoge.example.com/path/to/?key=value', 'http://hoge.example.com/path/to/?key=value'],
            ['http://hoge.example.com/path/to/?key=value&key2=value2', 'http://hoge.example.com/path/to/?key=value&key2=value2'],
            ['http://hoge.example.com/path/to/?key=value&key2=value2#hash', 'http://hoge.example.com/path/to/?key=value&key2=value2#hash'],
            ['http://hoge.example.com/?key=value', 'http://hoge.example.com/?key=value'],
            ['http://hoge.example.com/#hash', 'http://hoge.example.com/#hash'],
            ['http://hoge.example.com/#hash<br>', 'http://hoge.example.com/#hash'],
            /* NG cases */
            // invalid scheme: no "//" at all, so nothing is extracted
            ['http:/hoge.example.com/#hash', null],
            // unknown scheme: only the scheme-relative part is extracted
            ['invalid-scheme://hoge.example.com/#hash', '//hoge.example.com/#hash'],
            // invalid query string
            ['http://hoge.example.com/&key=value', 'http://hoge.example.com/'],
            // unsupported multibyte domain name
            ['http://日本語.jp/?key=value', null],
        ];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment