Skip to content

Instantly share code, notes, and snippets.

@cmkilger
Last active February 8, 2019 17:24
Show Gist options
  • Save cmkilger/4d7093d88200a34a46db89983497fa99 to your computer and use it in GitHub Desktop.
Save cmkilger/4d7093d88200a34a46db89983497fa99 to your computer and use it in GitHub Desktop.
Detects whether a string consists solely of emoji, optionally separated or surrounded by whitespace (spaces, tabs, newlines).
<?php
/**
 * Tells whether a string consists solely of emoji, optionally separated or
 * surrounded by whitespace.
 *
 * A string qualifies when, apart from whitespace, it is a sequence of one or
 * more of the following units:
 *  - a code point from the hard-coded emoji list/ranges below (this includes
 *    the zero-width joiner, so ZWJ sequences are accepted piecewise);
 *  - a zero-width joiner followed by the female or male sign;
 *  - a pair of regional-indicator symbols (flags);
 *  - any single character followed by one or more of U+20E0, U+20E3 (keycap)
 *    or U+FE0F (variation selector-16).
 *
 * @param string $subject UTF-8 text to inspect.
 * @return bool True only when preg_match() reports a match; false otherwise
 *              (no emoji at all, any other character present, or a PCRE
 *              failure).
 */
function isEmojiOnly($subject) {
    // Individual code points and ranges, written as raw UTF-8 bytes so the
    // /u modifier sees real characters inside the character class.
    $single = "["
        // U+00A9 .. U+21AA: copyright/registered, punctuation, letterlike, arrows
        . "\xc2\xa9\xc2\xae\xe2\x80\xbc\xe2\x81\x89\xe2\x84\xa2\xe2\x84\xb9\xe2\x86\x94-\xe2\x86\x99\xe2\x86\xa9-\xe2\x86\xaa"
        // U+231A .. U+25FE: technical, enclosed and geometric symbols
        . "\xe2\x8c\x9a-\xe2\x8c\x9b\xe2\x8c\xa8\xe2\x8f\x8f\xe2\x8f\xa9-\xe2\x8f\xb3\xe2\x8f\xb8-\xe2\x8f\xba\xe2\x93\x82\xe2\x96\xaa-\xe2\x96\xab\xe2\x96\xb6\xe2\x97\x80\xe2\x97\xbb-\xe2\x97\xbe"
        // U+2600 .. U+26FD: miscellaneous symbols
        . "\xe2\x98\x80-\xe2\x98\x84\xe2\x98\x8e\xe2\x98\x91\xe2\x98\x94-\xe2\x98\x95\xe2\x98\x98\xe2\x98\x9d\xe2\x98\xa0\xe2\x98\xa2-\xe2\x98\xa3\xe2\x98\xa6\xe2\x98\xaa\xe2\x98\xae-\xe2\x98\xaf\xe2\x98\xb8-\xe2\x98\xba\xe2\x99\x88-\xe2\x99\x93\xe2\x99\xa0\xe2\x99\xa3\xe2\x99\xa5-\xe2\x99\xa6\xe2\x99\xa8\xe2\x99\xbb\xe2\x99\xbf"
        . "\xe2\x9a\x92-\xe2\x9a\x94\xe2\x9a\x96-\xe2\x9a\x97\xe2\x9a\x99\xe2\x9a\x9b-\xe2\x9a\x9c\xe2\x9a\xa0-\xe2\x9a\xa1\xe2\x9a\xaa-\xe2\x9a\xab\xe2\x9a\xb0-\xe2\x9a\xb1\xe2\x9a\xbd-\xe2\x9a\xbe"
        . "\xe2\x9b\x84-\xe2\x9b\x85\xe2\x9b\x88\xe2\x9b\x8e-\xe2\x9b\x8f\xe2\x9b\x91\xe2\x9b\x93-\xe2\x9b\x94\xe2\x9b\xa9-\xe2\x9b\xaa\xe2\x9b\xb0-\xe2\x9b\xb5\xe2\x9b\xb7-\xe2\x9b\xba\xe2\x9b\xbd"
        // U+2702 .. U+27BF: dingbats
        . "\xe2\x9c\x82\xe2\x9c\x85\xe2\x9c\x88-\xe2\x9c\x8d\xe2\x9c\x8f\xe2\x9c\x92\xe2\x9c\x94\xe2\x9c\x96\xe2\x9c\x9d\xe2\x9c\xa1\xe2\x9c\xa8\xe2\x9c\xb3-\xe2\x9c\xb4\xe2\x9d\x84\xe2\x9d\x87\xe2\x9d\x8c\xe2\x9d\x8e\xe2\x9d\x93-\xe2\x9d\x95\xe2\x9d\x97\xe2\x9d\xa3-\xe2\x9d\xa4\xe2\x9e\x95-\xe2\x9e\x97\xe2\x9e\xa1\xe2\x9e\xb0\xe2\x9e\xbf"
        // U+2934 .. U+3299: supplemental arrows, squares/stars, CJK symbols
        . "\xe2\xa4\xb4-\xe2\xa4\xb5\xe2\xac\x85-\xe2\xac\x87\xe2\xac\x9b-\xe2\xac\x9c\xe2\xad\x90\xe2\xad\x95\xe3\x80\xb0\xe3\x80\xbd\xe3\x8a\x97\xe3\x8a\x99"
        // U+1F004 .. U+1F251: mahjong/joker and enclosed alphanumerics/ideographs
        . "\xf0\x9f\x80\x84\xf0\x9f\x83\x8f\xf0\x9f\x85\xb0-\xf0\x9f\x85\xb1\xf0\x9f\x85\xbe-\xf0\x9f\x85\xbf\xf0\x9f\x86\x8e\xf0\x9f\x86\x91-\xf0\x9f\x86\x9a\xf0\x9f\x88\x81-\xf0\x9f\x88\x82\xf0\x9f\x88\x9a\xf0\x9f\x88\xaf\xf0\x9f\x88\xb2-\xf0\x9f\x88\xba\xf0\x9f\x89\x90-\xf0\x9f\x89\x91"
        // U+200D: zero-width joiner
        . "\xe2\x80\x8d"
        // Whole ranges: U+1F300-1F5FF, U+1F600-1F64F, U+1F680-1F6FF, U+1F900-1F9FF
        . "\xf0\x9f\x8c\x80-\xf0\x9f\x97\xbf\xf0\x9f\x98\x80-\xf0\x9f\x99\x8f\xf0\x9f\x9a\x80-\xf0\x9f\x9b\xbf\xf0\x9f\xa4\x80-\xf0\x9f\xa7\xbf"
        // U+E0020 .. U+E007F: tag characters
        . "\xf3\xa0\x80\xa0-\xf3\xa0\x81\xbf"
        . "]";

    // ZWJ followed by the female (U+2640) or male (U+2642) sign.
    $gender = "\xe2\x80\x8d[\xe2\x99\x80\xe2\x99\x82]";

    // Two regional-indicator symbols (U+1F1E6 .. U+1F1FF).
    $flag = "[\xf0\x9f\x87\xa6-\xf0\x9f\x87\xbf]{2}";

    // Any character followed by U+20E0, U+20E3 or U+FE0F, e.g. keycaps such
    // as "0" + U+FE0F + U+20E3.
    $modified = ".[\xe2\x83\xa0\xe2\x83\xa3\xef\xb8\x8f]+";

    $unit = "(?:" . $single . "|" . $gender . "|" . $flag . "|" . $modified . ")";

    // The unit is repeated only by the outer "+". Giving the unit its own "+"
    // inside that group accepts exactly the same strings but lets the engine
    // split a run of emoji in exponentially many ways when the match fails.
    $pattern = "/^\\s*(?:" . $unit . "\\s*)+$/u";

    return (preg_match($pattern, $subject) === 1);
}
/**
 * Checks one expectation against isEmojiOnly() and prints a line only when
 * the actual result differs from the expected one.
 *
 * @param string $string   Input handed to isEmojiOnly().
 * @param bool   $isEmoji  Result isEmojiOnly() is expected to return.
 */
function test($string, $isEmoji) {
    // Nothing to report when the expectation holds.
    if (isEmojiOnly($string) === $isEmoji) {
        return;
    }

    $negation = $isEmoji ? "" : " not";
    echo $string . " should" . $negation . " be emoji only\n";
}
// Plain text, or text mixed with emoji, must be rejected.
test("Hello 😲", false);
test("-", false);
test("+", false);
test("$", false);

// ZWJ sequences, keycaps and variation selectors, with optional whitespace.
test(" 👨‍👨‍👧‍👧 ", true);
test(" 0️⃣ ❤️", true);
test("👨‍👨‍👧‍👧 ❤️", true);
test(" 0️⃣ ❤️👨‍👨‍👧‍👧 ", true);
test("0️⃣\n❤️", true);

// Large mixed corpus: symbols, keycaps, ZWJ sequences and pictographs.
test("©®🔝🔛🔙💱➕➖➗®©™🔜🔴🔵⚫️⚪️🔹🔘🔸▪️▫️◻️◾️🔉🔇♦️♥️♣️♠️🀄️🎴👁‍🗨💭🗯🕔🕓🕖🕖🕜🕥💬☪☪🔝☑️#️⃣#️⃣⤵️*️⃣ℹ️↩️↪️↙️↘️➡️🔂◀️🔼⏸⏯4️⃣🆓🆖📶🆒🛄🏧🌀💠✅〽️💯⁉️❕🚳🚯🚷🚫❌⭕️💢⛔️🅱㊙️✴️🆚💘💕💚🔎📝🔓🚩🎁🎀⛱🛍🗿🔑🏷🔬🛡⚔🔫🛠🛠💎💴🕯📡⏲📻📞📸💾🖱📲🏯🎇🏙🏜🏞🎢🚥🚦💺🚤🚇🚅🚟🚁🚜🚑🚙🎷🎺🎱🏉🏀🍶🍵🎂🍡🍛🍠🍏🍇⛄️☃🌨☀️☄⭐️🌝🌚🌚🌝🌑🌒🌕🌺🐚🕸🍃🌴🐑🐄🐄🐌🐦🐗🐽🐭🐒🕶👓👠👞👨‍👩‍👧‍👦👩‍❤️‍👩👥😿😲😪🙁😟😘😍👍🌴👓😲😘", true);

// Requires Unicode 8.0 support for skin tones and additional emoji
test("👴🏻", true);
test("🕎", true);
test("🕍", true);
test("🏺", true);
test("🚴🏾", true);
test("🍿", true);
test("🌭", true);
test("🙎🏼", true);
test("💂🏽", true);
test("👇🏻", true);
test("👍🏻", true);
test("🤕", true);
test("🙄", true);

// Symbol-like characters outside the emoji list must be rejected.
test("㎍", false);
test("㍷", false);
test("䷑", false);
test("䷫", false);
test("䷽", false);
test("꒞", false);
@cmkilger
Copy link
Author

cmkilger commented Jun 8, 2016

I hard-coded ranges for some Unicode blocks. Although many code points in them are currently unassigned, it's extremely likely that they would be emoji once assigned.

@cmkilger
Copy link
Author

\p{So}\p{Sk}\p{M} was matching a lot of things which would not be considered emoji. Now using specific code points and ranges.

@cmkilger
Copy link
Author

Added support for gender modifiers (a zero-width joiner followed by ♀ or ♂).

@cmkilger
Copy link
Author

Added support for flag tags.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment