Last active
August 29, 2015 14:05
-
-
Save bakercp/d7f62b32f0cf262206ab to your computer and use it in GitHub Desktop.
PocoRegex Experiment.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ============================================================================= | |
// | |
// Copyright (c) 2014 Christopher Baker <http://christopherbaker.net> | |
// | |
// Permission is hereby granted, free of charge, to any person obtaining a copy | |
// of this software and associated documentation files (the "Software"), to deal | |
// in the Software without restriction, including without limitation the rights | |
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
// copies of the Software, and to permit persons to whom the Software is | |
// furnished to do so, subject to the following conditions: | |
// | |
// The above copyright notice and this permission notice shall be included in | |
// all copies or substantial portions of the Software. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
// THE SOFTWARE. | |
// | |
// ============================================================================= | |
#include "ofApp.h" | |
#include "Poco/RegularExpression.h" | |
void ofApp::setup() | |
{ | |
std::string haystack = "廢】跟我家那隻臘腸(據說是牧羊犬)亂聊,他說我是壓力鍋,然後...我想到...如果壓力鍋具有攻擊性是怎樣攻擊,蒸氣嗎???\n接著我幻想起鍋子大戰\n壓力鍋vs平底鍋\n燉鍋vs快鍋"; | |
try | |
{ | |
// This regex matches the 我 characther optionally surrounded by any number of unicode characters | |
// with the property Unicode property of "Letter" (\p{L}). In order to work correctly, the regex | |
// pattern must be compiled with Poco::RegularExpression::RE_UTF8. | |
Poco::RegularExpression regex("([\\p{L}]+)?我([\\p{L}]+)?", Poco::RegularExpression::RE_UTF8); | |
Poco::RegularExpression::Match theMatch; | |
std::string::size_type haystackOffset = 0; | |
std::vector<std::string> needles; | |
// This while loop moves through the "haystack" and returns any match. | |
// when a match is found, the search offset is incremented to skip over | |
// the last known match and the search resumes until the end of the string | |
// is reached. | |
while (regex.match(haystack, haystackOffset, theMatch)) | |
{ | |
haystackOffset = theMatch.offset + theMatch.length; | |
needles.push_back(std::string(haystack, theMatch.offset, theMatch.length)); | |
} | |
// Print the needles. | |
for (int i = 0; i < needles.size(); ++i) | |
{ | |
std::cout << needles[i] << std::endl; | |
} | |
} | |
catch (const Poco::RegularExpressionException& exc) | |
{ | |
std::cout << exc.displayText() << std::endl; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ============================================================================= | |
// | |
// Copyright (c) 2014 Christopher Baker <http://christopherbaker.net> | |
// | |
// Permission is hereby granted, free of charge, to any person obtaining a copy | |
// of this software and associated documentation files (the "Software"), to deal | |
// in the Software without restriction, including without limitation the rights | |
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
// copies of the Software, and to permit persons to whom the Software is | |
// furnished to do so, subject to the following conditions: | |
// | |
// The above copyright notice and this permission notice shall be included in | |
// all copies or substantial portions of the Software. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
// THE SOFTWARE. | |
// | |
// ============================================================================= | |
#pragma once | |
#include "ofMain.h" | |
class ofApp: public ofBaseApp | |
{ | |
public: | |
void setup(); | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Should print out: | |
跟我家那隻臘腸 | |
他說我是壓力鍋 | |
我想到 | |
接著我幻想起鍋子大戰 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
By the way, all of the PCRE Unicode property classes described here are available in Poco: http://www.regular-expressions.info/unicode.html