Last active
December 14, 2015 01:18
-
-
Save mattn/5004875 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Index: twitvim.vim | |
=================================================================== | |
--- twitvim.vim (revision 339) | |
+++ twitvim.vim (working copy) | |
@@ -2696,18 +2696,13 @@ | |
let s:URL_PROTOCOL_HTTPS = '\%([Hh][Tt][Tt][Pp][Ss]\)://' | |
let s:URL_PROTOCOL_NON_HTTPS = '\%([Hh][Tt][Tt][Pp]\|[Ff][Tt][Pp]\)://' | |
-let s:URL_DOMAIN = '[^[:space:])/]\+' | |
-let s:URL_PATH_CHARS = '[^[:space:]()]' | |
+let s:URL_DOMAIN = '\a[a-zA-Z0-9_-]*\(\.[a-zA-Z0-9][a-zA-Z0-9_-]*\)*\(:\d+\)\{0,1}' | |
+let s:URL_PATH_CHARS = '[a-zA-Z0-9_/.\-+%#?&=;@$,!''*~]' | |
" URL paths may contain balanced parentheses. | |
let s:URL_PARENS = '('.s:URL_PATH_CHARS.'*)' | |
+let s:URL_PATH = '\%('.s:URL_PATH_CHARS.'*\%('.s:URL_PARENS.s:URL_PATH_CHARS.'*\)*\)\|\%('.s:URL_PATH_CHARS.'\+\)' | |
-" Avoid swallowing up certain punctuation characters after a URL but allow a | |
-" URL to end with a balanced parenthesis. | |
-let s:URL_PATH_END = '\%([^[:space:]\.,;:()]\|'.s:URL_PARENS.'\)' | |
- | |
-let s:URL_PATH = '\%('.s:URL_PATH_CHARS.'*\%('.s:URL_PARENS.s:URL_PATH_CHARS.'*\)*'.s:URL_PATH_END.'\)\|\%('.s:URL_PATH_CHARS.'\+\)' | |
- | |
" Bring it all together. Use this regex to match a URL. | |
let s:URLMATCH = s:URL_PROTOCOL.s:URL_DOMAIN.'\%(/\%('.s:URL_PATH.'\)\=\)\=' | |
let s:URLMATCH_HTTPS = s:URL_PROTOCOL_HTTPS.s:URL_DOMAIN.'\%(/\%('.s:URL_PATH.'\)\=\)\=' | |
@@ -2724,7 +2719,7 @@ | |
" Handle @-replies by showing that user's timeline. | |
" An @-reply must be preceded by a non-word character and ends at a | |
" non-word character. | |
- let matchres = matchlist(s, '\w\@<!@\(\w\+\)') | |
+ let matchres = matchlist(s, '[\w]\@<!@\(\w\+\)') | |
if matchres != [] | |
call s:get_timeline("user", matchres[1], 1, 0) | |
return | |
@@ -2985,7 +2980,7 @@ | |
" An @-reply must be preceded by a non-word character and ends at a | |
" non-word character. | |
- syntax match twitterReply "\w\@<!@\w\+" | |
+ syntax match twitterReply "[\w]\@<!@\w\+" | |
" A #-hashtag must be preceded by a non-word character and ends at a | |
" non-word character. |
CP932 is a DBCS (double-byte character set), and the trail byte of a CP932 character can fall within the ASCII range. For example, “ in CP932 is encoded as 0x81 0x67.
I checked in some changes to tighten up URL recognition a bit because the first problem happens in UTF-8 too.
I do not see the “@name problem under UTF-8, so it only happens with cp932.
CP932 stores each double-byte character as two bytes, like below:
leading byte: 0xa0 - 0xfe
trailing byte: 0x20 - 0x7e
So you can't use \w with \@<! here. But I think this is a bug in Vim.
But you shouldn't use [^[:space:]] for the URL path.
Ah, all of these issues are caused by a bug in Vim. Below is a patch. I'll send this patch to vim-dev after some checking.
diff -r 8b86b69546a9 src/regexp.c
--- a/src/regexp.c Wed Feb 20 21:26:00 2013 +0100
+++ b/src/regexp.c Fri Feb 22 10:40:23 2013 +0900
@@ -5449,7 +5449,19 @@
}
}
else
- --rp->rs_un.regsave.rs_u.pos.col;
+ {
+#ifdef FEAT_MBYTE
+ if (has_mbyte)
+ {
+ int off = (*mb_head_off)(regline,
+ regline +
+ rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
+ rp->rs_un.regsave.rs_u.pos.col -= off;
+ }
+ else
+#endif
+ --rp->rs_un.regsave.rs_u.pos.col;
+ }
}
else
{
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
First of all, are you OK with the URL issue? Is your only remaining question about \w?