Created
March 10, 2017 12:59
-
-
Save mcepl/190c8d7fabafd142928dc41b5c441f87 to your computer and use it in GitHub Desktop.
refreshed patch for vim/vim#846
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -up vim-8.0.0363/src/Makefile.hunspell vim-8.0.0363/src/Makefile | |
--- vim-8.0.0363/src/Makefile.hunspell 2017-02-23 20:20:53.000000000 +0100 | |
+++ vim-8.0.0363/src/Makefile 2017-02-24 14:29:28.614983465 +0100 | |
@@ -1446,7 +1446,8 @@ ALL_LIBS = \ | |
$(RUBY_LIBS) \ | |
$(PROFILE_LIBS) \ | |
$(SANITIZER_LIBS) \ | |
- $(LEAK_LIBS) | |
+ $(LEAK_LIBS) \ | |
+ -lhunspell-1.4 | |
# abbreviations | |
DEST_BIN = $(DESTDIR)$(BINDIR) | |
diff -up vim-8.0.0363/src/spell.c.hunspell vim-8.0.0363/src/spell.c | |
--- vim-8.0.0363/src/spell.c.hunspell 2017-02-23 20:20:53.000000000 +0100 | |
+++ vim-8.0.0363/src/spell.c 2017-02-24 14:30:28.211114737 +0100 | |
@@ -405,6 +405,36 @@ static linenr_T dump_prefixes(slang_T *s | |
static char_u *repl_from = NULL; | |
static char_u *repl_to = NULL; | |
+/* Create lp's Hunspell handle and its two encoding converters on first use. | |
+ * "lp->sl_fname" names the .dic file; the .aff path is the same name with its | |
+ * last three bytes replaced by "aff".  On any failure vc_fail is set in both | |
+ * converters, which is the flag spell_check() tests before using the handle. */ | |
+static void | |
+ensurehunspellinit(slang_T *lp) | |
+{ | |
+    char_u *dic = lp->sl_fname; | |
+    size_t len = (dic == NULL) ? 0 : strlen((char *)dic); | |
+    char_u *aff, *enc; | |
+ | |
+    if (lp->sl_hunspell != NULL) | |
+        return; /* already initialised */ | |
+    lp->sl_tohunconv.vc_fail = lp->sl_fromhunconv.vc_fail = TRUE; | |
+    if (len < 3 || (aff = vim_strnsave(dic, (int)len)) == NULL) | |
+        return; /* name too short for a suffix, or out of memory */ | |
+    vim_strncpy(aff + len - 3, (char_u *)"aff", 3); /* same length: fits */ | |
+    lp->sl_hunspell = Hunspell_create((char *)aff, (char *)dic); | |
+    vim_free(aff); | |
+    if (lp->sl_hunspell == NULL) | |
+        return; | |
+    enc = (char_u *)Hunspell_get_dic_encoding(lp->sl_hunspell); | |
+    lp->sl_tohunconv.vc_fail = lp->sl_fromhunconv.vc_fail = FALSE; | |
+    if (convert_setup(&lp->sl_tohunconv, spell_enc(), enc) == FAIL) | |
+        lp->sl_tohunconv.vc_fail = TRUE; | |
+    if (convert_setup(&lp->sl_fromhunconv, enc, spell_enc()) == FAIL) | |
+        lp->sl_fromhunconv.vc_fail = TRUE; | |
+} | |
+ | |
+ | |
/* | |
* Main spell-checking function. | |
* "ptr" points to a character that could be the start of a word. | |
@@ -513,27 +543,70 @@ spell_check( | |
{ | |
mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi); | |
+ if (mi.mi_lp->lp_slang->sl_ishunspell) | |
+ { | |
+ slang_T *lp = mi.mi_lp->lp_slang; | |
+ char_u *converted = 0; | |
+ char_u *thisword; | |
+ char_u *mi_end = mi.mi_end; | |
+ char_u *mi_final = mi.mi_word + strlen(mi.mi_word); | |
+ | |
+ while (1) | |
+ { | |
+ ensurehunspellinit(lp); | |
+ if ((lp->sl_tohunconv.vc_fail == TRUE) || (lp->sl_fromhunconv.vc_fail == TRUE)) | |
+ break; | |
+ | |
+ if (mi_end != mi.mi_word) | |
+ { | |
+ thisword = vim_strnsave(mi.mi_word, mi_end - mi.mi_word); | |
+ converted = string_convert(&lp->sl_tohunconv, thisword, NULL); | |
+ if (converted) | |
+ { | |
+ if (Hunspell_spell(lp->sl_hunspell, converted) != 0) | |
+ { | |
+ mi.mi_result = SP_OK; | |
+ mi.mi_end = mi.mi_cend = mi.mi_word + strlen(thisword); | |
+ } | |
+ vim_free(converted); | |
+ } | |
+ vim_free(thisword); | |
+ } | |
+ | |
+ if (mi_end == mi_final) | |
+ break; | |
+ | |
+ do | |
+ { | |
+ mb_ptr_adv(mi_end); | |
+ } while (*mi_end != NUL && spell_iswordp(mi_end, wp->w_buffer)); | |
+ } | |
+ } | |
+ | |
/* If reloading fails the language is still in the list but everything | |
* has been cleared. */ | |
- if (mi.mi_lp->lp_slang->sl_fidxs == NULL) | |
+ if (!mi.mi_lp->lp_slang->sl_ishunspell && mi.mi_lp->lp_slang->sl_fidxs == NULL) | |
continue; | |
- /* Check for a matching word in case-folded words. */ | |
- find_word(&mi, FIND_FOLDWORD); | |
+ if (!mi.mi_lp->lp_slang->sl_ishunspell) | |
+ { | |
+ /* Check for a matching word in case-folded words. */ | |
+ find_word(&mi, FIND_FOLDWORD); | |
- /* Check for a matching word in keep-case words. */ | |
- find_word(&mi, FIND_KEEPWORD); | |
+ /* Check for a matching word in keep-case words. */ | |
+ find_word(&mi, FIND_KEEPWORD); | |
- /* Check for matching prefixes. */ | |
- find_prefix(&mi, FIND_FOLDWORD); | |
+ /* Check for matching prefixes. */ | |
+ find_prefix(&mi, FIND_FOLDWORD); | |
- /* For a NOBREAK language, may want to use a word without a following | |
- * word as a backup. */ | |
- if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD | |
- && mi.mi_result2 != SP_BAD) | |
- { | |
- mi.mi_result = mi.mi_result2; | |
- mi.mi_end = mi.mi_end2; | |
+ /* For a NOBREAK language, may want to use a word without a following | |
+ * word as a backup. */ | |
+ if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD | |
+ && mi.mi_result2 != SP_BAD) | |
+ { | |
+ mi.mi_result = mi.mi_result2; | |
+ mi.mi_end = mi.mi_end2; | |
+ } | |
} | |
/* Count the word in the first language where it's found to be OK. */ | |
@@ -1904,6 +1977,80 @@ spell_load_lang(char_u *lang) | |
if (r == FAIL) | |
{ | |
+# define HUNSPELLDICT "/usr/share/myspell/" | |
+ DIR *dirp = opendir(HUNSPELLDICT); | |
+ if (dirp != NULL) | |
+ { | |
+ slang_T* thislang[MAXREGIONS] = {0}; | |
+ slang_T *lp = 0; | |
+ struct dirent *dp; | |
+ int i = 0; | |
+ | |
+ while ((dp = readdir(dirp)) != NULL) | |
+ { | |
+ char_u final_name[MAXPATHL]; | |
+ char_u spf_name[MAXPATHL]; | |
+ char_u thisregion[3] = {0}; | |
+ char *resolvedlink = final_name; | |
+ int j, regionpos; | |
+ | |
+ if (strncmp(dp->d_name, lang, strlen(lang)) != 0) | |
+ continue; | |
+ | |
+ if ((strlen(dp->d_name) <= 4) || (dp->d_name[strlen(lang)] != '_')) | |
+ continue; | |
+ | |
+ if (strncmp(".dic", dp->d_name + strlen(dp->d_name) - 4, 4) != 0) | |
+ continue; | |
+ | |
+ vim_strncpy(spf_name, HUNSPELLDICT, strlen(HUNSPELLDICT)); | |
+ vim_strncpy(spf_name + strlen(HUNSPELLDICT), dp->d_name, strlen(HUNSPELLDICT)); | |
+ | |
+ if (realpath(spf_name, resolvedlink) != resolvedlink) | |
+ continue; | |
+ | |
+ thisregion[0] = tolower(dp->d_name[strlen(lang)+1]); | |
+ thisregion[1] = tolower(dp->d_name[strlen(lang)+1+1]); | |
+ | |
+ r = OK; | |
+ | |
+ for (j = 0; j < MAXREGIONS; ++j) | |
+ { | |
+ if (thislang[j] && (strcmp(thislang[j]->sl_fname, final_name) == 0)) | |
+ break; | |
+ } | |
+ | |
+ if (j < MAXREGIONS) | |
+ lp = thislang[j]; | |
+ else | |
+ { | |
+ lp = slang_alloc(lang); | |
+ lp->sl_ishunspell = TRUE; | |
+ | |
+ lp->sl_fname = vim_strsave(resolvedlink); | |
+ | |
+ lp->sl_next = first_lang; | |
+ first_lang = lp; | |
+ thislang[i] = lp; | |
+ } | |
+ | |
+ regionpos = 0; | |
+ while (lp->sl_regions[regionpos] != 0) ++regionpos; | |
+ | |
+ //silently lose regions which won't fit in | |
+ if (regionpos == MAXREGIONS * 2) | |
+ continue; | |
+ | |
+ vim_strncpy(lp->sl_regions + regionpos, thisregion, 2); | |
+ | |
+ ++i; | |
+ } | |
+ closedir(dirp); | |
+ } | |
+ } | |
+ | |
+ if (r == FAIL) | |
+ { | |
smsg((char_u *) | |
#ifdef VMS | |
_("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""), | |
@@ -1976,6 +2123,9 @@ slang_alloc(char_u *lang) | |
void | |
slang_free(slang_T *lp) | |
{ | |
+ Hunspell_destroy(lp->sl_hunspell); | |
+ convert_setup(&lp->sl_tohunconv, NULL, NULL); | |
+ convert_setup(&lp->sl_fromhunconv, NULL, NULL); | |
vim_free(lp->sl_name); | |
vim_free(lp->sl_fname); | |
slang_clear(lp); | |
@@ -2392,6 +2542,7 @@ did_set_spelllang(win_T *wp) | |
/* Loop over comma separated language names. */ | |
for (splp = spl_copy; *splp != NUL; ) | |
{ | |
+ int hunspellregionunsupported; | |
/* Get one language name. */ | |
copy_option_part(&splp, lang, MAXWLEN, ","); | |
region = NULL; | |
@@ -2480,6 +2631,7 @@ did_set_spelllang(win_T *wp) | |
/* | |
* Loop over the languages, there can be several files for "lang". | |
*/ | |
+ hunspellregionunsupported = 0; | |
for (slang = first_lang; slang != NULL; slang = slang->sl_next) | |
if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME | |
: STRICMP(lang, slang->sl_name) == 0) | |
@@ -2497,6 +2649,11 @@ did_set_spelllang(win_T *wp) | |
/* This addition file is for other regions. */ | |
region_mask = 0; | |
} | |
+ else if (slang->sl_ishunspell) | |
+ { | |
+ region_mask = 0; | |
+ hunspellregionunsupported++; | |
+ } | |
else | |
/* This is probably an error. Give a warning and | |
* accept the words anyway. */ | |
@@ -2505,7 +2662,10 @@ did_set_spelllang(win_T *wp) | |
region); | |
} | |
else | |
+ { | |
+ hunspellregionunsupported--; | |
region_mask = 1 << c; | |
+ } | |
} | |
if (region_mask != 0) | |
@@ -2524,6 +2684,9 @@ did_set_spelllang(win_T *wp) | |
nobreak = TRUE; | |
} | |
} | |
+ | |
+ if (region && hunspellregionunsupported >= 1) | |
+ smsg((char_u *) _("Warning: region %s not supported"), region); | |
} | |
/* round 0: load int_wordlist, if possible. | |
@@ -4295,6 +4458,36 @@ suggest_try_change(suginfo_T *su) | |
{ | |
lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); | |
+ if (lp->lp_slang->sl_ishunspell) | |
+ { | |
+ slang_T *slp = lp->lp_slang; | |
+ char **slst; | |
+ char_u *converted = 0; | |
+ | |
+ ensurehunspellinit(slp); | |
+ | |
+ converted = string_convert(&slp->sl_tohunconv, su->su_fbadword, NULL); | |
+ if (converted) | |
+ { | |
+ int suggests; | |
+ suggests = Hunspell_suggest(slp->sl_hunspell, &slst, converted); | |
+ if (suggests > 0) | |
+ { | |
+ int i; | |
+ char_u *suggest; | |
+ for (i = 0; i < suggests; ++i) | |
+ { | |
+ suggest = string_convert(&slp->sl_fromhunconv, slst[i], NULL); | |
+ add_suggestion(su, &su->su_ga, suggest, su->su_badlen, i, 0, FALSE, | |
+ slp, FALSE); | |
+ vim_free(suggest); | |
+ } | |
+ free(slst); | |
+ } | |
+ vim_free(converted); | |
+ } | |
+ } | |
+ | |
/* If reloading a spell file fails it's still in the list but | |
* everything has been cleared. */ | |
if (lp->lp_slang->sl_fbyts == NULL) | |
diff -up vim-8.0.0363/src/spellfile.c.hunspell vim-8.0.0363/src/spellfile.c | |
--- vim-8.0.0363/src/spellfile.c.hunspell 2017-02-24 14:30:10.618075985 +0100 | |
+++ vim-8.0.0363/src/spellfile.c 2017-02-24 14:30:28.211114737 +0100 | |
@@ -832,7 +832,7 @@ read_region_section(FILE *fd, slang_T *l | |
{ | |
int i; | |
- if (len > 16) | |
+ if (len > MAXREGIONS*2) | |
return SP_FORMERROR; | |
for (i = 0; i < len; ++i) | |
lp->sl_regions[i] = getc(fd); /* <regionname> */ | |
@@ -1952,7 +1952,7 @@ typedef struct spellinfo_S | |
char_u *si_info; /* info text chars or NULL */ | |
int si_region_count; /* number of regions supported (1 when there | |
are no regions) */ | |
- char_u si_region_name[17]; /* region names; used only if | |
+ char_u si_region_name[MAXREGIONS*2 + 1]; /* region names plus NUL; used only if | |
* si_region_count > 1) */ | |
garray_T si_rep; /* list of fromto_T entries from REP lines */ | |
@@ -4234,7 +4234,7 @@ spell_read_wordfile(spellinfo_T *spin, c | |
else | |
{ | |
line += 8; | |
- if (STRLEN(line) > 16) | |
+ if (STRLEN(line) > MAXREGIONS) | |
smsg((char_u *)_("Too many regions in %s line %d: %s"), | |
fname, lnum, line); | |
else | |
@@ -5954,7 +5954,7 @@ mkspell( | |
char_u *wfname; | |
char_u **innames; | |
int incount; | |
- afffile_T *(afile[8]); | |
+ afffile_T *(afile[MAXREGIONS]); | |
int i; | |
int len; | |
stat_T st; | |
@@ -6025,8 +6025,8 @@ mkspell( | |
EMSG(_(e_invarg)); /* need at least output and input names */ | |
else if (vim_strchr(gettail(wfname), '_') != NULL) | |
EMSG(_("E751: Output file name must not have region name")); | |
- else if (incount > 8) | |
- EMSG(_("E754: Only up to 8 regions supported")); | |
+ else if (incount > MAXREGIONS) | |
+ EMSG2(_("E754: Only up to %d regions supported"), MAXREGIONS); | |
else | |
{ | |
/* Check for overwriting before doing things that may take a lot of | |
diff -up vim-8.0.0363/src/spell.h.hunspell vim-8.0.0363/src/spell.h | |
--- vim-8.0.0363/src/spell.h.hunspell 2017-02-24 14:30:16.573089102 +0100 | |
+++ vim-8.0.0363/src/spell.h 2017-02-24 14:30:28.211114737 +0100 | |
@@ -29,10 +29,14 @@ | |
# define DEBUG_TRIEWALK | |
#endif | |
+#include "hunspell/hunspell.h" | |
+ | |
#define MAXWLEN 254 /* Assume max. word len is this many bytes. | |
Some places assume a word length fits in a | |
byte, thus it can't be above 255. | |
Must be >= PFD_NOTSPECIAL. */ | |
+#define MAXREGIONS 20 /* Max. number of regions in one spell language; each region name takes two bytes in sl_regions[]. */ | |
+ | |
/* Type used for indexes in the word tree need to be at least 4 bytes. If int | |
* is 8 bytes we could use something smaller, but what? */ | |
@@ -70,6 +74,10 @@ struct slang_S | |
char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */ | |
char_u *sl_fname; /* name of .spl file */ | |
int sl_add; /* TRUE if it's a .add file. */ | |
+ int sl_ishunspell; /* TRUE if it's an unconverted hunspell aff/dic combination. */ | |
+ Hunhandle *sl_hunspell; | |
+ vimconv_T sl_tohunconv; | |
+ vimconv_T sl_fromhunconv; | |
char_u *sl_fbyts; /* case-folded word bytes */ | |
idx_T *sl_fidxs; /* case-folded word indexes */ | |
@@ -80,7 +88,7 @@ struct slang_S | |
char_u *sl_info; /* infotext string or NULL */ | |
- char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ | |
+ char_u sl_regions[MAXREGIONS * 2 + 1]; /* table with up to MAXREGIONS two-byte region names plus NUL */ | |
char_u *sl_midword; /* MIDWORD string or NULL */ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment