Skip to content

Instantly share code, notes, and snippets.

@mcepl
Created March 10, 2017 12:59
Show Gist options
  • Save mcepl/190c8d7fabafd142928dc41b5c441f87 to your computer and use it in GitHub Desktop.
Save mcepl/190c8d7fabafd142928dc41b5c441f87 to your computer and use it in GitHub Desktop.
refreshed patch for vim/vim#846
diff -up vim-8.0.0363/src/Makefile.hunspell vim-8.0.0363/src/Makefile
--- vim-8.0.0363/src/Makefile.hunspell 2017-02-23 20:20:53.000000000 +0100
+++ vim-8.0.0363/src/Makefile 2017-02-24 14:29:28.614983465 +0100
@@ -1446,7 +1446,8 @@ ALL_LIBS = \
$(RUBY_LIBS) \
$(PROFILE_LIBS) \
$(SANITIZER_LIBS) \
- $(LEAK_LIBS)
+ $(LEAK_LIBS) \
+ -lhunspell-1.4
# abbreviations
DEST_BIN = $(DESTDIR)$(BINDIR)
diff -up vim-8.0.0363/src/spell.c.hunspell vim-8.0.0363/src/spell.c
--- vim-8.0.0363/src/spell.c.hunspell 2017-02-23 20:20:53.000000000 +0100
+++ vim-8.0.0363/src/spell.c 2017-02-24 14:30:28.211114737 +0100
@@ -405,6 +405,36 @@ static linenr_T dump_prefixes(slang_T *s
static char_u *repl_from = NULL;
static char_u *repl_to = NULL;
+/* Create lp->sl_hunspell on first use from the .dic path in lp->sl_fname (the
+ * .aff path is the same name with its last three bytes replaced by "aff") and
+ * set up conversion to and from the dictionary encoding.  Does nothing when
+ * the handle already exists; on failure the affected vc_fail flags are set. */
+static void
+ensurehunspellinit(slang_T *lp)
+{
+    char_u *dic = lp->sl_fname;
+    size_t len = (dic == NULL) ? 0 : STRLEN(dic);
+    char_u *aff;
+
+    if (lp->sl_hunspell != NULL)
+        return;
+    /* Need at least three bytes to overwrite and a successful allocation. */
+    aff = (len >= 3) ? vim_strnsave(dic, (int)len) : NULL;
+    if (aff != NULL)
+    {
+        vim_strncpy(aff + len - 3, (char_u *)"aff", 3);
+        lp->sl_hunspell = Hunspell_create((char *)aff, (char *)dic);
+        vim_free(aff);
+    }
+    if (lp->sl_hunspell == NULL || convert_setup(&lp->sl_tohunconv, spell_enc(),
+            (char_u *)Hunspell_get_dic_encoding(lp->sl_hunspell)) == FAIL)
+        lp->sl_tohunconv.vc_fail = TRUE;
+    if (lp->sl_hunspell == NULL || convert_setup(&lp->sl_fromhunconv, (char_u *)
+            Hunspell_get_dic_encoding(lp->sl_hunspell), spell_enc()) == FAIL)
+        lp->sl_fromhunconv.vc_fail = TRUE;
+}
+
+
/*
* Main spell-checking function.
* "ptr" points to a character that could be the start of a word.
@@ -513,27 +543,70 @@ spell_check(
{
mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi);
+ if (mi.mi_lp->lp_slang->sl_ishunspell)
+ {
+ slang_T *lp = mi.mi_lp->lp_slang;
+ char_u *converted = 0;
+ char_u *thisword;
+ char_u *mi_end = mi.mi_end;
+ char_u *mi_final = mi.mi_word + strlen(mi.mi_word);
+
+ while (1)
+ {
+ ensurehunspellinit(lp);
+ if ((lp->sl_tohunconv.vc_fail == TRUE) || (lp->sl_fromhunconv.vc_fail == TRUE))
+ break;
+
+ if (mi_end != mi.mi_word)
+ {
+ thisword = vim_strnsave(mi.mi_word, mi_end - mi.mi_word);
+ converted = string_convert(&lp->sl_tohunconv, thisword, NULL);
+ if (converted)
+ {
+ if (Hunspell_spell(lp->sl_hunspell, converted) != 0)
+ {
+ mi.mi_result = SP_OK;
+ mi.mi_end = mi.mi_cend = mi.mi_word + strlen(thisword);
+ }
+ vim_free(converted);
+ }
+ vim_free(thisword);
+ }
+
+ if (mi_end == mi_final)
+ break;
+
+ do
+ {
+ mb_ptr_adv(mi_end);
+ } while (*mi_end != NUL && spell_iswordp(mi_end, wp->w_buffer));
+ }
+ }
+
/* If reloading fails the language is still in the list but everything
* has been cleared. */
- if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
+ if (!mi.mi_lp->lp_slang->sl_ishunspell && mi.mi_lp->lp_slang->sl_fidxs == NULL)
continue;
- /* Check for a matching word in case-folded words. */
- find_word(&mi, FIND_FOLDWORD);
+ if (!mi.mi_lp->lp_slang->sl_ishunspell)
+ {
+ /* Check for a matching word in case-folded words. */
+ find_word(&mi, FIND_FOLDWORD);
- /* Check for a matching word in keep-case words. */
- find_word(&mi, FIND_KEEPWORD);
+ /* Check for a matching word in keep-case words. */
+ find_word(&mi, FIND_KEEPWORD);
- /* Check for matching prefixes. */
- find_prefix(&mi, FIND_FOLDWORD);
+ /* Check for matching prefixes. */
+ find_prefix(&mi, FIND_FOLDWORD);
- /* For a NOBREAK language, may want to use a word without a following
- * word as a backup. */
- if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
- && mi.mi_result2 != SP_BAD)
- {
- mi.mi_result = mi.mi_result2;
- mi.mi_end = mi.mi_end2;
+ /* For a NOBREAK language, may want to use a word without a following
+ * word as a backup. */
+ if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
+ && mi.mi_result2 != SP_BAD)
+ {
+ mi.mi_result = mi.mi_result2;
+ mi.mi_end = mi.mi_end2;
+ }
}
/* Count the word in the first language where it's found to be OK. */
@@ -1904,6 +1977,80 @@ spell_load_lang(char_u *lang)
if (r == FAIL)
{
+# define HUNSPELLDICT "/usr/share/myspell/"
+ DIR *dirp = opendir(HUNSPELLDICT);
+ if (dirp != NULL)
+ {
+ slang_T* thislang[MAXREGIONS] = {0};
+ slang_T *lp = 0;
+ struct dirent *dp;
+ int i = 0;
+
+ while ((dp = readdir(dirp)) != NULL)
+ {
+ char_u final_name[MAXPATHL];
+ char_u spf_name[MAXPATHL];
+ char_u thisregion[3] = {0};
+ char *resolvedlink = final_name;
+ int j, regionpos;
+
+ if (strncmp(dp->d_name, lang, strlen(lang)) != 0)
+ continue;
+
+ if ((strlen(dp->d_name) <= 4) || (dp->d_name[strlen(lang)] != '_'))
+ continue;
+
+ if (strncmp(".dic", dp->d_name + strlen(dp->d_name) - 4, 4) != 0)
+ continue;
+
+ vim_strncpy(spf_name, HUNSPELLDICT, strlen(HUNSPELLDICT));
+ vim_strncpy(spf_name + strlen(HUNSPELLDICT), dp->d_name, strlen(HUNSPELLDICT));
+
+ if (realpath(spf_name, resolvedlink) != resolvedlink)
+ continue;
+
+ thisregion[0] = tolower(dp->d_name[strlen(lang)+1]);
+ thisregion[1] = tolower(dp->d_name[strlen(lang)+1+1]);
+
+ r = OK;
+
+ for (j = 0; j < MAXREGIONS; ++j)
+ {
+ if (thislang[j] && (strcmp(thislang[j]->sl_fname, final_name) == 0))
+ break;
+ }
+
+ if (j < MAXREGIONS)
+ lp = thislang[j];
+ else
+ {
+ lp = slang_alloc(lang);
+ lp->sl_ishunspell = TRUE;
+
+ lp->sl_fname = vim_strsave(resolvedlink);
+
+ lp->sl_next = first_lang;
+ first_lang = lp;
+ thislang[i] = lp;
+ }
+
+ regionpos = 0;
+ while (lp->sl_regions[regionpos] != 0) ++regionpos;
+
+ //silently lose regions which won't fit in
+ if (regionpos == MAXREGIONS * 2)
+ continue;
+
+ vim_strncpy(lp->sl_regions + regionpos, thisregion, 2);
+
+ ++i;
+ }
+ closedir(dirp);
+ }
+ }
+
+ if (r == FAIL)
+ {
smsg((char_u *)
#ifdef VMS
_("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""),
@@ -1976,6 +2123,9 @@ slang_alloc(char_u *lang)
void
slang_free(slang_T *lp)
{
+ Hunspell_destroy(lp->sl_hunspell);
+ convert_setup(&lp->sl_tohunconv, NULL, NULL);
+ convert_setup(&lp->sl_fromhunconv, NULL, NULL);
vim_free(lp->sl_name);
vim_free(lp->sl_fname);
slang_clear(lp);
@@ -2392,6 +2542,7 @@ did_set_spelllang(win_T *wp)
/* Loop over comma separated language names. */
for (splp = spl_copy; *splp != NUL; )
{
+ int hunspellregionunsupported;
/* Get one language name. */
copy_option_part(&splp, lang, MAXWLEN, ",");
region = NULL;
@@ -2480,6 +2631,7 @@ did_set_spelllang(win_T *wp)
/*
* Loop over the languages, there can be several files for "lang".
*/
+ hunspellregionunsupported = 0;
for (slang = first_lang; slang != NULL; slang = slang->sl_next)
if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
: STRICMP(lang, slang->sl_name) == 0)
@@ -2497,6 +2649,11 @@ did_set_spelllang(win_T *wp)
/* This addition file is for other regions. */
region_mask = 0;
}
+ else if (slang->sl_ishunspell)
+ {
+ region_mask = 0;
+ hunspellregionunsupported++;
+ }
else
/* This is probably an error. Give a warning and
* accept the words anyway. */
@@ -2505,7 +2662,10 @@ did_set_spelllang(win_T *wp)
region);
}
else
+ {
+ hunspellregionunsupported--;
region_mask = 1 << c;
+ }
}
if (region_mask != 0)
@@ -2524,6 +2684,9 @@ did_set_spelllang(win_T *wp)
nobreak = TRUE;
}
}
+
+ if (region && hunspellregionunsupported >= 1)
+ smsg((char_u *) _("Warning: region %s not supported"), region);
}
/* round 0: load int_wordlist, if possible.
@@ -4295,6 +4458,36 @@ suggest_try_change(suginfo_T *su)
{
lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
+ if (lp->lp_slang->sl_ishunspell)
+ {
+ slang_T *slp = lp->lp_slang;
+ char **slst;
+ char_u *converted = 0;
+
+ ensurehunspellinit(slp);
+
+ converted = string_convert(&slp->sl_tohunconv, su->su_fbadword, NULL);
+ if (converted)
+ {
+ int suggests;
+ suggests = Hunspell_suggest(slp->sl_hunspell, &slst, converted);
+ if (suggests > 0)
+ {
+ int i;
+ char_u *suggest;
+ for (i = 0; i < suggests; ++i)
+ {
+ suggest = string_convert(&slp->sl_fromhunconv, slst[i], NULL);
+ add_suggestion(su, &su->su_ga, suggest, su->su_badlen, i, 0, FALSE,
+ slp, FALSE);
+ vim_free(suggest);
+ }
+ free(slst);
+ }
+ vim_free(converted);
+ }
+ }
+
/* If reloading a spell file fails it's still in the list but
* everything has been cleared. */
if (lp->lp_slang->sl_fbyts == NULL)
diff -up vim-8.0.0363/src/spellfile.c.hunspell vim-8.0.0363/src/spellfile.c
--- vim-8.0.0363/src/spellfile.c.hunspell 2017-02-24 14:30:10.618075985 +0100
+++ vim-8.0.0363/src/spellfile.c 2017-02-24 14:30:28.211114737 +0100
@@ -832,7 +832,7 @@ read_region_section(FILE *fd, slang_T *l
{
int i;
- if (len > 16)
+ if (len > MAXREGIONS*2)
return SP_FORMERROR;
for (i = 0; i < len; ++i)
lp->sl_regions[i] = getc(fd); /* <regionname> */
@@ -1952,7 +1952,7 @@ typedef struct spellinfo_S
char_u *si_info; /* info text chars or NULL */
int si_region_count; /* number of regions supported (1 when there
are no regions) */
- char_u si_region_name[17]; /* region names; used only if
+ char_u si_region_name[MAXREGIONS*2]; /* region names; used only if
* si_region_count > 1) */
garray_T si_rep; /* list of fromto_T entries from REP lines */
@@ -4234,7 +4234,7 @@ spell_read_wordfile(spellinfo_T *spin, c
else
{
line += 8;
- if (STRLEN(line) > 16)
+ if (STRLEN(line) > MAXREGIONS)
smsg((char_u *)_("Too many regions in %s line %d: %s"),
fname, lnum, line);
else
@@ -5954,7 +5954,7 @@ mkspell(
char_u *wfname;
char_u **innames;
int incount;
- afffile_T *(afile[8]);
+ afffile_T *(afile[MAXREGIONS]);
int i;
int len;
stat_T st;
@@ -6025,8 +6025,8 @@ mkspell(
EMSG(_(e_invarg)); /* need at least output and input names */
else if (vim_strchr(gettail(wfname), '_') != NULL)
EMSG(_("E751: Output file name must not have region name"));
- else if (incount > 8)
- EMSG(_("E754: Only up to 8 regions supported"));
+ else if (incount > MAXREGIONS)
+ EMSG2(_("E754: Only up to %d regions supported"), MAXREGIONS);
else
{
/* Check for overwriting before doing things that may take a lot of
diff -up vim-8.0.0363/src/spell.h.hunspell vim-8.0.0363/src/spell.h
--- vim-8.0.0363/src/spell.h.hunspell 2017-02-24 14:30:16.573089102 +0100
+++ vim-8.0.0363/src/spell.h 2017-02-24 14:30:28.211114737 +0100
@@ -29,10 +29,14 @@
# define DEBUG_TRIEWALK
#endif
+#include "hunspell/hunspell.h"
+
#define MAXWLEN 254 /* Assume max. word len is this many bytes.
Some places assume a word length fits in a
byte, thus it can't be above 255.
Must be >= PFD_NOTSPECIAL. */
+#define MAXREGIONS 20
+
/* Type used for indexes in the word tree need to be at least 4 bytes. If int
* is 8 bytes we could use something smaller, but what? */
@@ -70,6 +74,10 @@ struct slang_S
char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
char_u *sl_fname; /* name of .spl file */
int sl_add; /* TRUE if it's a .add file. */
+ int sl_ishunspell; /* TRUE if it's an unconverted hunspell aff/dic combination. */
+ Hunhandle *sl_hunspell;
+ vimconv_T sl_tohunconv;
+ vimconv_T sl_fromhunconv;
char_u *sl_fbyts; /* case-folded word bytes */
idx_T *sl_fidxs; /* case-folded word indexes */
@@ -80,7 +88,7 @@ struct slang_S
char_u *sl_info; /* infotext string or NULL */
- char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
+ char_u sl_regions[MAXREGIONS * 2 + 1]; /* table with up to MAXREGIONS region names plus NUL */
char_u *sl_midword; /* MIDWORD string or NULL */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment