Created
July 22, 2011 10:46
-
-
Save rctay/1099236 to your computer and use it in GitHub Desktop.
[gsoc-diff] skip hashing of common leading lines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c | |
index 82bff88..f926966 100644 | |
--- a/xdiff/xprepare.c | |
+++ b/xdiff/xprepare.c | |
@@ -55,7 +55,7 @@ static void xdl_free_classifier(xdlclassifier_t *cf); | |
static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits, | |
xrecord_t *rec); | |
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | |
- xdlclassifier_t *cf, xdfile_t *xdf); | |
+ xdlclassifier_t *cf, xdfile_t *xdf, xrecord_t **arec); | |
static void xdl_free_ctx(xdfile_t *xdf); | |
static int xdl_clean_mmatch(char const *dis, long i, long s, long e); | |
static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2); | |
@@ -104,9 +104,10 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned | |
line = rec->ptr; | |
hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); | |
for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) | |
- if (rcrec->ha == rec->ha && | |
+ if (rec->ha == 0 || | |
+ (rcrec->ha == rec->ha && | |
xdl_recmatch(rcrec->line, rcrec->size, | |
- rec->ptr, rec->size, cf->flags)) | |
+ rec->ptr, rec->size, cf->flags))) | |
break; | |
if (!rcrec) { | |
@@ -132,8 +133,53 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned | |
} | |
+static int xdl_trim_head(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |
+ xdfile_t *xdf1, xdfile_t *xdf2) { | |
+ | |
+ const int blk = 128; | |
+ long trimmed = 0, recovered = 0; | |
+ long smaller = XDL_MIN(mf1->size, mf2->size); | |
+ char const *p1, *p2; | |
+ char const *cur, *top; | |
+ char const *prev1, *prev2; | |
+ | |
+ /* part 1: blocks */ | |
+ p1 = mf1->ptr; | |
+ p2 = mf2->ptr; | |
+ | |
+ while (blk + trimmed <= smaller && !memcmp(p1, p2, blk)) { | |
+ trimmed += blk; | |
+ p1 += blk; | |
+ p2 += blk; | |
+ } | |
+ | |
+ while (recovered < trimmed) | |
+ if (*(p1 - recovered++) == '\n') | |
+ break; | |
+ p1 -= recovered; | |
+ p2 -= recovered; | |
+ | |
+ prev1 = xdf1->rstart = cur = p1; | |
+ prev2 = xdf2->rstart = p2; | |
+ | |
+ top = cur + smaller - trimmed + recovered; | |
+ | |
+ /* part 2: line based */ | |
+ while (cur < top | |
+ && (cur = memchr(cur, '\n', top - cur)) | |
+ && !memcmp(prev1, prev2, cur - prev1)) { | |
+ prev2 += ++cur - prev1; | |
+ prev1 = cur; | |
+ } | |
+ xdf1->rstart = prev1; | |
+ xdf2->rstart = prev2; | |
+ | |
+ return 0; | |
+} | |
+ | |
+ | |
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | |
- xdlclassifier_t *cf, xdfile_t *xdf) { | |
+ xdlclassifier_t *cf, xdfile_t *xdf, xrecord_t **arec) { | |
unsigned int hbits; | |
long nrec, hsize, bsize; | |
unsigned long hav; | |
@@ -167,10 +213,19 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | |
} | |
nrec = 0; | |
+ xdf->dstart = 0; | |
if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { | |
for (top = blk + bsize; cur < top; ) { | |
prev = cur; | |
- hav = xdl_hash_record(&cur, top, xpp->flags); | |
+ if (cur < xdf->rstart) { | |
+ if (arec) | |
+ cur += (arec++)[0]->size; | |
+ else | |
+ cur = memchr(cur, '\n', top - cur) + 1; | |
+ hav = 0; | |
+ xdf->dstart++; | |
+ } else | |
+ hav = xdl_hash_record(&cur, top, xpp->flags); | |
if (nrec >= narec) { | |
narec *= 2; | |
if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) | |
@@ -207,7 +262,6 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | |
xdf->rindex = rindex; | |
xdf->nreff = 0; | |
xdf->ha = ha; | |
- xdf->dstart = 0; | |
xdf->dend = nrec - 1; | |
return 0; | |
@@ -257,12 +311,18 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |
return -1; | |
} | |
- if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { | |
+ if (xdl_trim_head(mf1, mf2, xpp, &xe->xdf1, &xe->xdf2) < 0) { | |
+ | |
+ xdl_free_classifier(&cf); | |
+ return -1; | |
+ } | |
+ | |
+ if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1, NULL) < 0) { | |
xdl_free_classifier(&cf); | |
return -1; | |
} | |
- if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) { | |
+ if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2, xe->xdf1.recs) < 0) { | |
xdl_free_ctx(&xe->xdf1); | |
xdl_free_classifier(&cf); | |
@@ -428,14 +488,8 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { | |
long i, lim; | |
xrecord_t **recs1, **recs2; | |
- recs1 = xdf1->recs; | |
- recs2 = xdf2->recs; | |
- for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; | |
- i++, recs1++, recs2++) | |
- if ((*recs1)->ha != (*recs2)->ha) | |
- break; | |
- | |
- xdf1->dstart = xdf2->dstart = i; | |
+ lim = XDL_MIN(xdf1->nrec, xdf2->nrec); | |
+ i = XDL_MIN(xdf1->dstart, lim); | |
recs1 = xdf1->recs + xdf1->nrec - 1; | |
recs2 = xdf2->recs + xdf2->nrec - 1; | |
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h | |
index 2511aef..6330a9f 100644 | |
--- a/xdiff/xtypes.h | |
+++ b/xdiff/xtypes.h | |
@@ -51,6 +51,7 @@ typedef struct s_xdfile { | |
unsigned int hbits; | |
xrecord_t **rhash; | |
long dstart, dend; | |
+ const char *rstart, *rend; | |
xrecord_t **recs; | |
char *rchg; | |
long *rindex; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment