Created
July 22, 2011 14:07
-
-
Save rctay/1099515 to your computer and use it in GitHub Desktop.
[gsoc-diff] xdl_trim_head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c | |
index 82bff88..061ace9 100644 | |
--- a/xdiff/xprepare.c | |
+++ b/xdiff/xprepare.c | |
@@ -55,7 +55,7 @@ static void xdl_free_classifier(xdlclassifier_t *cf); | |
static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits, | |
xrecord_t *rec); | |
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | |
- xdlclassifier_t *cf, xdfile_t *xdf); | |
+ xdlclassifier_t *cf, xdfile_t *xdf, xrecord_t **arec); | |
static void xdl_free_ctx(xdfile_t *xdf); | |
static int xdl_clean_mmatch(char const *dis, long i, long s, long e); | |
static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2); | |
@@ -104,9 +104,10 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned | |
line = rec->ptr; | |
hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); | |
for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) | |
- if (rcrec->ha == rec->ha && | |
+ if (rec->ha == 0 || | |
+ (rcrec->ha == rec->ha && | |
xdl_recmatch(rcrec->line, rcrec->size, | |
- rec->ptr, rec->size, cf->flags)) | |
+ rec->ptr, rec->size, cf->flags))) | |
break; | |
if (!rcrec) { | |
@@ -132,8 +133,55 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned | |
} | |
+static int xdl_trim_head(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |
+ xdfile_t *xdf1, xdfile_t *xdf2) { | |
+ | |
+ const int blk = 128; | |
+ long trimmed = 0, recovered = 0; | |
+ long smaller = XDL_MIN(mf1->size, mf2->size); | |
+ char const *p1, *p2; | |
+ char const *cur, *top; | |
+ char const *prev1, *prev2; | |
+ | |
+ /* part 1: blocks */ | |
+ p1 = mf1->ptr; | |
+ p2 = mf2->ptr; | |
+ | |
+ top = p1 + smaller; | |
+ | |
+ while (blk + trimmed <= smaller && !memcmp(p1, p2, blk)) { | |
+ trimmed += blk; | |
+ p1 += blk; | |
+ p2 += blk; | |
+ } | |
+ | |
+ while (recovered < trimmed) { | |
+ if (*(p1 - recovered++) == '\n') | |
+ break; | |
+ } | |
+ p1 -= recovered; | |
+ p2 -= recovered; | |
+ | |
+ prev1 = xdf1->rstart = cur = p1; | |
+ prev2 = xdf2->rstart = p2; | |
+ | |
+ /* part 2: line based */ | |
+ while (cur < top | |
+ && (cur = memchr(cur, '\n', top - cur)) | |
+ && !memcmp(prev1, prev2, ++cur - prev1)) { | |
+ prev2 += cur - prev1; | |
+ prev1 = cur; | |
+ } | |
+ | |
+ xdf1->rstart = prev1; | |
+ xdf2->rstart = prev2; | |
+ | |
+ return 0; | |
+} | |
+ | |
+ | |
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | |
- xdlclassifier_t *cf, xdfile_t *xdf) { | |
+ xdlclassifier_t *cf, xdfile_t *xdf, xrecord_t **arec) { | |
unsigned int hbits; | |
long nrec, hsize, bsize; | |
unsigned long hav; | |
@@ -167,10 +215,19 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | |
} | |
nrec = 0; | |
+ xdf->dstart = 0; | |
if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { | |
for (top = blk + bsize; cur < top; ) { | |
prev = cur; | |
- hav = xdl_hash_record(&cur, top, xpp->flags); | |
+ if (cur < xdf->rstart) { | |
+ if (arec) | |
+ cur += (arec++)[0]->size; | |
+ else | |
+ cur = memchr(cur, '\n', top - cur) + 1; | |
+ hav = 0; | |
+ xdf->dstart++; | |
+ } else | |
+ hav = xdl_hash_record(&cur, top, xpp->flags); | |
if (nrec >= narec) { | |
narec *= 2; | |
if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) | |
@@ -207,7 +264,6 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | |
xdf->rindex = rindex; | |
xdf->nreff = 0; | |
xdf->ha = ha; | |
- xdf->dstart = 0; | |
xdf->dend = nrec - 1; | |
return 0; | |
@@ -257,12 +313,18 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |
return -1; | |
} | |
- if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { | |
+ if (xdl_trim_head(mf1, mf2, xpp, &xe->xdf1, &xe->xdf2) < 0) { | |
+ | |
+ xdl_free_classifier(&cf); | |
+ return -1; | |
+ } | |
+ | |
+ if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1, NULL) < 0) { | |
xdl_free_classifier(&cf); | |
return -1; | |
} | |
- if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) { | |
+ if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2, xe->xdf1.recs) < 0) { | |
xdl_free_ctx(&xe->xdf1); | |
xdl_free_classifier(&cf); | |
@@ -428,15 +490,19 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { | |
long i, lim; | |
xrecord_t **recs1, **recs2; | |
- recs1 = xdf1->recs; | |
- recs2 = xdf2->recs; | |
- for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; | |
- i++, recs1++, recs2++) | |
+ lim = XDL_MIN(xdf1->nrec, xdf2->nrec); | |
+ i = XDL_MIN(lim, xdf1->dstart); | |
+ /* | |
+ i = xdf1->dstart; | |
+ recs1 = xdf1->recs + i; | |
+ recs2 = xdf2->recs + i; | |
+ for (lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; | |
+ i++, recs1++, recs2++) | |
if ((*recs1)->ha != (*recs2)->ha) | |
break; | |
xdf1->dstart = xdf2->dstart = i; | |
- | |
+ */ | |
recs1 = xdf1->recs + xdf1->nrec - 1; | |
recs2 = xdf2->recs + xdf2->nrec - 1; | |
for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment