Skip to content

Instantly share code, notes, and snippets.

@rctay
Created July 22, 2011 10:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rctay/1099236 to your computer and use it in GitHub Desktop.
Save rctay/1099236 to your computer and use it in GitHub Desktop.
[gsoc-diff] skip hashing of common leading lines
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 82bff88..f926966 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -55,7 +55,7 @@ static void xdl_free_classifier(xdlclassifier_t *cf);
static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
xrecord_t *rec);
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
- xdlclassifier_t *cf, xdfile_t *xdf);
+ xdlclassifier_t *cf, xdfile_t *xdf, xrecord_t **arec);
static void xdl_free_ctx(xdfile_t *xdf);
static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2);
@@ -104,9 +104,10 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
line = rec->ptr;
hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
- if (rcrec->ha == rec->ha &&
+ if (rec->ha == 0 ||
+ (rcrec->ha == rec->ha &&
xdl_recmatch(rcrec->line, rcrec->size,
- rec->ptr, rec->size, cf->flags))
+ rec->ptr, rec->size, cf->flags)))
break;
if (!rcrec) {
@@ -132,8 +133,53 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
}
+static int xdl_trim_head(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdfile_t *xdf1, xdfile_t *xdf2) {
+
+ const int blk = 128;
+ long trimmed = 0, recovered = 0;
+ long smaller = XDL_MIN(mf1->size, mf2->size);
+ char const *p1, *p2;
+ char const *cur, *top;
+ char const *prev1, *prev2;
+
+ /* part 1: blocks */
+ p1 = mf1->ptr;
+ p2 = mf2->ptr;
+
+ while (blk + trimmed <= smaller && !memcmp(p1, p2, blk)) {
+ trimmed += blk;
+ p1 += blk;
+ p2 += blk;
+ }
+
+ while (recovered < trimmed)
+ if (*(p1 - recovered++) == '\n')
+ break;
+ p1 -= recovered;
+ p2 -= recovered;
+
+ prev1 = xdf1->rstart = cur = p1;
+ prev2 = xdf2->rstart = p2;
+
+ top = cur + smaller - trimmed + recovered;
+
+ /* part 2: line based */
+ while (cur < top
+ && (cur = memchr(cur, '\n', top - cur))
+ && !memcmp(prev1, prev2, cur - prev1)) {
+ prev2 += ++cur - prev1;
+ prev1 = cur;
+ }
+ xdf1->rstart = prev1;
+ xdf2->rstart = prev2;
+
+ return 0;
+}
+
+
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
- xdlclassifier_t *cf, xdfile_t *xdf) {
+ xdlclassifier_t *cf, xdfile_t *xdf, xrecord_t **arec) {
unsigned int hbits;
long nrec, hsize, bsize;
unsigned long hav;
@@ -167,10 +213,19 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
}
nrec = 0;
+ xdf->dstart = 0;
if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
for (top = blk + bsize; cur < top; ) {
prev = cur;
- hav = xdl_hash_record(&cur, top, xpp->flags);
+ if (cur < xdf->rstart) {
+ if (arec)
+ cur += (arec++)[0]->size;
+ else
+ cur = memchr(cur, '\n', top - cur) + 1;
+ hav = 0;
+ xdf->dstart++;
+ } else
+ hav = xdl_hash_record(&cur, top, xpp->flags);
if (nrec >= narec) {
narec *= 2;
if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *))))
@@ -207,7 +262,6 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
xdf->rindex = rindex;
xdf->nreff = 0;
xdf->ha = ha;
- xdf->dstart = 0;
xdf->dend = nrec - 1;
return 0;
@@ -257,12 +311,18 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
return -1;
}
- if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+ if (xdl_trim_head(mf1, mf2, xpp, &xe->xdf1, &xe->xdf2) < 0) {
+
+ xdl_free_classifier(&cf);
+ return -1;
+ }
+
+ if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1, NULL) < 0) {
xdl_free_classifier(&cf);
return -1;
}
- if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+ if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2, xe->xdf1.recs) < 0) {
xdl_free_ctx(&xe->xdf1);
xdl_free_classifier(&cf);
@@ -428,14 +488,8 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
long i, lim;
xrecord_t **recs1, **recs2;
- recs1 = xdf1->recs;
- recs2 = xdf2->recs;
- for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
- i++, recs1++, recs2++)
- if ((*recs1)->ha != (*recs2)->ha)
- break;
-
- xdf1->dstart = xdf2->dstart = i;
+ lim = XDL_MIN(xdf1->nrec, xdf2->nrec);
+ i = XDL_MIN(xdf1->dstart, lim);
recs1 = xdf1->recs + xdf1->nrec - 1;
recs2 = xdf2->recs + xdf2->nrec - 1;
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 2511aef..6330a9f 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -51,6 +51,7 @@ typedef struct s_xdfile {
unsigned int hbits;
xrecord_t **rhash;
long dstart, dend;
+ const char *rstart, *rend;
xrecord_t **recs;
char *rchg;
long *rindex;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment