Skip to content

Instantly share code, notes, and snippets.

@rctay
Created July 22, 2011 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rctay/1099515 to your computer and use it in GitHub Desktop.
Save rctay/1099515 to your computer and use it in GitHub Desktop.
[gsoc-diff] xdl_trim_head()
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 82bff88..061ace9 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -55,7 +55,7 @@ static void xdl_free_classifier(xdlclassifier_t *cf);
static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
xrecord_t *rec);
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
- xdlclassifier_t *cf, xdfile_t *xdf);
+ xdlclassifier_t *cf, xdfile_t *xdf, xrecord_t **arec);
static void xdl_free_ctx(xdfile_t *xdf);
static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2);
@@ -104,9 +104,10 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
line = rec->ptr;
hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
- if (rcrec->ha == rec->ha &&
+ if (rec->ha == 0 ||
+ (rcrec->ha == rec->ha &&
xdl_recmatch(rcrec->line, rcrec->size,
- rec->ptr, rec->size, cf->flags))
+ rec->ptr, rec->size, cf->flags)))
break;
if (!rcrec) {
@@ -132,8 +133,55 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
}
+static int xdl_trim_head(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdfile_t *xdf1, xdfile_t *xdf2) {
+
+ const int blk = 128;
+ long trimmed = 0, recovered = 0;
+ long smaller = XDL_MIN(mf1->size, mf2->size);
+ char const *p1, *p2;
+ char const *cur, *top;
+ char const *prev1, *prev2;
+
+ /* part 1: blocks */
+ p1 = mf1->ptr;
+ p2 = mf2->ptr;
+
+ top = p1 + smaller;
+
+ while (blk + trimmed <= smaller && !memcmp(p1, p2, blk)) {
+ trimmed += blk;
+ p1 += blk;
+ p2 += blk;
+ }
+
+ while (recovered < trimmed) {
+ if (*(p1 - recovered++) == '\n')
+ break;
+ }
+ p1 -= recovered;
+ p2 -= recovered;
+
+ prev1 = xdf1->rstart = cur = p1;
+ prev2 = xdf2->rstart = p2;
+
+ /* part 2: line based */
+ while (cur < top
+ && (cur = memchr(cur, '\n', top - cur))
+ && !memcmp(prev1, prev2, ++cur - prev1)) {
+ prev2 += cur - prev1;
+ prev1 = cur;
+ }
+
+ xdf1->rstart = prev1;
+ xdf2->rstart = prev2;
+
+ return 0;
+}
+
+
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
- xdlclassifier_t *cf, xdfile_t *xdf) {
+ xdlclassifier_t *cf, xdfile_t *xdf, xrecord_t **arec) {
unsigned int hbits;
long nrec, hsize, bsize;
unsigned long hav;
@@ -167,10 +215,19 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
}
nrec = 0;
+ xdf->dstart = 0;
if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
for (top = blk + bsize; cur < top; ) {
prev = cur;
- hav = xdl_hash_record(&cur, top, xpp->flags);
+ if (cur < xdf->rstart) {
+ if (arec)
+ cur += (arec++)[0]->size;
+ else
+ cur = memchr(cur, '\n', top - cur) + 1;
+ hav = 0;
+ xdf->dstart++;
+ } else
+ hav = xdl_hash_record(&cur, top, xpp->flags);
if (nrec >= narec) {
narec *= 2;
if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *))))
@@ -207,7 +264,6 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
xdf->rindex = rindex;
xdf->nreff = 0;
xdf->ha = ha;
- xdf->dstart = 0;
xdf->dend = nrec - 1;
return 0;
@@ -257,12 +313,18 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
return -1;
}
- if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+ if (xdl_trim_head(mf1, mf2, xpp, &xe->xdf1, &xe->xdf2) < 0) {
+
+ xdl_free_classifier(&cf);
+ return -1;
+ }
+
+ if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1, NULL) < 0) {
xdl_free_classifier(&cf);
return -1;
}
- if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+ if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2, xe->xdf1.recs) < 0) {
xdl_free_ctx(&xe->xdf1);
xdl_free_classifier(&cf);
@@ -428,15 +490,19 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
long i, lim;
xrecord_t **recs1, **recs2;
- recs1 = xdf1->recs;
- recs2 = xdf2->recs;
- for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
- i++, recs1++, recs2++)
+ lim = XDL_MIN(xdf1->nrec, xdf2->nrec);
+ i = XDL_MIN(lim, xdf1->dstart);
+ /*
+ i = xdf1->dstart;
+ recs1 = xdf1->recs + i;
+ recs2 = xdf2->recs + i;
+ for (lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
+ i++, recs1++, recs2++)
if ((*recs1)->ha != (*recs2)->ha)
break;
xdf1->dstart = xdf2->dstart = i;
-
+ */
recs1 = xdf1->recs + xdf1->nrec - 1;
recs2 = xdf2->recs + xdf2->nrec - 1;
for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment