last_match = offset;
if (buf)
- show_progress(last_match, buf->size);
+ show_progress(last_match, buf->file_size);
if (i == -1) end_progress();
}
struct map_struct *buf,OFF_T len)
{
OFF_T offset;
- int j,k;
+ int j,k, last_i;
int end;
char sum2[SUM_LENGTH];
uint32 s1, s2, sum;
schar *map;
+ /* last_i is used to encourage adjacent matches, allowing the RLL coding of the
+ output to work more efficiently */
+ last_i = -1;
+
if (verbose > 2)
rprintf(FINFO,"hash search b=%d len=%d\n",s->n,(int)len);
sum = (s1 & 0xffff) | (s2 << 16);
tag_hits++;
for (; j<s->count && targets[j].t == t; j++) {
- int i = targets[j].i;
+ int l, i = targets[j].i;
if (sum != s->sums[i].sum1) continue;
+ /* also make sure the two blocks are the same length */
+ l = MIN(s->n,len-offset);
+ if (l != s->sums[i].len) continue;
+
if (verbose > 3)
rprintf(FINFO,"potential match at %d target=%d %d sum=%08x\n",
(int)offset,j,i,sum);
if (!done_csum2) {
- int l = MIN(s->n,len-offset);
map = (schar *)map_ptr(buf,offset,l);
get_checksum2((char *)map,l,sum2);
done_csum2 = 1;
false_alarms++;
continue;
}
+
+ /* we've found a match, but now check to see
+ if last_i can hint at a better match */
+ for (j++; j<s->count && targets[j].t == t; j++) {
+ int i2 = targets[j].i;
+ if (i2 == last_i + 1) {
+ if (sum != s->sums[i2].sum1) break;
+ if (memcmp(sum2,s->sums[i2].sum2,csum_length) != 0) break;
+ /* we've found an adjacent match - the RLL coder
+ will be happy */
+ i = i2;
+ break;
+ }
+ }
+
+ last_i = i;
matched(f,s,buf,offset,i);
offset += s->sums[i].len - 1;