extern int verbose;
extern int am_server;
extern int do_progress;
+extern int checksum_seed;
+extern int append_mode;
+
+int updating_basis_file;
typedef unsigned short tag;
#define TABLESIZE (1<<16)
-#define NULL_TAG ((size_t)-1)
+#define NULL_TAG (-1)
static int false_alarms;
static int tag_hits;
struct target {
tag t;
- size_t i;
+ int32 i;
};
static struct target *targets;
-static size_t *tag_table;
+static int32 *tag_table;
#define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF)
#define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16)
static void build_hash_table(struct sum_struct *s)
{
- size_t i;
+ int32 i;
if (!tag_table)
- tag_table = new_array(size_t, TABLESIZE);
+ tag_table = new_array(int32, TABLESIZE);
targets = new_array(struct target, s->count);
if (!tag_table || !targets)
* @param i If >0, the number of a matched token. If 0, indicates we
* have only literal data.
**/
-static void matched(int f,struct sum_struct *s,struct map_struct *buf,
- OFF_T offset,int i)
+static void matched(int f, struct sum_struct *s, struct map_struct *buf,
+ OFF_T offset, int32 i)
{
- OFF_T n = offset - last_match;
- OFF_T j;
-
- if (verbose > 2 && i >= 0)
- rprintf(FINFO,"match at %.0f last_match=%.0f j=%d len=%u n=%.0f\n",
- (double)offset,(double)last_match,i,s->sums[i].len,(double)n);
+ int32 n = offset - last_match; /* max value: block_size (int32) */
+ int32 j;
+
+ if (verbose > 2 && i >= 0) {
+ rprintf(FINFO,
+ "match at %.0f last_match=%.0f j=%d len=%ld n=%ld\n",
+ (double)offset, (double)last_match, i,
+ (long)s->sums[i].len, (long)n);
+ }
- send_token(f,i,buf,last_match,n,i<0?0:s->sums[i].len);
+ send_token(f, i, buf, last_match, n, i < 0 ? 0 : s->sums[i].len);
data_transfer += n;
if (i >= 0) {
}
for (j = 0; j < n; j += CHUNK_SIZE) {
- int n1 = MIN(CHUNK_SIZE,n-j);
- sum_update(map_ptr(buf,last_match+j,n1),n1);
+ int32 n1 = MIN(CHUNK_SIZE, n - j);
+ sum_update(map_ptr(buf, last_match + j, n1), n1);
}
-
if (i >= 0)
last_match = offset + s->sums[i].len;
else
last_match = offset;
- if (buf && do_progress) {
+ if (buf && do_progress)
show_progress(last_match, buf->file_size);
-
- if (i == -1)
- end_progress(buf->file_size);
- }
}
static void hash_search(int f,struct sum_struct *s,
struct map_struct *buf, OFF_T len)
{
- OFF_T offset, end;
- unsigned int k;
- size_t last_i;
+ OFF_T offset, end, backup;
+ int32 k, want_i;
char sum2[SUM_LENGTH];
uint32 s1, s2, sum;
+ int more;
schar *map;
- /* last_i is used to encourage adjacent matches, allowing the RLL coding of the
- output to work more efficiently */
- last_i = (size_t)-1;
+ /* want_i is used to encourage adjacent matches, allowing the RLL
+ * coding of the output to work more efficiently. */
+ want_i = 0;
if (verbose > 2) {
- rprintf(FINFO,"hash search b=%u len=%.0f\n",
- s->blength, (double)len);
+ rprintf(FINFO, "hash search b=%ld len=%.0f\n",
+ (long)s->blength, (double)len);
}
- k = MIN(len, s->blength);
+ k = (int32)MIN(len, (OFF_T)s->blength);
map = (schar *)map_ptr(buf, 0, k);
s1 = sum & 0xFFFF;
s2 = sum >> 16;
if (verbose > 3)
- rprintf(FINFO, "sum=%.8x k=%u\n", sum, k);
+ rprintf(FINFO, "sum=%.8x k=%ld\n", sum, (long)k);
offset = 0;
end = len + 1 - s->sums[s->count-1].len;
if (verbose > 3) {
- rprintf(FINFO, "hash search s->blength=%u len=%.0f count=%.0f\n",
- s->blength, (double)len, (double)s->count);
+ rprintf(FINFO, "hash search s->blength=%ld len=%.0f count=%.0f\n",
+ (long)s->blength, (double)len, (double)s->count);
}
do {
tag t = gettag2(s1,s2);
int done_csum2 = 0;
- size_t j = tag_table[t];
+ int32 j = tag_table[t];
if (verbose > 4)
rprintf(FINFO,"offset=%.0f sum=%08x\n",(double)offset,sum);
sum = (s1 & 0xffff) | (s2 << 16);
tag_hits++;
- for (; j < s->count && targets[j].t == t; j++) {
- unsigned int l;
- size_t i = targets[j].i;
+ do {
+ int32 l, i = targets[j].i;
if (sum != s->sums[i].sum1)
continue;
/* also make sure the two blocks are the same length */
- l = MIN((OFF_T)s->blength, len-offset);
+ l = (int32)MIN((OFF_T)s->blength, len-offset);
if (l != s->sums[i].len)
continue;
+ /* in-place: ensure chunk's offset is either >= our
+ * offset or that the data didn't move. */
+ if (updating_basis_file && s->sums[i].offset < offset
+ && !(s->sums[i].flags & SUMFLG_SAME_OFFSET))
+ continue;
+
if (verbose > 3)
rprintf(FINFO,"potential match at %.0f target=%.0f %.0f sum=%08x\n",
(double)offset,(double)j,(double)i,sum);
continue;
}
- /* we've found a match, but now check to see
- * if last_i can hint at a better match */
- for (j++; j < s->count && targets[j].t == t; j++) {
- size_t i2 = targets[j].i;
- if (i2 == last_i + 1) {
- if (sum != s->sums[i2].sum1)
- break;
- if (memcmp(sum2,s->sums[i2].sum2,s->s2length) != 0)
- break;
- /* we've found an adjacent match - the RLL coder
- * will be happy */
- i = i2;
- break;
- }
+ /* When updating in-place, the best possible match is
+ * one with an identical offset, so we prefer that over
+ * the following want_i optimization. */
+ if (updating_basis_file) {
+ do {
+ int32 i2 = targets[j].i;
+ if (s->sums[i2].offset != offset)
+ continue;
+ if (i2 != i) {
+ if (sum != s->sums[i2].sum1)
+ break;
+ if (memcmp(sum2, s->sums[i2].sum2,
+ s->s2length) != 0)
+ break;
+ i = i2;
+ }
+ /* This chunk was at the same offset on
+ * both the sender and the receiver. */
+ s->sums[i].flags |= SUMFLG_SAME_OFFSET;
+ goto set_want_i;
+ } while (++j < s->count && targets[j].t == t);
}
- last_i = i;
+ /* we've found a match, but now check to see
+ * if want_i can hint at a better match. */
+ if (i != want_i && want_i < s->count
+ && (!updating_basis_file || s->sums[want_i].offset >= offset
+ || s->sums[want_i].flags & SUMFLG_SAME_OFFSET)
+ && sum == s->sums[want_i].sum1
+ && memcmp(sum2, s->sums[want_i].sum2, s->s2length) == 0) {
+ /* we've found an adjacent match - the RLL coder
+ * will be happy */
+ i = want_i;
+ }
+ set_want_i:
+ want_i = i + 1;
matched(f,s,buf,offset,i);
offset += s->sums[i].len - 1;
- k = MIN(s->blength, len-offset);
+ k = (int32)MIN((OFF_T)s->blength, len-offset);
map = (schar *)map_ptr(buf, offset, k);
sum = get_checksum1((char *)map, k);
s1 = sum & 0xFFFF;
s2 = sum >> 16;
matches++;
break;
- }
+ } while (++j < s->count && targets[j].t == t);
null_tag:
+ backup = offset - last_match;
+ /* Just after a match, offset is last_match - 1, so backup is -1. */
+ if (backup < 0)
+ backup = 0;
+
/* Trim off the first byte from the checksum */
- map = (schar *)map_ptr(buf, offset, k+1);
+ more = offset + k < len;
+ map = (schar *)map_ptr(buf, offset - backup, k + more + backup)
+ + backup;
s1 -= map[0] + CHAR_OFFSET;
s2 -= k * (map[0]+CHAR_OFFSET);
/* Add on the next byte (if there is one) to the checksum */
- if (k < (len-offset)) {
- s1 += (map[k]+CHAR_OFFSET);
+ if (more) {
+ s1 += map[k] + CHAR_OFFSET;
s2 += s1;
} else
--k;
match. The 3 reads are caused by the
running match, the checksum update and the
literal send. */
- if (offset > last_match
- && offset-last_match >= CHUNK_SIZE+s->blength
- && end-offset > CHUNK_SIZE) {
- matched(f,s,buf,offset - s->blength, -2);
- }
+ if (backup >= s->blength+CHUNK_SIZE && end-offset > CHUNK_SIZE)
+ matched(f, s, buf, offset - s->blength, -2);
} while (++offset < end);
- matched(f,s,buf,len,-1);
- map_ptr(buf,len-1,1);
+ matched(f, s, buf, len, -1);
+ map_ptr(buf, len-1, 1);
}
void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
{
char file_sum[MD4_SUM_LENGTH];
- extern int write_batch;
last_match = 0;
false_alarms = 0;
matches = 0;
data_transfer = 0;
- sum_init();
+ sum_init(checksum_seed);
- if (len > 0 && s->count>0) {
+ if (append_mode) {
+ OFF_T j = 0;
+ for (j = CHUNK_SIZE; j < s->flength; j += CHUNK_SIZE) {
+ if (buf && do_progress)
+ show_progress(last_match, buf->file_size);
+ sum_update(map_ptr(buf, last_match, CHUNK_SIZE),
+ CHUNK_SIZE);
+ last_match = j;
+ }
+ if (last_match < s->flength) {
+ int32 len = s->flength - last_match;
+ if (buf && do_progress)
+ show_progress(last_match, buf->file_size);
+ sum_update(map_ptr(buf, last_match, len), len);
+ last_match = s->flength;
+ }
+ s->count = 0;
+ }
+
+ if (len > 0 && s->count > 0) {
build_hash_table(s);
if (verbose > 2)
} else {
OFF_T j;
/* by doing this in pieces we avoid too many seeks */
- for (j = 0; j < len-CHUNK_SIZE; j += CHUNK_SIZE) {
- int n1 = MIN(CHUNK_SIZE,(len-CHUNK_SIZE)-j);
- matched(f,s,buf,j+n1,-2);
- }
- matched(f,s,buf,len,-1);
+ for (j = last_match + CHUNK_SIZE; j < len; j += CHUNK_SIZE)
+ matched(f, s, buf, j, -2);
+ matched(f, s, buf, len, -1);
}
sum_end(file_sum);
if (verbose > 2)
rprintf(FINFO,"sending file_sum\n");
write_buf(f,file_sum,MD4_SUM_LENGTH);
- if (write_batch)
- write_batch_delta_file(file_sum, MD4_SUM_LENGTH);
if (targets) {
free(targets);