Fixed the multiplying of blength*blength so that it can't overflow
[rsync/rsync.git] / match.c
diff --git a/match.c b/match.c
index 845bd35..95d84b9 100644 (file)
--- a/match.c
+++ b/match.c
 
 #include "rsync.h"
 
-extern int csum_length;
-
 extern int verbose;
 extern int am_server;
-
-extern int remote_version;
+extern int do_progress;
 
 typedef unsigned short tag;
 
@@ -71,7 +68,7 @@ static void build_hash_table(struct sum_struct *s)
   if (!tag_table || !targets) 
     out_of_memory("build_hash_table");
 
-  for (i=0;i<s->count;i++) {
+  for (i=0;i<(int) s->count;i++) {
     targets[i].i = i;
     targets[i].t = gettag(s->sums[i].sum1);
   }
@@ -90,15 +87,27 @@ static void build_hash_table(struct sum_struct *s)
 static OFF_T last_match;
 
 
+/**
+ * Transmit a literal and/or match token.
+ *
+ * This delightfully-named function is called either when we find a
+ * match and need to transmit all the unmatched data leading up to it,
+ * or when we get bored of accumulating literal data and just need to
+ * transmit it.  As a result of this second case, it is called even if
+ * we have not matched at all!
+ *
+ * @param i If >=0, the number of a matched token.  If <0, indicates we
+ * have only literal data.
+ **/
 static void matched(int f,struct sum_struct *s,struct map_struct *buf,
                    OFF_T offset,int i)
 {
        OFF_T n = offset - last_match;
-       int j;
+       OFF_T j;
 
        if (verbose > 2 && i >= 0)
-               rprintf(FINFO,"match at %d last_match=%d j=%d len=%d n=%d\n",
-                       (int)offset,(int)last_match,i,(int)s->sums[i].len,(int)n);
+               rprintf(FINFO,"match at %.0f last_match=%.0f j=%d len=%d n=%.0f\n",
+                       (double)offset,(double)last_match,i,s->sums[i].len,(double)n);
 
        send_token(f,i,buf,last_match,n,i<0?0:s->sums[i].len);
        data_transfer += n;
@@ -119,27 +128,34 @@ static void matched(int f,struct sum_struct *s,struct map_struct *buf,
        else
                last_match = offset;
 
-       if (buf)
-               show_progress(last_match, buf->size);
+       if (buf && do_progress) {
+               show_progress(last_match, buf->file_size);
 
-       if (i == -1) end_progress();
+               if (i == -1) end_progress(buf->file_size);
+       }
 }
 
 
 static void hash_search(int f,struct sum_struct *s,
                        struct map_struct *buf,OFF_T len)
 {
-       OFF_T offset;
-       int j,k;
-       int end;
+       OFF_T offset, end;
+       int j,k, last_i;
        char sum2[SUM_LENGTH];
        uint32 s1, s2, sum; 
        schar *map;
 
+       /* last_i is used to encourage adjacent matches, allowing the RLL coding of the
+          output to work more efficiently */
+       last_i = -1;
+
        if (verbose > 2)
-               rprintf(FINFO,"hash search b=%d len=%d\n",s->n,(int)len);
+               rprintf(FINFO,"hash search b=%ld len=%.0f\n",
+                       (long) s->blength, (double)len);
 
-       k = MIN(len, s->n);
+       /* cast is to make s->blength signed; it should always be reasonably
+        * small */
+       k = MIN(len, (OFF_T) s->blength);
        
        map = (schar *)map_ptr(buf,0,k);
        
@@ -154,8 +170,8 @@ static void hash_search(int f,struct sum_struct *s,
        end = len + 1 - s->sums[s->count-1].len;
        
        if (verbose > 3)
-               rprintf(FINFO,"hash search s->n=%d len=%d count=%d\n",
-                       s->n,(int)len,s->count);
+               rprintf(FINFO, "hash search s->blength=%ld len=%.0f count=%ld\n",
+                       (long) s->blength, (double) len, (long) s->count);
        
        do {
                tag t = gettag2(s1,s2);
@@ -163,7 +179,7 @@ static void hash_search(int f,struct sum_struct *s,
                        
                j = tag_table[t];
                if (verbose > 4)
-                       rprintf(FINFO,"offset=%d sum=%08x\n",(int)offset,sum);
+                       rprintf(FINFO,"offset=%.0f sum=%08x\n",(double)offset,sum);
                
                if (j == NULL_TAG) {
                        goto null_tag;
@@ -171,30 +187,49 @@ static void hash_search(int f,struct sum_struct *s,
 
                sum = (s1 & 0xffff) | (s2 << 16);
                tag_hits++;
-               for (; j<s->count && targets[j].t == t; j++) {
-                       int i = targets[j].i;
+               for (; j < (int) s->count && targets[j].t == t; j++) {
+                       int l, i = targets[j].i;
                        
                        if (sum != s->sums[i].sum1) continue;
                        
+                       /* also make sure the two blocks are the same length */
+                       l = MIN(s->blength,len-offset);
+                       if (l != s->sums[i].len) continue;                      
+
                        if (verbose > 3)
-                               rprintf(FINFO,"potential match at %d target=%d %d sum=%08x\n",
-                                       (int)offset,j,i,sum);
+                               rprintf(FINFO,"potential match at %.0f target=%d %d sum=%08x\n",
+                                       (double)offset,j,i,sum);
                        
                        if (!done_csum2) {
-                               int l = MIN(s->n,len-offset);
                                map = (schar *)map_ptr(buf,offset,l);
                                get_checksum2((char *)map,l,sum2);
                                done_csum2 = 1;
                        }
                        
-                       if (memcmp(sum2,s->sums[i].sum2,csum_length) != 0) {
+                       if (memcmp(sum2,s->sums[i].sum2,s->s2length) != 0) {
                                false_alarms++;
                                continue;
                        }
+
+                       /* we've found a match, but now check to see
+                           if last_i can hint at a better match */
+                       for (j++; j < (int) s->count && targets[j].t == t; j++) {
+                               int i2 = targets[j].i;
+                               if (i2 == last_i + 1) {
+                                       if (sum != s->sums[i2].sum1) break;
+                                       if (memcmp(sum2,s->sums[i2].sum2,s->s2length) != 0) break;
+                                       /* we've found an adjacent match - the RLL coder 
+                                          will be happy */
+                                       i = i2;
+                                       break;
+                               }
+                       }
+
+                       last_i = i;
                        
                        matched(f,s,buf,offset,i);
                        offset += s->sums[i].len - 1;
-                       k = MIN((len-offset), s->n);
+                       k = MIN((len-offset), s->blength);
                        map = (schar *)map_ptr(buf,offset,k);
                        sum = get_checksum1((char *)map, k);
                        s1 = sum & 0xFFFF;
@@ -223,9 +258,10 @@ static void hash_search(int f,struct sum_struct *s,
                   match. The 3 reads are caused by the
                   running match, the checksum update and the
                   literal send. */
-               if (offset-last_match >= CHUNK_SIZE+s->n && 
+               if (offset > last_match &&
+                   offset-last_match >= CHUNK_SIZE+s->blength && 
                    (end-offset > CHUNK_SIZE)) {
-                       matched(f,s,buf,offset - s->n, -2);
+                       matched(f,s,buf,offset - s->blength, -2);
                }
        } while (++offset < end);
        
@@ -234,9 +270,24 @@ static void hash_search(int f,struct sum_struct *s,
 }
 
 
-void match_sums(int f,struct sum_struct *s,struct map_struct *buf,OFF_T len)
+/**
+ * Scan through an origin file, looking for sections that match
+ * checksums from the generator, and transmit either literal or token
+ * data.
+ *
+ * Also calculates the MD4 checksum of the whole file, using the md
+ * accumulator.  This is transmitted with the file as protection
+ * against corruption on the wire.
+ *
+ * @param s Checksums received from the generator.  If <tt>s->count ==
+ * 0</tt>, then there are actually no checksums for this file.
+ *
+ * @param len Length of the file to send.
+ **/
+void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
 {
        char file_sum[MD4_SUM_LENGTH];
+       extern int write_batch;  /*  dw */
 
        last_match = 0;
        false_alarms = 0;
@@ -268,11 +319,11 @@ void match_sums(int f,struct sum_struct *s,struct map_struct *buf,OFF_T len)
 
        sum_end(file_sum);
 
-       if (remote_version >= 14) {
-               if (verbose > 2)
-                       rprintf(FINFO,"sending file_sum\n");
-               write_buf(f,file_sum,MD4_SUM_LENGTH);
-       }
+       if (verbose > 2)
+               rprintf(FINFO,"sending file_sum\n");
+       write_buf(f,file_sum,MD4_SUM_LENGTH);
+       if (write_batch) /* dw */
+               write_batch_delta_file(file_sum, MD4_SUM_LENGTH);
 
        if (targets) {
                free(targets);