X-Git-Url: https://mattmccutchen.net/rsync/rsync.git/blobdiff_plain/45f133b9769fb45a329d3d41e121109d430e307d..0503f06089b89aa4166d6ced8d5901ad6a112c41:/match.c

diff --git a/match.c b/match.c
index 0d8bc31b..7f3212c8 100644
--- a/match.c
+++ b/match.c
@@ -29,18 +29,18 @@ extern int remote_version;
 typedef unsigned short tag;
 
 #define TABLESIZE (1<<16)
-#define NULL_TAG ((tag)-1)
+#define NULL_TAG (-1)
 
 static int false_alarms;
 static int tag_hits;
 static int matches;
-static int data_transfer;
+static int64 data_transfer;
 
 static int total_false_alarms;
 static int total_tag_hits;
 static int total_matches;
-static int64 total_data_transfer;
 
+extern struct stats stats;
 
 struct target {
   tag t;
@@ -49,7 +49,7 @@ struct target {
 
 static struct target *targets;
 
-static tag *tag_table;
+static int *tag_table;
 
 #define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF)
 #define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16)
@@ -65,7 +65,7 @@ static void build_hash_table(struct sum_struct *s)
   int i;
 
   if (!tag_table)
-    tag_table = (tag *)malloc(sizeof(tag)*TABLESIZE);
+    tag_table = (int *)malloc(sizeof(tag_table[0])*TABLESIZE);
 
   targets = (struct target *)malloc(sizeof(targets[0])*s->count);
   if (!tag_table || !targets) 
@@ -94,7 +94,7 @@ static void matched(int f,struct sum_struct *s,struct map_struct *buf,
 		    OFF_T offset,int i)
 {
 	OFF_T n = offset - last_match;
-	int j;
+	OFF_T j;
 
 	if (verbose > 2 && i >= 0)
 		rprintf(FINFO,"match at %d last_match=%d j=%d len=%d n=%d\n",
@@ -103,11 +103,10 @@ static void matched(int f,struct sum_struct *s,struct map_struct *buf,
 	send_token(f,i,buf,last_match,n,i<0?0:s->sums[i].len);
 	data_transfer += n;
 
-	if (n > 0)
-		write_flush(f);
-
-	if (i >= 0)
+	if (i >= 0) {
+		stats.matched_data += s->sums[i].len;
 		n += s->sums[i].len;
+	}
   
 	for (j=0;j<n;j+=CHUNK_SIZE) {
 		int n1 = MIN(CHUNK_SIZE,n-j);
@@ -119,6 +118,11 @@ static void matched(int f,struct sum_struct *s,struct map_struct *buf,
 		last_match = offset + s->sums[i].len;
 	else
 		last_match = offset;
+
+	if (buf)
+		show_progress(last_match, buf->file_size);
+
+	if (i == -1) end_progress();
 }
 
 
@@ -126,12 +130,15 @@ static void hash_search(int f,struct sum_struct *s,
 			struct map_struct *buf,OFF_T len)
 {
 	OFF_T offset;
-	int j,k;
+	int j,k, last_i;
 	int end;
 	char sum2[SUM_LENGTH];
 	uint32 s1, s2, sum; 
 	schar *map;
-	extern int do_compression;
+
+	/* last_i is used to encourage adjacent matches, allowing the run-length
+	   (RLL) coding of the output to work more efficiently */
+	last_i = -1;
 
 	if (verbose > 2)
 		rprintf(FINFO,"hash search b=%d len=%d\n",s->n,(int)len);
@@ -169,16 +176,19 @@ static void hash_search(int f,struct sum_struct *s,
 		sum = (s1 & 0xffff) | (s2 << 16);
 		tag_hits++;
 		for (; j<s->count && targets[j].t == t; j++) {
-			int i = targets[j].i;
+			int l, i = targets[j].i;
 			
 			if (sum != s->sums[i].sum1) continue;
 			
+			/* also make sure the two blocks are the same length */
+			l = MIN(s->n,len-offset);
+			if (l != s->sums[i].len) continue;			
+
 			if (verbose > 3)
 				rprintf(FINFO,"potential match at %d target=%d %d sum=%08x\n",
 					(int)offset,j,i,sum);
 			
 			if (!done_csum2) {
-				int l = MIN(s->n,len-offset);
 				map = (schar *)map_ptr(buf,offset,l);
 				get_checksum2((char *)map,l,sum2);
 				done_csum2 = 1;
@@ -188,6 +198,22 @@ static void hash_search(int f,struct sum_struct *s,
 				false_alarms++;
 				continue;
 			}
+
+			/* we've found a match, but now check to see
+                           if last_i can hint at a better match */
+			for (j++; j<s->count && targets[j].t == t; j++) {
+				int i2 = targets[j].i;
+				if (i2 == last_i + 1) {
+					if (sum != s->sums[i2].sum1) break;
+					if (memcmp(sum2,s->sums[i2].sum2,csum_length) != 0) break;
+					/* we've found an adjacent match - the RLL coder 
+					   will be happy */
+					i = i2;
+					break;
+				}
+			}
+
+			last_i = i;
 			
 			matched(f,s,buf,offset,i);
 			offset += s->sums[i].len - 1;
@@ -214,21 +240,15 @@ static void hash_search(int f,struct sum_struct *s,
 			--k;
 		}
 
-		if (!do_compression) {
-			/* By matching early we avoid re-reading the
-			   data 3 times in the case where a token
-			   match comes a long way after last
-			   match. The 3 reads are caused by the
-			   running match, the checksum update and the
-			   literal send.
-
-			   we don't enable this for the compressed
-			   case yet as the deflated token code can't
-			   handle it. Paul is working on it */
-			if (offset-last_match >= CHUNK_SIZE+s->n && 
-			    (end-offset > CHUNK_SIZE)) {
-				matched(f,s,buf,offset - s->n, -2);
-			}
+		/* By matching early we avoid re-reading the
+		   data 3 times in the case where a token
+		   match comes a long way after the last
+		   match. The 3 reads are caused by the
+		   running match, the checksum update and the
+		   literal send. */
+		if (offset-last_match >= CHUNK_SIZE+s->n && 
+		    (end-offset > CHUNK_SIZE)) {
+			matched(f,s,buf,offset - s->n, -2);
 		}
 	} while (++offset < end);
 	
@@ -260,6 +280,12 @@ void match_sums(int f,struct sum_struct *s,struct map_struct *buf,OFF_T len)
 		if (verbose > 2) 
 			rprintf(FINFO,"done hash search\n");
 	} else {
+		OFF_T j;
+		/* by doing this in pieces we avoid too many seeks */
+		for (j=0;j<(len-CHUNK_SIZE);j+=CHUNK_SIZE) {
+			int n1 = MIN(CHUNK_SIZE,(len-CHUNK_SIZE)-j);
+			matched(f,s,buf,j+n1,-2);
+		}
 		matched(f,s,buf,len,-1);
 	}
 
@@ -283,7 +309,7 @@ void match_sums(int f,struct sum_struct *s,struct map_struct *buf,OFF_T len)
 	total_tag_hits += tag_hits;
 	total_false_alarms += false_alarms;
 	total_matches += matches;
-	total_data_transfer += data_transfer;
+	stats.literal_data += data_transfer;
 }
 
 void match_report(void)
@@ -292,7 +318,8 @@ void match_report(void)
 		return;
 
 	rprintf(FINFO,
-		"total: matches=%d  tag_hits=%d  false_alarms=%d  data=%ld\n",
+		"total: matches=%d  tag_hits=%d  false_alarms=%d data=%.0f\n",
 		total_matches,total_tag_hits,
-		total_false_alarms,(long)total_data_transfer);
+		total_false_alarms,
+		(double)stats.literal_data);
 }