this fixes two problems:
[rsync/rsync.git] / match.c
CommitLineData
c627d613
AT
1/*
2 Copyright (C) Andrew Tridgell 1996
3 Copyright (C) Paul Mackerras 1996
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18*/
19
20#include "rsync.h"
21
43a481dc
AT
22extern int csum_length;
23
c627d613
AT
24extern int verbose;
25extern int am_server;
26
9e31c482
AT
27extern int remote_version;
28
c627d613
AT
29typedef unsigned short tag;
30
31#define TABLESIZE (1<<16)
32#define NULL_TAG ((tag)-1)
33
34static int false_alarms;
35static int tag_hits;
36static int matches;
37static int data_transfer;
38
3a6a366f
AT
39static int total_false_alarms;
40static int total_tag_hits;
41static int total_matches;
71c46176 42static int64 total_data_transfer;
c627d613
AT
43
44
45struct target {
46 tag t;
47 int i;
48};
49
3a6a366f 50static struct target *targets;
c627d613 51
3a6a366f 52static tag *tag_table;
c627d613
AT
53
54#define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF)
55#define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16)
56
57static int compare_targets(struct target *t1,struct target *t2)
58{
a16bbc39 59 return((int)t1->t - (int)t2->t);
c627d613
AT
60}
61
62
63static void build_hash_table(struct sum_struct *s)
64{
65 int i;
66
67 if (!tag_table)
68 tag_table = (tag *)malloc(sizeof(tag)*TABLESIZE);
69
70 targets = (struct target *)malloc(sizeof(targets[0])*s->count);
71 if (!tag_table || !targets)
72 out_of_memory("build_hash_table");
73
74 for (i=0;i<s->count;i++) {
75 targets[i].i = i;
76 targets[i].t = gettag(s->sums[i].sum1);
77 }
78
79 qsort(targets,s->count,sizeof(targets[0]),(int (*)())compare_targets);
80
81 for (i=0;i<TABLESIZE;i++)
82 tag_table[i] = NULL_TAG;
83
84 for (i=s->count-1;i>=0;i--) {
85 tag_table[targets[i].t] = i;
86 }
87}
88
89
bcacc18b 90static OFF_T last_match;
c627d613
AT
91
92
c6e7fcb4 93static void matched(int f,struct sum_struct *s,struct map_struct *buf,
bcacc18b 94 OFF_T offset,int i)
c627d613 95{
bcacc18b 96 OFF_T n = offset - last_match;
e7ebc36c 97 int j;
9e31c482 98
0d0e2e93 99 if (verbose > 2 && i >= 0)
9486289c 100 rprintf(FINFO,"match at %d last_match=%d j=%d len=%d n=%d\n",
0b910560 101 (int)offset,(int)last_match,i,(int)s->sums[i].len,(int)n);
c627d613 102
45f133b9 103 send_token(f,i,buf,last_match,n,i<0?0:s->sums[i].len);
e7ebc36c 104 data_transfer += n;
70d794dc 105
e7ebc36c
AT
106 if (n > 0)
107 write_flush(f);
9e31c482 108
0d0e2e93 109 if (i >= 0)
e7ebc36c 110 n += s->sums[i].len;
70d794dc 111
e7ebc36c
AT
112 for (j=0;j<n;j+=CHUNK_SIZE) {
113 int n1 = MIN(CHUNK_SIZE,n-j);
114 sum_update(map_ptr(buf,last_match+j,n1),n1);
115 }
9e31c482 116
9e31c482 117
0d0e2e93 118 if (i >= 0)
e7ebc36c 119 last_match = offset + s->sums[i].len;
0d0e2e93
AT
120 else
121 last_match = offset;
9e31c482
AT
122}
123
124
c6e7fcb4 125static void hash_search(int f,struct sum_struct *s,
bcacc18b 126 struct map_struct *buf,OFF_T len)
c627d613 127{
bcacc18b 128 OFF_T offset;
0b910560 129 int j,k;
e7ebc36c
AT
130 int end;
131 char sum2[SUM_LENGTH];
132 uint32 s1, s2, sum;
debb4505 133 schar *map;
45f133b9 134 extern int do_compression;
e7ebc36c
AT
135
136 if (verbose > 2)
9486289c 137 rprintf(FINFO,"hash search b=%d len=%d\n",s->n,(int)len);
e7ebc36c
AT
138
139 k = MIN(len, s->n);
140
debb4505 141 map = (schar *)map_ptr(buf,0,k);
e7ebc36c
AT
142
143 sum = get_checksum1((char *)map, k);
144 s1 = sum & 0xFFFF;
145 s2 = sum >> 16;
146 if (verbose > 3)
9486289c 147 rprintf(FINFO, "sum=%.8x k=%d\n", sum, k);
e7ebc36c
AT
148
149 offset = 0;
150
151 end = len + 1 - s->sums[s->count-1].len;
152
153 if (verbose > 3)
9486289c 154 rprintf(FINFO,"hash search s->n=%d len=%d count=%d\n",
e7ebc36c
AT
155 s->n,(int)len,s->count);
156
157 do {
158 tag t = gettag2(s1,s2);
159 int done_csum2 = 0;
160
161 j = tag_table[t];
162 if (verbose > 4)
9486289c 163 rprintf(FINFO,"offset=%d sum=%08x\n",(int)offset,sum);
e7ebc36c
AT
164
165 if (j == NULL_TAG) {
166 goto null_tag;
167 }
168
169 sum = (s1 & 0xffff) | (s2 << 16);
170 tag_hits++;
171 for (; j<s->count && targets[j].t == t; j++) {
172 int i = targets[j].i;
173
174 if (sum != s->sums[i].sum1) continue;
175
176 if (verbose > 3)
9486289c 177 rprintf(FINFO,"potential match at %d target=%d %d sum=%08x\n",
0b910560 178 (int)offset,j,i,sum);
e7ebc36c
AT
179
180 if (!done_csum2) {
181 int l = MIN(s->n,len-offset);
debb4505 182 map = (schar *)map_ptr(buf,offset,l);
e7ebc36c
AT
183 get_checksum2((char *)map,l,sum2);
184 done_csum2 = 1;
185 }
186
187 if (memcmp(sum2,s->sums[i].sum2,csum_length) != 0) {
188 false_alarms++;
189 continue;
190 }
191
192 matched(f,s,buf,offset,i);
193 offset += s->sums[i].len - 1;
194 k = MIN((len-offset), s->n);
debb4505 195 map = (schar *)map_ptr(buf,offset,k);
e7ebc36c
AT
196 sum = get_checksum1((char *)map, k);
197 s1 = sum & 0xFFFF;
198 s2 = sum >> 16;
199 matches++;
200 break;
201 }
202
203 null_tag:
204 /* Trim off the first byte from the checksum */
debb4505 205 map = (schar *)map_ptr(buf,offset,k+1);
e7ebc36c
AT
206 s1 -= map[0] + CHAR_OFFSET;
207 s2 -= k * (map[0]+CHAR_OFFSET);
208
209 /* Add on the next byte (if there is one) to the checksum */
210 if (k < (len-offset)) {
211 s1 += (map[k]+CHAR_OFFSET);
212 s2 += s1;
213 } else {
214 --k;
215 }
45f133b9
AT
216
217 if (!do_compression) {
218 /* By matching early we avoid re-reading the
219 data 3 times in the case where a token
220 match comes a long way after last
221 match. The 3 reads are caused by the
222 running match, the checksum update and the
223 literal send.
224
225 we don't enable this for the compressed
226 case yet as the deflated token code can't
227 handle it. Paul is working on it */
228 if (offset-last_match >= CHUNK_SIZE+s->n &&
229 (end-offset > CHUNK_SIZE)) {
230 matched(f,s,buf,offset - s->n, -2);
231 }
232 }
e7ebc36c
AT
233 } while (++offset < end);
234
235 matched(f,s,buf,len,-1);
236 map_ptr(buf,len-1,1);
c627d613
AT
237}
238
239
bcacc18b 240void match_sums(int f,struct sum_struct *s,struct map_struct *buf,OFF_T len)
c627d613 241{
e7ebc36c
AT
242 char file_sum[MD4_SUM_LENGTH];
243
244 last_match = 0;
245 false_alarms = 0;
246 tag_hits = 0;
247 matches=0;
248 data_transfer=0;
249
250 sum_init();
251
252 if (len > 0 && s->count>0) {
253 build_hash_table(s);
254
255 if (verbose > 2)
9486289c 256 rprintf(FINFO,"built hash table\n");
e7ebc36c
AT
257
258 hash_search(f,s,buf,len);
259
260 if (verbose > 2)
9486289c 261 rprintf(FINFO,"done hash search\n");
e7ebc36c
AT
262 } else {
263 matched(f,s,buf,len,-1);
264 }
9e31c482 265
e7ebc36c 266 sum_end(file_sum);
c627d613 267
e7ebc36c
AT
268 if (remote_version >= 14) {
269 if (verbose > 2)
9486289c 270 rprintf(FINFO,"sending file_sum\n");
e7ebc36c
AT
271 write_buf(f,file_sum,MD4_SUM_LENGTH);
272 }
c627d613 273
e7ebc36c
AT
274 if (targets) {
275 free(targets);
276 targets=NULL;
277 }
278
279 if (verbose > 2)
9486289c 280 rprintf(FINFO, "false_alarms=%d tag_hits=%d matches=%d\n",
e7ebc36c
AT
281 false_alarms, tag_hits, matches);
282
283 total_tag_hits += tag_hits;
284 total_false_alarms += false_alarms;
285 total_matches += matches;
286 total_data_transfer += data_transfer;
c627d613
AT
287}
288
289void match_report(void)
290{
e7ebc36c
AT
291 if (verbose <= 1)
292 return;
c627d613 293
9486289c 294 rprintf(FINFO,
3a6a366f 295 "total: matches=%d tag_hits=%d false_alarms=%d data=%ld\n",
e7ebc36c 296 total_matches,total_tag_hits,
3a6a366f 297 total_false_alarms,(long)total_data_transfer);
c627d613 298}