2 Copyright (C) Andrew Tridgell 1996
3 Copyright (C) Paul Mackerras 1996
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 extern int csum_length;
27 extern int remote_version;
29 typedef unsigned short tag;
31 #define TABLESIZE (1<<16)
32 #define NULL_TAG ((tag)-1)
34 static int false_alarms;
37 static int data_transfer;
39 static int total_false_alarms;
40 static int total_tag_hits;
41 static int total_matches;
42 static int64 total_data_transfer;
50 static struct target *targets;
52 static tag *tag_table;
54 #define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF)
55 #define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16)
57 static int compare_targets(struct target *t1,struct target *t2)
59 return((int)t1->t - (int)t2->t);
63 static void build_hash_table(struct sum_struct *s)
68 tag_table = (tag *)malloc(sizeof(tag)*TABLESIZE);
70 targets = (struct target *)malloc(sizeof(targets[0])*s->count);
71 if (!tag_table || !targets)
72 out_of_memory("build_hash_table");
74 for (i=0;i<s->count;i++) {
76 targets[i].t = gettag(s->sums[i].sum1);
79 qsort(targets,s->count,sizeof(targets[0]),(int (*)())compare_targets);
81 for (i=0;i<TABLESIZE;i++)
82 tag_table[i] = NULL_TAG;
84 for (i=s->count-1;i>=0;i--) {
85 tag_table[targets[i].t] = i;
90 static OFF_T last_match;
93 static void matched(int f,struct sum_struct *s,struct map_struct *buf,
96 OFF_T n = offset - last_match;
99 if (verbose > 2 && i >= 0)
100 rprintf(FINFO,"match at %d last_match=%d j=%d len=%d n=%d\n",
101 (int)offset,(int)last_match,i,(int)s->sums[i].len,(int)n);
103 send_token(f,i,buf,last_match,n,i<0?0:s->sums[i].len);
112 for (j=0;j<n;j+=CHUNK_SIZE) {
113 int n1 = MIN(CHUNK_SIZE,n-j);
114 sum_update(map_ptr(buf,last_match+j,n1),n1);
119 last_match = offset + s->sums[i].len;
125 static void hash_search(int f,struct sum_struct *s,
126 struct map_struct *buf,OFF_T len)
131 char sum2[SUM_LENGTH];
134 extern int do_compression;
137 rprintf(FINFO,"hash search b=%d len=%d\n",s->n,(int)len);
141 map = (schar *)map_ptr(buf,0,k);
143 sum = get_checksum1((char *)map, k);
147 rprintf(FINFO, "sum=%.8x k=%d\n", sum, k);
151 end = len + 1 - s->sums[s->count-1].len;
154 rprintf(FINFO,"hash search s->n=%d len=%d count=%d\n",
155 s->n,(int)len,s->count);
158 tag t = gettag2(s1,s2);
163 rprintf(FINFO,"offset=%d sum=%08x\n",(int)offset,sum);
169 sum = (s1 & 0xffff) | (s2 << 16);
171 for (; j<s->count && targets[j].t == t; j++) {
172 int i = targets[j].i;
174 if (sum != s->sums[i].sum1) continue;
177 rprintf(FINFO,"potential match at %d target=%d %d sum=%08x\n",
178 (int)offset,j,i,sum);
181 int l = MIN(s->n,len-offset);
182 map = (schar *)map_ptr(buf,offset,l);
183 get_checksum2((char *)map,l,sum2);
187 if (memcmp(sum2,s->sums[i].sum2,csum_length) != 0) {
192 matched(f,s,buf,offset,i);
193 offset += s->sums[i].len - 1;
194 k = MIN((len-offset), s->n);
195 map = (schar *)map_ptr(buf,offset,k);
196 sum = get_checksum1((char *)map, k);
204 /* Trim off the first byte from the checksum */
205 map = (schar *)map_ptr(buf,offset,k+1);
206 s1 -= map[0] + CHAR_OFFSET;
207 s2 -= k * (map[0]+CHAR_OFFSET);
209 /* Add on the next byte (if there is one) to the checksum */
210 if (k < (len-offset)) {
211 s1 += (map[k]+CHAR_OFFSET);
217 if (!do_compression) {
218 /* By matching early we avoid re-reading the
219 data 3 times in the case where a token
220 match comes a long way after last
221 match. The 3 reads are caused by the
222 running match, the checksum update and the
225 we don't enable this for the compressed
226 case yet as the deflated token code can't
227 handle it. Paul is working on it */
228 if (offset-last_match >= CHUNK_SIZE+s->n &&
229 (end-offset > CHUNK_SIZE)) {
230 matched(f,s,buf,offset - s->n, -2);
233 } while (++offset < end);
235 matched(f,s,buf,len,-1);
236 map_ptr(buf,len-1,1);
240 void match_sums(int f,struct sum_struct *s,struct map_struct *buf,OFF_T len)
242 char file_sum[MD4_SUM_LENGTH];
252 if (len > 0 && s->count>0) {
256 rprintf(FINFO,"built hash table\n");
258 hash_search(f,s,buf,len);
261 rprintf(FINFO,"done hash search\n");
263 matched(f,s,buf,len,-1);
268 if (remote_version >= 14) {
270 rprintf(FINFO,"sending file_sum\n");
271 write_buf(f,file_sum,MD4_SUM_LENGTH);
280 rprintf(FINFO, "false_alarms=%d tag_hits=%d matches=%d\n",
281 false_alarms, tag_hits, matches);
283 total_tag_hits += tag_hits;
284 total_false_alarms += false_alarms;
285 total_matches += matches;
286 total_data_transfer += data_transfer;
289 void match_report(void)
295 "total: matches=%d tag_hits=%d false_alarms=%d data=%ld\n",
296 total_matches,total_tag_hits,
297 total_false_alarms,(long)total_data_transfer);