Added a simple hashtable routine for hashing st_dev and st_ino info
[rsync/rsync.git] / hlink.c
1 /*
2  * Routines to support hard-linking.
3  *
4  * Copyright (C) 1996 Andrew Tridgell
5  * Copyright (C) 1996 Paul Mackerras
6  * Copyright (C) 2002 Martin Pool <mbp@samba.org>
7  * Copyright (C) 2004, 2005, 2006 Wayne Davison
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write to the Free Software Foundation, Inc.,
21  * 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
22  */
23
24 #include "rsync.h"
25
26 extern int verbose;
27 extern int dry_run;
28 extern int do_xfers;
29 extern int link_dest;
30 extern int make_backups;
31 extern int protocol_version;
32 extern int remove_source_files;
33 extern int stdout_format_has_i;
34 extern int maybe_ATTRS_REPORT;
35 extern char *basis_dir[];
36 extern struct file_list *the_file_list;
37
38 #ifdef SUPPORT_HARD_LINKS
39
40 alloc_pool_t hlink_pool;
41
42 #define HASH_LOAD_LIMIT(size) ((size)*3/4)
43 #define FPTR(i) (the_file_list->files[i])
44
45 struct ihash_table {
46         int32 size;
47         int32 entries;
48         struct idev_node *buckets;
49 } *dev_tbl;
50
51 static struct idev_node *ihash_node(struct ihash_table *tbl, int64 key);
52
53 /* Starting with protocol 30, we use a simple hashtable on the sending side
54  * for hashing the st_dev and st_ino info.  The receiving side gets told
55  * (via flags and a "group index") which items are hard-linked together, so
56  * we can avoid the pool of dev+inode data. */
57
58 static struct ihash_table *ihash_create(int size)
59 {
60         struct ihash_table *tbl;
61
62         /* Pick a power of 2 that can hold the requested size. */
63         if (size & (size-1)) {
64                 int req = size;
65                 size = 32;
66                 while (size < req)
67                         size *= 2;
68         }
69
70         if (!(tbl = new(struct ihash_table))
71          || !(tbl->buckets = new_array(struct idev_node, size)))
72                 out_of_memory("ihash_create");
73         memset(tbl->buckets, 0, size * sizeof tbl->buckets[0]);
74         tbl->size = size;
75         tbl->entries = 0;
76
77         return tbl;
78 }
79
80 static void ihash_destroy(struct ihash_table *tbl)
81 {
82         free(tbl->buckets);
83         free(tbl);
84 }
85
86 void init_hard_links(void)
87 {
88         if (protocol_version >= 30) {
89                 dev_tbl = ihash_create(16);
90                 return;
91         }
92
93         if (!(hlink_pool = pool_create(HLINK_EXTENT, sizeof (struct idev),
94                                        out_of_memory, POOL_INTERN)))
95                 out_of_memory("init_hard_links");
96 }
97
98 static void expand_ihash(struct ihash_table *tbl)
99 {
100         struct idev_node *old_buckets;
101         int size = tbl->size * 2;
102         int i;
103
104         old_buckets = tbl->buckets;
105         if (!(tbl->buckets = new_array(struct idev_node, size)))
106                 out_of_memory("ihash_create");
107         memset(tbl->buckets, 0, size * sizeof tbl->buckets[0]);
108         tbl->size = size;
109         tbl->entries = 0;
110
111         for (i = size / 2; i-- > 0; ) {
112                 int64 key = old_buckets[i].key;
113                 if (key == 0)
114                         continue;
115                 ihash_node(tbl, key)->data = old_buckets[i].data;
116         }
117
118         free(old_buckets);
119 }
120
121 /* This returns the node for the indicated key, either newly created,
122  * or already existing. */
123 static struct idev_node *ihash_node(struct ihash_table *tbl, int64 key)
124 {
125         uint32 bkt;
126
127         if (tbl->entries > HASH_LOAD_LIMIT(tbl->size))
128                 expand_ihash(tbl);
129
130 #if SIZEOF_INT64 < 8
131         /* Based on Jenkins One-at-a-time hash. */
132         {
133                 uchar *keyp = (uchar*)&key;
134                 int i;
135
136                 for (bkt = 0, i = 0; i < SIZEOF_INT64; i++) {
137                         bkt += keyp[i];
138                         bkt += (bkt << 10);
139                         bkt ^= (bkt >> 6);
140                 }
141                 bkt += (bkt << 3);
142                 bkt ^= (bkt >> 11);
143                 bkt += (bkt << 15);
144         }
145 #else
146 #define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
147         /* Based on Jenkins hashword() from lookup3.c. */
148         {
149                 uint32 a, b, c;
150
151                 /* Set up the internal state */
152                 a = b = c = 0xdeadbeef + (8 << 2);
153
154                 b += (uint32)(key >> 32);
155                 a += (uint32)key;
156                 c ^= b; c -= rot(b, 14);
157                 a ^= c; a -= rot(c, 11);
158                 b ^= a; b -= rot(a, 25);
159                 c ^= b; c -= rot(b, 16);
160                 a ^= c; a -= rot(c, 4);
161                 b ^= a; b -= rot(a, 14);
162                 c ^= b; c -= rot(b, 24);
163                 bkt = c;
164         }
165 #endif
166
167         /* If it already exists, return it. */
168         while (1) {
169                 bkt &= tbl->size - 1;
170                 if (tbl->buckets[bkt].key == key)
171                         return &tbl->buckets[bkt];
172                 if (tbl->buckets[bkt].key == 0)
173                         break;
174                 bkt++;
175         }
176
177         /* Otherwise, take over this empty spot and then return it. */
178         tbl->buckets[bkt].key = key;
179         tbl->entries++;
180         return &tbl->buckets[bkt];
181 }
182
183 struct idev_node *idev_node(int64 dev, int64 ino)
184 {
185         static struct idev_node *dev_node = NULL;
186         struct ihash_table *tbl;
187
188         if (!dev_node || dev_node->key != dev) {
189                 /* We keep a separate hash table of inodes for every device. */
190                 dev_node = ihash_node(dev_tbl, dev);
191                 if (!(tbl = dev_node->data))
192                         tbl = dev_node->data = ihash_create(512);
193         } else
194                 tbl = dev_node->data;
195
196         return ihash_node(tbl, ino);
197 }
198
199 void idev_destroy(void)
200 {
201         int i;
202
203         for (i = 0; i < dev_tbl->size; i++) {
204                 if (dev_tbl->buckets[i].data)
205                         ihash_destroy(dev_tbl->buckets[i].data);
206         }
207
208         ihash_destroy(dev_tbl);
209 }
210
211 /* This routine compares dev+inode info for protocols < 30. */
212 static int hlink_compare_idev(int *int1, int *int2)
213 {
214         struct file_struct *f1 = FPTR(*int1);
215         struct file_struct *f2 = FPTR(*int2);
216         struct idev *i1 = F_HL_IDEV(f1);
217         struct idev *i2 = F_HL_IDEV(f2);
218
219         if (i1->dev != i2->dev)
220                 return i1->dev > i2->dev ? 1 : -1;
221
222         if (i1->ino != i2->ino)
223                 return i1->ino > i2->ino ? 1 : -1;
224
225         return *int1 > *int2 ? 1 : -1;
226 }
227
228 /* This routine compares group numbers for protocols >= 30. */
229 static int hlink_compare_gnum(int *int1, int *int2)
230 {
231         struct file_struct *f1 = FPTR(*int1);
232         struct file_struct *f2 = FPTR(*int2);
233         int gnum1 = F_HL_GNUM(f1);
234         int gnum2 = F_HL_GNUM(f2);
235
236         if (gnum1 != gnum2)
237                 return gnum1 > gnum2 ? 1 : -1;
238
239         return *int1 > *int2 ? 1 : -1;
240 }
241
242 /* The match routine for protocols < 30. */
243 static void match_idevs(int32 *ndx_list, int ndx_count)
244 {
245         int32 from, prev;
246         struct file_struct *file, *file_next;
247         struct idev *idev, *idev_next;
248
249         qsort(ndx_list, ndx_count, sizeof ndx_list[0],
250              (int (*)()) hlink_compare_idev);
251
252         for (from = 0; from < ndx_count; from++) {
253                 for (file = FPTR(ndx_list[from]), idev = F_HL_IDEV(file), prev = -1;
254                      from < ndx_count-1;
255                      file = file_next, idev = idev_next, prev = ndx_list[from++])
256                 {
257                         file_next = FPTR(ndx_list[from+1]);
258                         idev_next = F_HL_IDEV(file_next);
259                         if (idev->dev != idev_next->dev || idev->ino != idev_next->ino)
260                                 break;
261                         pool_free(hlink_pool, 0, idev);
262                         if (prev < 0)
263                                 file->flags |= FLAG_HLINK_FIRST;
264                         F_HL_PREV(file) = prev;
265                 }
266                 pool_free(hlink_pool, 0, idev);
267                 if (prev < 0)
268                         file->flags &= ~FLAG_HLINKED;
269                 else {
270                         file->flags |= FLAG_HLINK_LAST;
271                         F_HL_PREV(file) = prev;
272                 }
273         }
274 }
275
276 /* The match routine for protocols >= 30. */
277 static void match_gnums(int32 *ndx_list, int ndx_count)
278 {
279         int32 from, prev;
280         struct file_struct *file, *file_next;
281         int gnum, gnum_next;
282
283         qsort(ndx_list, ndx_count, sizeof ndx_list[0],
284              (int (*)()) hlink_compare_gnum);
285
286         for (from = 0; from < ndx_count; from++) {
287                 for (file = FPTR(ndx_list[from]), gnum = F_HL_GNUM(file), prev = -1;
288                      from < ndx_count-1;
289                      file = file_next, gnum = gnum_next, prev = ndx_list[from++])
290                 {
291                         file_next = FPTR(ndx_list[from+1]);
292                         gnum_next = F_HL_GNUM(file_next);
293                         if (gnum != gnum_next)
294                                 break;
295                         if (prev < 0)
296                                 file->flags |= FLAG_HLINK_FIRST;
297                         F_HL_PREV(file) = prev;
298                 }
299                 if (prev < 0)
300                         file->flags &= ~FLAG_HLINKED;
301                 else {
302                         file->flags |= FLAG_HLINK_LAST;
303                         F_HL_PREV(file) = prev;
304                 }
305         }
306 }
307
308 /* Analyze the hard-links in the file-list by creating a list of all the
309  * items that have hlink data, sorting them, and matching up identical
310  * values into clusters.  These will be a single linked list from last
311  * to first when we're done. */
312 void match_hard_links(void)
313 {
314         int i, ndx_count = 0;
315         int32 *ndx_list;
316
317         if (!(ndx_list = new_array(int32, the_file_list->count)))
318                 out_of_memory("match_hard_links");
319
320         for (i = 0; i < the_file_list->count; i++) {
321                 if (F_IS_HLINKED(FPTR(i)))
322                         ndx_list[ndx_count++] = i;
323         }
324
325         if (ndx_count) {
326                 if (protocol_version >= 30)
327                         match_gnums(ndx_list, ndx_count);
328                 else {
329                         match_idevs(ndx_list, ndx_count);
330                         pool_destroy(hlink_pool);
331                 }
332         }
333         free(ndx_list);
334 }
335
336 static int maybe_hard_link(struct file_struct *file, int ndx,
337                            const char *fname, int statret, STRUCT_STAT *stp,
338                            const char *oldname, STRUCT_STAT *old_stp,
339                            const char *realname, int itemizing, enum logcode code)
340 {
341         if (statret == 0) {
342                 if (stp->st_dev == old_stp->st_dev
343                  && stp->st_ino == old_stp->st_ino) {
344                         if (itemizing) {
345                                 itemize(file, ndx, statret, stp,
346                                         ITEM_LOCAL_CHANGE | ITEM_XNAME_FOLLOWS,
347                                         0, "");
348                         }
349                         if (verbose > 1 && maybe_ATTRS_REPORT)
350                                 rprintf(FCLIENT, "%s is uptodate\n", fname);
351                         file->flags |= FLAG_HLINK_DONE;
352                         return 0;
353                 }
354                 if (make_backups) {
355                         if (!make_backup(fname))
356                                 return -1;
357                 } else if (robust_unlink(fname)) {
358                         rsyserr(FERROR, errno, "unlink %s failed",
359                                 full_fname(fname));
360                         return -1;
361                 }
362         }
363
364         if (hard_link_one(file, fname, oldname, 0)) {
365                 if (itemizing) {
366                         itemize(file, ndx, statret, stp,
367                                 ITEM_LOCAL_CHANGE | ITEM_XNAME_FOLLOWS, 0,
368                                 realname);
369                 }
370                 if (code != FNONE && verbose)
371                         rprintf(code, "%s => %s\n", fname, realname);
372                 return 0;
373         }
374         return -1;
375 }
376
377 /* Only called if FLAG_HLINKED is set and FLAG_HLINK_FIRST is not.  Returns:
378  * 0 = process the file, 1 = skip the file, -1 = error occurred. */
379 int hard_link_check(struct file_struct *file, int ndx, const char *fname,
380                     int statret, STRUCT_STAT *stp, int itemizing,
381                     enum logcode code)
382 {
383         STRUCT_STAT prev_st;
384         char prev_name[MAXPATHLEN], altbuf[MAXPATHLEN], *realname;
385         int alt_dest, prev_ndx = F_HL_PREV(file);
386         struct file_struct *prev_file = FPTR(prev_ndx);
387
388         /* Is the previous link is not complete yet? */
389         if (!(prev_file->flags & FLAG_HLINK_DONE)) {
390                 /* Is the previous link being transferred? */
391                 if (prev_file->flags & FLAG_SENT) {
392                         /* Add ourselves to the list of files that will be
393                          * updated when the transfer completes, and mark
394                          * ourself as waiting for the transfer. */
395                         F_HL_PREV(file) = F_HL_PREV(prev_file);
396                         F_HL_PREV(prev_file) = ndx;
397                         file->flags |= FLAG_SENT;
398                         return 1;
399                 }
400                 return 0;
401         }
402
403         /* There is a finished file to link with! */
404         if (!(prev_file->flags & FLAG_HLINK_FIRST)) {
405                 /* The previous previous will be marked with FIRST. */
406                 prev_ndx = F_HL_PREV(prev_file);
407                 prev_file = FPTR(prev_ndx);
408                 /* Update our previous pointer to point to the first. */
409                 F_HL_PREV(file) = prev_ndx;
410         }
411         alt_dest = F_HL_PREV(prev_file); /* alternate value when DONE && FIRST */
412         if (alt_dest >= 0 && dry_run) {
413                 pathjoin(prev_name, MAXPATHLEN, basis_dir[alt_dest],
414                          f_name(prev_file, NULL));
415                 f_name(prev_file, altbuf);
416                 realname = altbuf;
417         } else {
418                 f_name(prev_file, prev_name);
419                 realname = prev_name;
420         }
421
422         if (link_stat(prev_name, &prev_st, 0) < 0) {
423                 rsyserr(FERROR, errno, "stat %s failed",
424                         full_fname(prev_name));
425                 return -1;
426         }
427
428         if (statret < 0 && basis_dir[0] != NULL) {
429                 /* If we match an alt-dest item, we don't output this as a change. */
430                 char cmpbuf[MAXPATHLEN];
431                 STRUCT_STAT alt_st;
432                 int j = 0;
433                 do {
434                         pathjoin(cmpbuf, MAXPATHLEN, basis_dir[j], fname);
435                         if (link_stat(cmpbuf, &alt_st, 0) < 0)
436                                 continue;
437                         if (link_dest) {
438                                 if (prev_st.st_dev != alt_st.st_dev
439                                  || prev_st.st_ino != alt_st.st_ino)
440                                         continue;
441                                 statret = 1;
442                                 *stp = alt_st;
443                                 if (verbose < 2 || !stdout_format_has_i) {
444                                         itemizing = 0;
445                                         code = FNONE;
446                                         if (verbose > 1 && maybe_ATTRS_REPORT)
447                                                 rprintf(FCLIENT, "%s is uptodate\n", fname);
448                                 }
449                                 break;
450                         }
451                         if (!unchanged_file(cmpbuf, file, &alt_st))
452                                 continue;
453                         statret = 1;
454                         *stp = alt_st;
455                         if (unchanged_attrs(file, &alt_st))
456                                 break;
457                 } while (basis_dir[++j] != NULL);
458         }
459
460         if (maybe_hard_link(file, ndx, fname, statret, stp, prev_name, &prev_st,
461                             realname, itemizing, code) < 0)
462                 return -1;
463
464         if (remove_source_files == 1 && do_xfers)
465                 send_msg_int(MSG_SUCCESS, ndx);
466
467         return 1;
468 }
469
470 int hard_link_one(struct file_struct *file, const char *fname,
471                   const char *oldname, int terse)
472 {
473         if (do_link(oldname, fname) < 0) {
474                 enum logcode code;
475                 if (terse) {
476                         if (!verbose)
477                                 return -1;
478                         code = FINFO;
479                 } else
480                         code = FERROR;
481                 rsyserr(code, errno, "link %s => %s failed",
482                         full_fname(fname), oldname);
483                 return 0;
484         }
485
486         file->flags |= FLAG_HLINK_DONE;
487
488         return 1;
489 }
490
491 void finish_hard_link(struct file_struct *file, const char *fname,
492                       STRUCT_STAT *stp, int itemizing, enum logcode code,
493                       int alt_dest)
494 {
495         STRUCT_STAT st, prev_st;
496         char alt_name[MAXPATHLEN], *prev_name;
497         const char *our_name;
498         int prev_statret, ndx, prev_ndx = F_HL_PREV(file);
499
500         if (stp == NULL && prev_ndx >= 0) {
501                 if (link_stat(fname, &st, 0) < 0) {
502                         rsyserr(FERROR, errno, "stat %s failed",
503                                 full_fname(fname));
504                         return;
505                 }
506                 stp = &st;
507         }
508
509         /* FIRST combined with DONE means we were the first to get done. */
510         file->flags |= FLAG_HLINK_FIRST | FLAG_HLINK_DONE;
511         F_HL_PREV(file) = alt_dest;
512         if (alt_dest >= 0 && dry_run) {
513                 pathjoin(alt_name, MAXPATHLEN, basis_dir[alt_dest],
514                          f_name(file, NULL));
515                 our_name = alt_name;
516         } else
517                 our_name = fname;
518
519         while ((ndx = prev_ndx) >= 0) {
520                 file = FPTR(ndx);
521                 file->flags = (file->flags & ~FLAG_HLINK_FIRST) | FLAG_HLINK_DONE;
522                 prev_ndx = F_HL_PREV(file);
523                 prev_name = f_name(file, NULL);
524                 prev_statret = link_stat(prev_name, &prev_st, 0);
525                 if (maybe_hard_link(file, ndx, prev_name, prev_statret, &prev_st,
526                                     our_name, stp, fname, itemizing, code) < 0)
527                         continue;
528                 if (remove_source_files == 1 && do_xfers)
529                         send_msg_int(MSG_SUCCESS, ndx);
530         }
531 }
532 #endif