From c627d61324e9dcd5df833ee6236dd10415f5bac4 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Sat, 22 Jun 1996 05:04:20 +0000 Subject: [PATCH] Initial revision --- .cvsignore | 50 ++++ .ignore | 19 ++ COPYING | 339 ++++++++++++++++++++++ Makefile.in | 49 ++++ README | 82 ++++++ byteorder.h | 54 ++++ checksum.c | 78 +++++ configure.in | 48 ++++ exclude.c | 202 +++++++++++++ flist.c | 524 +++++++++++++++++++++++++++++++++ install-sh | 238 +++++++++++++++ lib/.cvsignore | 47 +++ lib/fnmatch.c | 204 +++++++++++++ lib/fnmatch.h | 69 +++++ lib/getopt.c | 751 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/getopt.h | 129 +++++++++ main.c | 718 +++++++++++++++++++++++++++++++++++++++++++++ match.c | 246 ++++++++++++++++ md4.c | 263 +++++++++++++++++ md4.h | 49 ++++ mkproto.awk | 41 +++ rsync.c | 733 ++++++++++++++++++++++++++++++++++++++++++++++ rsync.h | 228 +++++++++++++++ tech_report.tex | 310 ++++++++++++++++++++ util.c | 229 +++++++++++++++ version.h | 1 + 26 files changed, 5701 insertions(+) create mode 100644 .cvsignore create mode 100644 .ignore create mode 100644 COPYING create mode 100644 Makefile.in create mode 100644 README create mode 100644 byteorder.h create mode 100644 checksum.c create mode 100644 configure.in create mode 100644 exclude.c create mode 100644 flist.c create mode 100755 install-sh create mode 100644 lib/.cvsignore create mode 100644 lib/fnmatch.c create mode 100644 lib/fnmatch.h create mode 100644 lib/getopt.c create mode 100644 lib/getopt.h create mode 100644 main.c create mode 100644 match.c create mode 100644 md4.c create mode 100644 md4.h create mode 100644 mkproto.awk create mode 100644 rsync.c create mode 100644 rsync.h create mode 100644 tech_report.tex create mode 100644 util.c create mode 100644 version.h diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 00000000..5dfdd137 --- /dev/null +++ b/.cvsignore @@ -0,0 +1,50 @@ +Makefile +a +b +config.cache +config.h +config.log +config.status +dist.tar.gz +rsync +rsync-0.1 +rsync-0.1 +rsync-0.1 +rsync-0.1.tar.gz +rsync-0.2 +rsync-0.2 +rsync-0.2.tar.gz +rsync-0.3 +rsync-0.3 +rsync-0.3.tar.gz +rsync-0.4 +rsync-0.4 +rsync-0.4.tar.gz +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5.tar.gz +rsync-0.6 +rsync-0.7 +rsync-0.7 +rsync-0.8 +rsync-0.8 +rsync-0.8 +rsync-0.8 +rsync-0.8.tar.gz +rsync-0.9 +rsync-0.9.tar.gz +rsync-ERSION +rsync.aux +rsync.dvi +rsync.log +tech_report.aux +tech_report.dvi +tech_report.log +tech_report.ps +test diff --git a/.ignore b/.ignore new file mode 100644 index 00000000..dd4c59c1 --- /dev/null +++ b/.ignore @@ -0,0 +1,19 @@ +.#* +*.log +Makefile +config.h +*.o +CVS +.ignore +.cvsignore +*~ +rsync +config.status +config.cache +TAGS +config.log +test +*.gz +rsync-* +*.dvi +*.aux diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..a43ea212 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 00000000..9402dc8c --- /dev/null +++ b/Makefile.in @@ -0,0 +1,49 @@ +# Makefile for rsync. This is processed by configure to produce the final +# Makefile + +INSTALL_BIN=@prefix@/bin +INSTALL_MAN=@prefix@/man + +CCOPTFLAGS = -O + +LIBS=@LIBS@ +CC=@CC@ $(CCOPTFLAGS) + +INSTALLCMD=@INSTALL@ + + +SRC=@srcdir@ +SHELL=/bin/sh + + +.SUFFIXES: +.SUFFIXES: .c .o + +LIBOBJ=lib/getopt.o lib/fnmatch.o +OBJS=rsync.o exclude.o util.o md4.o main.o checksum.o match.o flist.o $(LIBOBJ) + +.c.o: + $(CC) $(CFLAGS) -c $*.c -o $*.o + +all: rsync + +install: all + ${INSTALLCMD} -m 755 rsync ${INSTALL_BIN} + ${INSTALLCMD} -m 644 rsync.1 ${INSTALL_MAN}/man1 + +rsync: $(OBJS) + $(CC) $(CFLAGS) -o rsync $(OBJS) $(LIBS) + +proto: + cat *.c | awk -f mkproto.awk > proto.h + +clean: + rm -f *~ $(OBJS) rsync config.cache config.log config.status + +dist: + tar --exclude-from .ignore -czf dist.tar.gz . + -mkdir rsync-$(VERSION) + (cd rsync-$(VERSION) ; tar xzf ../dist.tar.gz) + tar -czf rsync-$(VERSION).tar.gz rsync-$(VERSION) + rm -f dist.tar.gz + echo rsync-$(VERSION) >> .cvsignore diff --git a/README b/README new file mode 100644 index 00000000..1440c343 --- /dev/null +++ b/README @@ -0,0 +1,82 @@ +WHAT IS RSYNC? +-------------- + +rsync is a replacement for rcp that has many more features. + +rsyns uses the "rsync algorithm" which provides a very fast method for +bringing remote files into sync. It does this by sending just the +differences in the files across the link, without requiring that both +sets of files are present at one of the ends of the link beforehand. +At first glance this may seem impossible because the calculation of +diffs between two files normally requires local access to both +files. + +A technical report describing the rsync algorithm is included with +this package. + + +USAGE +----- + +Basically you use rsync just like rcp, but rsync has many additional options. + +Here is a brief description of available options: + +-v, --verbose increase verbosity +-c, --checksum always checksum +-a, --archive archive mode (same as -rlptDog) +-r, --recursive recurse into directories +-b, --backup make backups (default ~ extension) +-u, --update update only (don't overwrite newer files) +-l, --links preserve soft links +-p, --perms preserve permissions +-o, --owner preserve owner (root only) +-g, --group preserve group +-D, --devices preserve devices (root only) +-t, --times preserve times +-n, --dry-run show what would have been transferred +-x, --one-file-system don't cross filesystem boundaries +-B, --block-size SIZE checksum blocking size +-e, --rsh COMMAND specify rsh replacement + --rsync-path PATH specify path to rsync on the remote machine +-C, --cvs-exclude auto ignore files in the same way CVS does + --delete delete files that don't exist on the sending side +-I, --ignore-times don't exclude files that match length and time + --exclude FILE exclude file FILE + --exclude-from FILE exclude files listed in FILE + --suffix SUFFIX override backup suffix + --version print version number + + + +SETUP +----- + +Rsync uses rsh or ssh for communication. It does not need to be setuid +and requires no special privilages for installation. It does not +require a inetd entry or a daemon. You must, however, have a working +rsh or ssh system. Using ssh is recommended for its security and +compression features. + +To install rsync, first run the "configure" script. This will create a +Makefile and config.h appropriate for your system. Then type +"make". + +Once built put a copy of rsync in your search path on the local and +remote systems (or use "make install"). That's it! + + +COPYRIGHT +--------- + +Rsync was written by Andrew Tridgell and Paul Mackerras, and is +available under the GPL. + +Andrew.Tridgell@anu.edu.au +paulus@cs.anu.edu.au + + +AVAILABILITY +------------ + +The main ftp site for rsync is ftp://samba.anu.edu.au/pub/rsync diff --git a/byteorder.h b/byteorder.h new file mode 100644 index 00000000..5e0a171b --- /dev/null +++ b/byteorder.h @@ -0,0 +1,54 @@ +/* + simple byteorder handling + Copyright (C) Andrew Tridgell 1992-1995 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#undef CAREFUL_ALIGNMENT + +/* we know that the x86 can handle misalignment and has the "right" + byteorder */ +#ifdef __i386__ +#define CAREFUL_ALIGNMENT 0 +#endif + +#ifndef CAREFUL_ALIGNMENT +#define CAREFUL_ALIGNMENT 1 +#endif + +#define CVAL(buf,pos) (((unsigned char *)(buf))[pos]) +#define PVAL(buf,pos) ((unsigned)CVAL(buf,pos)) +#define SCVAL(buf,pos,val) (CVAL(buf,pos) = (val)) + + +#if CAREFUL_ALIGNMENT +#define SVAL(buf,pos) (PVAL(buf,pos)|PVAL(buf,(pos)+1)<<8) +#define IVAL(buf,pos) (SVAL(buf,pos)|SVAL(buf,(pos)+2)<<16) +#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) +#define SIVALX(buf,pos,val) (SSVALX(buf,pos,val&0xFFFF),SSVALX(buf,pos+2,val>>16)) +#define SIVAL(buf,pos,val) SIVALX((buf),(pos),((uint32)(val))) +#else +/* this handles things for architectures like the 386 that can handle + alignment errors */ +/* + WARNING: This section is dependent on the length of int32 + being correct. set CAREFUL_ALIGNMENT if it is not. +*/ +#define IVAL(buf,pos) (*(uint32 *)((char *)(buf) + (pos))) +#define SIVAL(buf,pos,val) IVAL(buf,pos)=((uint32)(val)) +#endif + + diff --git a/checksum.c b/checksum.c new file mode 100644 index 00000000..cdd554fd --- /dev/null +++ b/checksum.c @@ -0,0 +1,78 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "rsync.h" + + +/* + a simple 32 bit checksum that can be upadted from either end + (inspired by Mark Adler's Adler-32 checksum) + */ +uint32 get_checksum1(char *buf,int len) +{ + int i; + uint32 s1, s2; + + s1 = s2 = 0; + for (i = 0; i < len; i++) { + s1 += buf[i]; + s2 += s1; + } + return (s1 & 0xffff) + (s2 << 16); +} + + +void get_checksum2(char *buf,int len,char *sum) +{ + char buf2[64]; + int i; + MDstruct MD; + + MDbegin(&MD); + for(i = 0; i + 64 <= len; i += 64) { + bcopy(buf+i,buf2,64); + MDupdate(&MD, buf2, 512); + } + bcopy(buf+i,buf2,len-i); + MDupdate(&MD, buf2, (len-i)*8); + SIVAL(sum,0,MD.buffer[0]); + SIVAL(sum,4,MD.buffer[1]); + SIVAL(sum,8,MD.buffer[2]); + SIVAL(sum,12,MD.buffer[3]); +} + +void file_checksum(char *fname,char *sum,off_t size) +{ + char *buf; + int fd; + bzero(sum,SUM_LENGTH); + + fd = open(fname,O_RDONLY); + if (fd == -1) return; + + buf = map_file(fd,size); + if (!buf) { + close(fd); + return; + } + + get_checksum2(buf,size,sum); + close(fd); + unmap_file(buf,size); +} diff --git a/configure.in b/configure.in new file mode 100644 index 00000000..9a04b264 --- /dev/null +++ b/configure.in @@ -0,0 +1,48 @@ +dnl Process this file with autoconf to produce a configure script. +AC_INIT(byteorder.h) +AC_CONFIG_HEADER(config.h) + +dnl Checks for programs. +AC_PROG_CC +AC_PROG_INSTALL +AC_SUBST(SHELL) + +AC_HEADER_DIRENT +AC_HEADER_STDC +AC_HEADER_TIME +AC_HEADER_SYS_WAIT +AC_CHECK_HEADERS(sys/fcntl.h fcntl.h sys/time.h unistd.h utime.h grp.h) +AC_CHECK_HEADERS(compat.h sys/param.h ctype.h sys/wait.h) + +AC_CHECK_SIZEOF(int) +AC_CHECK_SIZEOF(long) +AC_CHECK_SIZEOF(short) + +AC_C_INLINE + +AC_TYPE_SIGNAL +AC_TYPE_UID_T +AC_TYPE_MODE_T +AC_TYPE_OFF_T +AC_TYPE_SIZE_T +AC_TYPE_PID_T +AC_STRUCT_ST_RDEV + +echo -n "checking for errno in errno.h... " +AC_TRY_COMPILE([#include ],[int i = errno], +echo yes; AC_DEFINE(HAVE_ERRNO_DECL), +echo no) + +AC_FUNC_MEMCMP +AC_FUNC_MMAP +AC_FUNC_UTIME_NULL +AC_CHECK_FUNCS(waitpid strtok pipe getcwd mkdir strdup strerror chown chmod mknod) +AC_CHECK_FUNCS(fchmod fstat strchr bcopy bzero readlink utime utimes getopt_long) + +echo -n "checking for working fnmatch... " +AC_TRY_RUN([#include +main() { exit(fnmatch("*.o", "x.o", 0) == 0? 0: 1); }], +echo yes;AC_DEFINE(HAVE_FNMATCH), +echo no) + +AC_OUTPUT(Makefile) diff --git a/exclude.c b/exclude.c new file mode 100644 index 00000000..b5b230b9 --- /dev/null +++ b/exclude.c @@ -0,0 +1,202 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* + a lot of this stuff was derived from GNU tar + */ + +#include "rsync.h" + +extern int verbose; + +static char **exclude_list = NULL; + +static int is_regex(char *str) +{ + return strchr(str, '*') || strchr(str, '[') || strchr(str, '?'); +} + + +static int check_one_exclude(char *name,char *pattern) +{ + char *str; + char *p; + + if (!strchr(pattern,'/') && (p=strrchr(name,'/'))) + name = p+1; + + if (!name[0]) return 0; + + if (is_regex(pattern)) { + if (fnmatch(pattern, name, 0) == 0) + return 1; + } else { + int l1 = strlen(name); + int l2 = strlen(pattern); + if (l2 <= l1 && + strcmp(name+(l1-l2),pattern) == 0 && + (l1==l2 || name[l1-(l2+1)] == '/')) + return 1; + } + + return 0; +} + + +int check_exclude(char *name,char **local_exclude_list) +{ + int n; + + if (exclude_list) { + for (n=0; exclude_list[n]; n++) + if (check_one_exclude(name,exclude_list[n])) + return 1; + } + + if (local_exclude_list) { + for (n=0; local_exclude_list[n]; n++) + if (check_one_exclude(name,local_exclude_list[n])) + return 1; + } + + return 0; +} + + +void add_exclude_list(char *pattern,char ***list) +{ + int len=0; + if (list && *list) + for (; (*list)[len]; len++) ; + + if (strcmp(pattern,"!") == 0) { + if (verbose > 2) + fprintf(stderr,"clearing exclude list\n"); + while ((len)--) + free((*list)[len]); + free((*list)); + *list = NULL; + return; + } + + if (!*list) { + *list = (char **)malloc(sizeof(char *)*2); + } else { + *list = (char **)realloc(*list,sizeof(char *)*(len+2)); + } + + if (!*list || !((*list)[len] = strdup(pattern))) + out_of_memory("add_exclude"); + + if (verbose > 2) + fprintf(stderr,"add_exclude(%s)\n",pattern); + + (*list)[len+1] = NULL; +} + +void add_exclude(char *pattern) +{ + add_exclude_list(pattern,&exclude_list); +} + +char **make_exclude_list(char *fname,char **list1,int fatal) +{ + char **list=list1; + FILE *f = fopen(fname,"r"); + char line[MAXPATHLEN]; + if (!f) { + if (fatal) { + fprintf(stderr,"%s : %s\n",fname,strerror(errno)); + exit(1); + } + return list; + } + + while (fgets(line,MAXPATHLEN,f)) { + int l = strlen(line); + if (l && line[l-1] == '\n') l--; + line[l] = 0; + if (line[0]) add_exclude_list(line,&list); + } + fclose(f); + return list; +} + + +void add_exclude_file(char *fname,int fatal) +{ + exclude_list = make_exclude_list(fname,exclude_list,fatal); +} + + +void send_exclude_list(int f) +{ + int i; + if (exclude_list) + for (i=0;exclude_list[i];i++) { + int l = strlen(exclude_list[i]); + if (l == 0) continue; + write_int(f,l); + write_buf(f,exclude_list[i],l); + } + write_int(f,0); +} + + +void recv_exclude_list(int f) +{ + char line[MAXPATHLEN]; + int l; + while ((l=read_int(f))) { + read_buf(f,line,l); + line[l] = 0; + add_exclude(line); + } +} + + +static char *cvs_ignore_list[] = { + "RCS","SCCS","CVS","CVS.adm","RCSLOG","cvslog.*", + "tags","TAGS",".make.state",".nse_depinfo", + "*~", "#*", ".#*", ",*", "*.old", "*.bak", "*.BAK", "*.orig", + "*.rej", ".del-*", "*.a", "*.o", "*.obj", "*.so", "*.Z", "*.elc", "*.ln", + "core",NULL}; + + + +void add_cvs_excludes(void) +{ + char fname[MAXPATHLEN]; + char *p; + int i; + + for (i=0; cvs_ignore_list[i]; i++) + add_exclude(cvs_ignore_list[i]); + + if ((p=getenv("HOME"))) { + sprintf(fname,"%s/.cvsignore",p); + add_exclude_file(fname,0); + } + + if ((p=getenv("CVSIGNORE"))) { + char *tok; + for (tok=strtok(p," "); tok; tok=strtok(NULL," ")) + add_exclude(tok); + } +} diff --git a/flist.c b/flist.c new file mode 100644 index 00000000..8bf964d2 --- /dev/null +++ b/flist.c @@ -0,0 +1,524 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* generate and receive file lists */ + +#include "rsync.h" + +extern int verbose; +extern int am_server; +extern int always_checksum; +extern off_t total_size; + +extern int cvs_exclude; + +extern int one_file_system; +extern int make_backups; +extern int preserve_links; +extern int preserve_perms; +extern int preserve_devices; +extern int preserve_uid; +extern int preserve_gid; +extern int preserve_times; + +static char **local_exclude_list = NULL; + +static void clean_fname(char *name); + + +/* + This function is used to check if a file should be included/excluded + from the list of files based on its name and type etc + */ +static int match_file_name(char *fname,struct stat *st) +{ + if (check_exclude(fname,local_exclude_list)) { + if (verbose > 2) + fprintf(stderr,"excluding file %s\n",fname); + return 0; + } + return 1; +} + +/* used by the one_file_system code */ +static dev_t filesystem_dev; + +static void set_filesystem(char *fname) +{ + struct stat st; + if (lstat(fname,&st) != 0) return; + filesystem_dev = st.st_dev; +} + + +static void send_directory(int f,struct file_list *flist,char *dir); + +static char *flist_dir = NULL; + +static void send_file_entry(struct file_struct *file,int f) +{ + if (f == -1) return; + + write_int(f,strlen(file->name)); + write_buf(f,file->name,strlen(file->name)); + write_int(f,(int)file->modtime); + write_int(f,(int)file->length); + write_int(f,(int)file->mode); + if (preserve_uid) + write_int(f,(int)file->uid); + if (preserve_gid) + write_int(f,(int)file->gid); + if (preserve_devices) { + if (verbose > 2) + fprintf(stderr,"dev=0x%x\n",(int)file->dev); + write_int(f,(int)file->dev); + } + +#if SUPPORT_LINKS + if (preserve_links && S_ISLNK(file->mode)) { + write_int(f,strlen(file->link)); + write_buf(f,file->link,strlen(file->link)); + } +#endif + + if (always_checksum) { + write_buf(f,file->sum,SUM_LENGTH); + } +} + + +static struct file_struct *make_file(int recurse,char *fname) +{ + static struct file_struct file; + struct stat st; + char sum[SUM_LENGTH]; + + bzero(sum,SUM_LENGTH); + + if (lstat(fname,&st) != 0) { + fprintf(stderr,"%s: %s\n", + fname,strerror(errno)); + return NULL; + } + + if (S_ISDIR(st.st_mode) && !recurse) { + fprintf(stderr,"skipping directory %s\n",fname); + return NULL; + } + + if (one_file_system && st.st_dev != filesystem_dev) + return NULL; + + if (!match_file_name(fname,&st)) + return NULL; + + if (verbose > 2) + fprintf(stderr,"make_file(%s)\n",fname); + + file.name = strdup(fname); + file.modtime = st.st_mtime; + file.length = st.st_size; + file.mode = st.st_mode; + file.uid = st.st_uid; + file.gid = st.st_gid; +#ifdef HAVE_ST_RDEV + file.dev = st.st_rdev; +#endif + +#if SUPPORT_LINKS + if (S_ISLNK(st.st_mode)) { + int l; + char lnk[MAXPATHLEN]; + if ((l=readlink(fname,lnk,MAXPATHLEN-1)) == -1) { + fprintf(stderr,"readlink %s : %s\n",fname,strerror(errno)); + return NULL; + } + lnk[l] = 0; + file.link = strdup(lnk); + } +#endif + + if (always_checksum && S_ISREG(st.st_mode)) { + file_checksum(fname,file.sum,st.st_size); + } + + if (flist_dir) + file.dir = strdup(flist_dir); + else + file.dir = NULL; + + if (!S_ISDIR(st.st_mode)) + total_size += st.st_size; + + return &file; +} + + + +static void send_file_name(int f,struct file_list *flist, + int recurse,char *fname) +{ + struct file_struct *file; + + file = make_file(recurse,fname); + + if (!file) return; + + if (flist->count >= flist->malloced) { + flist->malloced += 100; + flist->files = (struct file_struct *)realloc(flist->files, + sizeof(flist->files[0])* + flist->malloced); + if (!flist->files) + out_of_memory("send_file_name"); + } + + if (strcmp(file->name,".") && strcmp(file->name,"/")) { + flist->files[flist->count++] = *file; + send_file_entry(file,f); + } + + if (S_ISDIR(file->mode) && recurse) { + char **last_exclude_list = local_exclude_list; + send_directory(f,flist,file->name); + local_exclude_list = last_exclude_list; + return; + } +} + + + +static void send_directory(int f,struct file_list *flist,char *dir) +{ + DIR *d; + struct dirent *di; + char fname[MAXPATHLEN]; + int l; + char *p; + + d = opendir(dir); + if (!d) { + fprintf(stderr,"%s: %s\n", + dir,strerror(errno)); + return; + } + + strcpy(fname,dir); + l = strlen(fname); + if (fname[l-1] != '/') + strcat(fname,"/"); + p = fname + strlen(fname); + + if (cvs_exclude) { + strcpy(p,".cvsignore"); + local_exclude_list = make_exclude_list(fname,NULL,0); + } + + for (di=readdir(d); di; di=readdir(d)) { + if (strcmp(di->d_name,".")==0 || + strcmp(di->d_name,"..")==0) + continue; + strcpy(p,di->d_name); + send_file_name(f,flist,1,fname); + } + + closedir(d); +} + + + +struct file_list *send_file_list(int f,int recurse,int argc,char *argv[]) +{ + int i,l; + struct stat st; + char *p,*dir; + char dbuf[MAXPATHLEN]; + struct file_list *flist; + + if (verbose && recurse) { + fprintf(am_server?stderr:stdout,"building file list ... "); + fflush(am_server?stderr:stdout); + } + + flist = (struct file_list *)malloc(sizeof(flist[0])); + if (!flist) out_of_memory("send_file_list"); + + flist->count=0; + flist->malloced = 100; + flist->files = (struct file_struct *)malloc(sizeof(flist->files[0])* + flist->malloced); + if (!flist->files) out_of_memory("send_file_list"); + + for (i=0;i 2) + fprintf(stderr,"recv_file_list starting\n"); + + flist = (struct file_list *)malloc(sizeof(flist[0])); + if (!flist) + goto oom; + + flist->count=0; + flist->malloced=100; + flist->files = (struct file_struct *)malloc(sizeof(flist->files[0])* + flist->malloced); + if (!flist->files) + goto oom; + + + for (l=read_int(f); l; l=read_int(f)) { + int i = flist->count; + + if (i >= flist->malloced) { + flist->malloced += 100; + flist->files =(struct file_struct *)realloc(flist->files, + sizeof(flist->files[0])* + flist->malloced); + if (!flist->files) + goto oom; + } + + flist->files[i].name = (char *)malloc(l+1); + if (!flist->files[i].name) + goto oom; + + read_buf(f,flist->files[i].name,l); + flist->files[i].name[l] = 0; + flist->files[i].modtime = (time_t)read_int(f); + flist->files[i].length = (off_t)read_int(f); + flist->files[i].mode = (mode_t)read_int(f); + if (preserve_uid) + flist->files[i].uid = (uid_t)read_int(f); + if (preserve_gid) + flist->files[i].gid = (gid_t)read_int(f); + if (preserve_devices) + flist->files[i].dev = (dev_t)read_int(f); + +#if SUPPORT_LINKS + if (preserve_links && S_ISLNK(flist->files[i].mode)) { + int l = read_int(f); + flist->files[i].link = (char *)malloc(l+1); + read_buf(f,flist->files[i].link,l); + flist->files[i].link[l] = 0; + } +#endif + + if (always_checksum) + read_buf(f,flist->files[i].sum,SUM_LENGTH); + + if (S_ISREG(flist->files[i].mode)) + total_size += flist->files[i].length; + + flist->count++; + + if (verbose > 2) + fprintf(stderr,"recv_file_name(%s)\n",flist->files[i].name); + } + + + if (verbose > 2) + fprintf(stderr,"received %d names\n",flist->count); + + clean_flist(flist); + + return flist; + +oom: + out_of_memory("recv_file_list"); + return NULL; /* not reached */ +} + + +static int flist_compare(struct file_struct *f1,struct file_struct *f2) +{ + if (!f1->name && !f2->name) return 0; + if (!f1->name) return -1; + if (!f2->name) return 1; + return strcmp(f1->name,f2->name); +} + + +int flist_find(struct file_list *flist,struct file_struct *f) +{ + int low=0,high=flist->count; + + while (low != high) { + int mid = (low+high)/2; + int ret = flist_compare(&flist->files[mid],f); + if (ret == 0) return mid; + if (ret > 0) + high=mid; + else + low=mid+1; + } + if (flist_compare(&flist->files[low],f) == 0) + return low; + return -1; +} + + +static void clean_fname(char *name) +{ + char *p; + int l; + int modified = 1; + + if (!name) return; + + while (modified) { + modified = 0; + + if ((p=strstr(name,"/./"))) { + modified = 1; + while (*p) { + p[0] = p[2]; + p++; + } + } + + if ((p=strstr(name,"//"))) { + modified = 1; + while (*p) { + p[0] = p[1]; + p++; + } + } + + if (strncmp(p=name,"./",2) == 0) { + modified = 1; + while (*p) { + p[0] = p[2]; + p++; + } + } + + l = strlen(p=name); + if (l > 1 && p[l-1] == '/') { + modified = 1; + p[l-1] = 0; + } + } +} + + +/* + * This routine ensures we don't have any duplicate names in our file list. + * duplicate names can cause corruption because of the pipelining + */ +void clean_flist(struct file_list *flist) +{ + int i; + + if (!flist || flist->count == 0) + return; + + for (i=0;icount;i++) { + clean_fname(flist->files[i].name); + } + + qsort(flist->files,flist->count, + sizeof(flist->files[0]), + (int (*)())flist_compare); + + for (i=1;icount;i++) { + if (flist->files[i].name && + strcmp(flist->files[i].name,flist->files[i-1].name) == 0) { + if (verbose > 1 && !am_server) + fprintf(stderr,"removing duplicate name %s from file list\n", + flist->files[i].name); + free(flist->files[i-1].name); + flist->files[i-1].name = NULL; + } + } +} + diff --git a/install-sh b/install-sh new file mode 100755 index 00000000..58719246 --- /dev/null +++ b/install-sh @@ -0,0 +1,238 @@ +#! /bin/sh +# +# install - install a program, script, or datafile +# This comes from X11R5. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. +# + + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + + +# put in absolute paths if you don't have them in your path; or use env. vars. + +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +transformbasename="" +transform_arg="" +instcmd="$mvprog" +chmodcmd="$chmodprog 0755" +chowncmd="" +chgrpcmd="" +stripcmd="" +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src="" +dst="" +dir_arg="" + +while [ x"$1" != x ]; do + case $1 in + -c) instcmd="$cpprog" + shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + -s) stripcmd="$stripprog" + shift + continue;; + + -t=*) transformarg=`echo $1 | sed 's/-t=//'` + shift + continue;; + + -b=*) transformbasename=`echo $1 | sed 's/-b=//'` + shift + continue;; + + *) if [ x"$src" = x ] + then + src=$1 + else + # this colon is to work around a 386BSD /bin/sh bug + : + dst=$1 + fi + shift + continue;; + esac +done + +if [ x"$src" = x ] +then + echo "install: no input file specified" + exit 1 +else + true +fi + +if [ x"$dir_arg" != x ]; then + dst=$src + src="" + + if [ -d $dst ]; then + instcmd=: + else + instcmd=mkdir + fi +else + +# Waiting for this to be detected by the "$instcmd $src $dsttmp" command +# might cause directories to be created, which would be especially bad +# if $src (and thus $dsttmp) contains '*'. + + if [ -f $src -o -d $src ] + then + true + else + echo "install: $src does not exist" + exit 1 + fi + + if [ x"$dst" = x ] + then + echo "install: no destination specified" + exit 1 + else + true + fi + +# If destination is a directory, append the input filename; if your system +# does not like double slashes in filenames, you may need to add some logic + + if [ -d $dst ] + then + dst="$dst"/`basename $src` + else + true + fi +fi + +## this sed command emulates the dirname command +dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + +# Make sure that the destination directory exists. +# this part is taken from Noah Friedman's mkinstalldirs script + +# Skip lots of stat calls in the usual case. +if [ ! -d "$dstdir" ]; then +defaultIFS=' +' +IFS="${IFS-${defaultIFS}}" + +oIFS="${IFS}" +# Some sh's can't handle IFS=/ for some reason. +IFS='%' +set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` +IFS="${oIFS}" + +pathcomp='' + +while [ $# -ne 0 ] ; do + pathcomp="${pathcomp}${1}" + shift + + if [ ! -d "${pathcomp}" ] ; + then + $mkdirprog "${pathcomp}" + else + true + fi + + pathcomp="${pathcomp}/" +done +fi + +if [ x"$dir_arg" != x ] +then + $doit $instcmd $dst && + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi +else + +# If we're going to rename the final executable, determine the name now. + + if [ x"$transformarg" = x ] + then + dstfile=`basename $dst` + else + dstfile=`basename $dst $transformbasename | + sed $transformarg`$transformbasename + fi + +# don't allow the sed command to completely eliminate the filename + + if [ x"$dstfile" = x ] + then + dstfile=`basename $dst` + else + true + fi + +# Make a temp file name in the proper directory. + + dsttmp=$dstdir/#inst.$$# + +# Move or copy the file name to the temp name + + $doit $instcmd $src $dsttmp && + + trap "rm -f ${dsttmp}" 0 && + +# and set any options; do chmod last to preserve setuid bits + +# If any of these fail, we abort the whole thing. If we want to +# ignore errors from any of these, just make sure not to ignore +# errors from the above "$doit $instcmd $src $dsttmp" command. + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && + +# Now rename the file to the real destination. + + $doit $rmcmd -f $dstdir/$dstfile && + $doit $mvcmd $dsttmp $dstdir/$dstfile + +fi && + + +exit 0 diff --git a/lib/.cvsignore b/lib/.cvsignore new file mode 100644 index 00000000..33bab952 --- /dev/null +++ b/lib/.cvsignore @@ -0,0 +1,47 @@ +Makefile +a +b +config.cache +config.h +config.log +config.status +dist.tar.gz +rsync +rsync-0.1 +rsync-0.1 +rsync-0.1 +rsync-0.1.tar.gz +rsync-0.2 +rsync-0.2 +rsync-0.2.tar.gz +rsync-0.3 +rsync-0.3 +rsync-0.3.tar.gz +rsync-0.4 +rsync-0.4 +rsync-0.4.tar.gz +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5.tar.gz +rsync-ERSION +rsync.aux +rsync.dvi +rsync.log +tech_report.aux +tech_report.dvi +tech_report.log +tech_report.ps +test +rsync-0.6 +rsync-0.7 +rsync-0.7 +rsync-0.8 +rsync-0.8 +rsync-0.8 +rsync-0.8 diff --git a/lib/fnmatch.c b/lib/fnmatch.c new file mode 100644 index 00000000..6befc3b9 --- /dev/null +++ b/lib/fnmatch.c @@ -0,0 +1,204 @@ +#include "../rsync.h" +#ifndef HAVE_FNMATCH + +/* Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc. + +NOTE: The canonical source of this file is maintained with the GNU C Library. +Bugs can be reported to bug-glibc@prep.ai.mit.edu. + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#if defined (STDC_HEADERS) || !defined (isascii) +#define ISASCII(c) 1 +#else +#define ISASCII(c) isascii(c) +#endif + +#define ISUPPER(c) (ISASCII (c) && isupper (c)) + + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if !defined(__GNU_LIBRARY__) && !defined(STDC_HEADERS) +extern int errno; +#endif + +/* Match STRING against the filename pattern PATTERN, returning zero if + it matches, nonzero if not. */ +int +fnmatch (pattern, string, flags) + const char *pattern; + const char *string; + int flags; +{ + register const char *p = pattern, *n = string; + register char c; + +/* Note that this evalutes C many times. */ +#define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c)) + + while ((c = *p++) != '\0') + { + c = FOLD (c); + + switch (c) + { + case '?': + if (*n == '\0') + return FNM_NOMATCH; + else if ((flags & FNM_FILE_NAME) && *n == '/') + return FNM_NOMATCH; + else if ((flags & FNM_PERIOD) && *n == '.' && + (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/'))) + return FNM_NOMATCH; + break; + + case '\\': + if (!(flags & FNM_NOESCAPE)) + { + c = *p++; + c = FOLD (c); + } + if (FOLD (*n) != c) + return FNM_NOMATCH; + break; + + case '*': + if ((flags & FNM_PERIOD) && *n == '.' && + (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/'))) + return FNM_NOMATCH; + + for (c = *p++; c == '?' || c == '*'; c = *p++, ++n) + if (((flags & FNM_FILE_NAME) && *n == '/') || + (c == '?' && *n == '\0')) + return FNM_NOMATCH; + + if (c == '\0') + return 0; + + { + char c1 = (!(flags & FNM_NOESCAPE) && c == '\\') ? *p : c; + c1 = FOLD (c1); + for (--p; *n != '\0'; ++n) + if ((c == '[' || FOLD (*n) == c1) && + fnmatch (p, n, flags & ~FNM_PERIOD) == 0) + return 0; + return FNM_NOMATCH; + } + + case '[': + { + /* Nonzero if the sense of the character class is inverted. */ + register int not; + + if (*n == '\0') + return FNM_NOMATCH; + + if ((flags & FNM_PERIOD) && *n == '.' && + (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/'))) + return FNM_NOMATCH; + + not = (*p == '!' || *p == '^'); + if (not) + ++p; + + c = *p++; + for (;;) + { + register char cstart = c, cend = c; + + if (!(flags & FNM_NOESCAPE) && c == '\\') + cstart = cend = *p++; + + cstart = cend = FOLD (cstart); + + if (c == '\0') + /* [ (unterminated) loses. */ + return FNM_NOMATCH; + + c = *p++; + c = FOLD (c); + + if ((flags & FNM_FILE_NAME) && c == '/') + /* [/] can never match. */ + return FNM_NOMATCH; + + if (c == '-' && *p != ']') + { + cend = *p++; + if (!(flags & FNM_NOESCAPE) && cend == '\\') + cend = *p++; + if (cend == '\0') + return FNM_NOMATCH; + cend = FOLD (cend); + + c = *p++; + } + + if (FOLD (*n) >= cstart && FOLD (*n) <= cend) + goto matched; + + if (c == ']') + break; + } + if (!not) + return FNM_NOMATCH; + break; + + matched:; + /* Skip the rest of the [...] that already matched. */ + while (c != ']') + { + if (c == '\0') + /* [... (unterminated) loses. */ + return FNM_NOMATCH; + + c = *p++; + if (!(flags & FNM_NOESCAPE) && c == '\\') + /* XXX 1003.2d11 is unclear if this is right. */ + ++p; + } + if (not) + return FNM_NOMATCH; + } + break; + + default: + if (c != FOLD (*n)) + return FNM_NOMATCH; + } + + ++n; + } + + if (*n == '\0') + return 0; + + if ((flags & FNM_LEADING_DIR) && *n == '/') + /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ + return 0; + + return FNM_NOMATCH; +} + +#else /* HAVE_FNMATCH */ +void fnmatch_dummy(void) {} +#endif diff --git a/lib/fnmatch.h b/lib/fnmatch.h new file mode 100644 index 00000000..ea420e26 --- /dev/null +++ b/lib/fnmatch.h @@ -0,0 +1,69 @@ +/* Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc. + +NOTE: The canonical source of this file is maintained with the GNU C Library. +Bugs can be reported to bug-glibc@prep.ai.mit.edu. + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _FNMATCH_H + +#define _FNMATCH_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined (__cplusplus) || (defined (__STDC__) && __STDC__) +#undef __P +#define __P(protos) protos +#else /* Not C++ or ANSI C. */ +#undef __P +#define __P(protos) () +/* We can get away without defining `const' here only because in this file + it is used only inside the prototype for `fnmatch', which is elided in + non-ANSI C where `const' is problematical. */ +#endif /* C++ or ANSI C. */ + + +/* We #undef these before defining them because some losing systems + (HP-UX A.08.07 for example) define these in . */ +#undef FNM_PATHNAME +#undef FNM_NOESCAPE +#undef FNM_PERIOD + +/* Bits set in the FLAGS argument to `fnmatch'. */ +#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */ +#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */ +#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */ + +#if !defined (_POSIX_C_SOURCE) || _POSIX_C_SOURCE < 2 || defined (_GNU_SOURCE) +#define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */ +#define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */ +#define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */ +#endif + +/* Value returned by `fnmatch' if STRING does not match PATTERN. */ +#define FNM_NOMATCH 1 + +/* Match STRING against the filename pattern PATTERN, + returning zero if it matches, FNM_NOMATCH if not. */ +extern int fnmatch __P ((const char *__pattern, const char *__string, + int __flags)); + +#ifdef __cplusplus +} +#endif + +#endif /* fnmatch.h */ diff --git a/lib/getopt.c b/lib/getopt.c new file mode 100644 index 00000000..2270b568 --- /dev/null +++ b/lib/getopt.c @@ -0,0 +1,751 @@ +#include "../rsync.h" +#ifndef HAVE_GETOPT_LONG + +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu + before changing it! + + Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +#define _NO_PROTO +#endif + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if defined (_LIBC) || !defined (__GNU_LIBRARY__) + +/* This is for other GNU distributions with internationalized messages. + The GNU C Library itself does not yet support such messages. */ +#if HAVE_LIBINTL_H +# include +#else +# define gettext(msgid) (msgid) +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = NULL; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* XXX 1003.2 says this must be 1 before any call. */ +int optind = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return EOF with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +#include +#define my_index strchr +#else + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +char *getenv (); + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +#if !defined (__STDC__) || !__STDC__ +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +#endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +static const char * +_getopt_initialize (optstring) + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind = 1; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns `EOF'. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + optarg = NULL; + + if (optind == 0) + optstring = _getopt_initialize (optstring); + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc + && (argv[optind][0] != '-' || argv[optind][1] == '\0')) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return EOF; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if ((argv[optind][0] != '-' || argv[optind][1] == '\0')) + { + if (ordering == REQUIRE_ORDER) + return EOF; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if (nameend - nextchar == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, gettext ("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + gettext ("%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + gettext ("%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + gettext ("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, gettext ("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, gettext ("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { + if (posixly_correct) + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, gettext ("%s: illegal option -- %c\n"), + argv[0], c); + else + fprintf (stderr, gettext ("%s: invalid option -- %c\n"), + argv[0], c); + } + optopt = c; + return '?'; + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, + gettext ("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +int +getopt_long (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +#endif /* _LIBC or not __GNU_LIBRARY__. */ + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == EOF) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ +#else /* HAVE_GETOPT_LONG */ +void getopt_dummy(void) {} +#endif diff --git a/lib/getopt.h b/lib/getopt.h new file mode 100644 index 00000000..4ac33b71 --- /dev/null +++ b/lib/getopt.h @@ -0,0 +1,129 @@ +/* Declarations for getopt. + Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _GETOPT_H +#define _GETOPT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +#if defined (__STDC__) && __STDC__ + const char *name; +#else + char *name; +#endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +#if defined (__STDC__) && __STDC__ +#ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int argc, char *const *argv, const char *shortopts); +#else /* not __GNU_LIBRARY__ */ +extern int getopt (); +#endif /* __GNU_LIBRARY__ */ +extern int getopt_long (int argc, char *const *argv, const char *shortopts, + const struct option *longopts, int *longind); +extern int getopt_long_only (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind, + int long_only); +#else /* not __STDC__ */ +extern int getopt (); +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _GETOPT_H */ diff --git a/main.c b/main.c new file mode 100644 index 00000000..feda8118 --- /dev/null +++ b/main.c @@ -0,0 +1,718 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "rsync.h" + +int verbose = 0; +int always_checksum = 0; +time_t starttime; +off_t total_size = 0; +int block_size=BLOCK_SIZE; + +char *backup_suffix = BACKUP_SUFFIX; + +static char *rsync_path = RSYNC_NAME; + +int make_backups = 0; +int preserve_links = 0; +int preserve_perms = 0; +int preserve_devices = 0; +int preserve_uid = 0; +int preserve_gid = 0; +int preserve_times = 0; +int update_only = 0; +int cvs_exclude = 0; +int dry_run=0; +int local_server=0; +int ignore_times=0; +int delete_mode=0; +int one_file_system=0; + +int am_server = 0; +static int sender = 0; +int recurse = 0; + +static void usage(FILE *f); + +static void report(int f) +{ + int in,out,tsize; + time_t t = time(NULL); + + if (!verbose) return; + + if (am_server && sender) { + write_int(f,read_total()); + write_int(f,write_total()); + write_int(f,total_size); + write_flush(f); + return; + } + + if (sender) { + in = read_total(); + out = write_total(); + tsize = (int)total_size; + } else { + in = read_int(f); + out = read_int(f); + tsize = read_int(f); + } + + printf("wrote %d bytes read %d bytes %g bytes/sec\n", + out,in,(in+out)/(0.5 + (t-starttime))); + printf("total size is %d speedup is %g\n", + tsize,(1.0*tsize)/(in+out)); +} + + +static void server_options(char **args,int *argc) +{ + int ac = *argc; + static char argstr[50]; + static char bsize[30]; + int i, x; + + args[ac++] = "--server"; + + if (!sender) + args[ac++] = "--sender"; + + x = 1; + argstr[0] = '-'; + for (i=0;i 3) { + fprintf(stderr,"cmd="); + for (i=0;icount > 1) { + fprintf(stderr,"ERROR: destination must be a directory when copying more than 1 file\n"); + exit(1); + } + return name; + } + + if (flist->count == 1) + return name; + + if (!name) + return NULL; + + if (mkdir(name,0777) != 0) { + fprintf(stderr,"mkdir %s : %s\n",name,strerror(errno)); + exit(1); + } + + if (chdir(name) != 0) { + fprintf(stderr,"chdir %s : %s\n",name,strerror(errno)); + exit(1); + } + + return NULL; +} + + + + +void do_server_sender(int argc,char *argv[]) +{ + int i; + char *dir = argv[0]; + struct file_list *flist; + + if (verbose > 2) + fprintf(stderr,"server_sender starting pid=%d\n",(int)getpid()); + + if (chdir(dir) != 0) { + fprintf(stderr,"chdir %s: %s\n",dir,strerror(errno)); + exit(1); + } + argc--; + argv++; + + if (strcmp(dir,".")) { + int l = strlen(dir); + for (i=0;i 2) + fprintf(stderr,"server_recv(%d) starting pid=%d\n",argc,(int)getpid()); + + if (argc > 0) { + dir = argv[0]; + argc--; + argv++; + if (chdir(dir) != 0) { + fprintf(stderr,"chdir %s : %s\n",dir,strerror(errno)); + exit(1); + } + } + + if (delete_mode) + recv_exclude_list(STDIN_FILENO); + + flist = recv_file_list(STDIN_FILENO); + if (!flist || flist->count == 0) { + fprintf(stderr,"nothing to do\n"); + exit(1); + } + + if (argc > 0) { + if (strcmp(dir,".")) { + argv[0] += strlen(dir); + if (argv[0][0] == '/') argv[0]++; + } + local_name = get_local_name(flist,argv[0]); + } + + if ((pid=fork()) == 0) { + recv_files(STDIN_FILENO,flist,local_name); + if (verbose > 2) + fprintf(stderr,"receiver read %d\n",read_total()); + exit(0); + } + + generate_files(STDOUT_FILENO,flist,local_name); + + waitpid(pid, &status, 0); + exit(status); +} + + +static void usage(FILE *f) +{ + fprintf(f,"rsync version %s Copyright Andrew Tridgell and Paul Mackerras\n\n", + VERSION); + fprintf(f,"Usage:\t%s [options] src user@host:dest\nOR",RSYNC_NAME); + fprintf(f,"\t%s [options] user@host:src dest\n\n",RSYNC_NAME); + fprintf(f,"Options:\n"); + fprintf(f,"-v, --verbose increase verbosity\n"); + fprintf(f,"-c, --checksum always checksum\n"); + fprintf(f,"-a, --archive archive mode (same as -rlptDog)\n"); + fprintf(f,"-r, --recursive recurse into directories\n"); + fprintf(f,"-b, --backup make backups (default ~ extension)\n"); + fprintf(f,"-u, --update update only (don't overwrite newer files)\n"); + fprintf(f,"-l, --links preserve soft links\n"); + fprintf(f,"-p, --perms preserve permissions\n"); + fprintf(f,"-o, --owner preserve owner (root only)\n"); + fprintf(f,"-g, --group preserve group\n"); + fprintf(f,"-D, --devices preserve devices (root only)\n"); + fprintf(f,"-t, --times preserve times\n"); + fprintf(f,"-n, --dry-run show what would have been transferred\n"); + fprintf(f,"-x, --one-file-system don't cross filesystem boundaries\n"); + fprintf(f,"-B, --block-size SIZE checksum blocking size\n"); + fprintf(f,"-e, --rsh COMMAND specify rsh replacement\n"); + fprintf(f," --rsync-path PATH specify path to rsync on the remote machine\n"); + fprintf(f,"-C, --cvs-exclude auto ignore files in the same way CVS does\n"); + fprintf(f," --delete delete files that don't exist on the sending side\n"); + fprintf(f,"-I, --ignore-times don't exclude files that match length and time\n"); + fprintf(f," --exclude FILE exclude file FILE\n"); + fprintf(f," --exclude-from FILE exclude files listed in FILE\n"); + fprintf(f," --suffix SUFFIX override backup suffix\n"); + fprintf(f," --version print version number\n"); + + fprintf(f,"\n"); + fprintf(f,"the backup suffix defaults to %s\n",BACKUP_SUFFIX); + fprintf(f,"the block size defaults to %d\n",BLOCK_SIZE); +} + +enum {OPT_VERSION,OPT_SUFFIX,OPT_SENDER,OPT_SERVER,OPT_EXCLUDE, + OPT_EXCLUDE_FROM,OPT_DELETE,OPT_RSYNC_PATH}; + +static char *short_options = "oblpguDCtcahvrIxne:B:"; + +static struct option long_options[] = { + {"version", 0, 0, OPT_VERSION}, + {"server", 0, 0, OPT_SERVER}, + {"sender", 0, 0, OPT_SENDER}, + {"delete", 0, 0, OPT_DELETE}, + {"exclude", 1, 0, OPT_EXCLUDE}, + {"exclude-from",1, 0, OPT_EXCLUDE_FROM}, + {"rsync-path", 1, 0, OPT_RSYNC_PATH}, + {"one-file-system",0, 0, 'x'}, + {"ignore-times",0, 0, 'I'}, + {"help", 0, 0, 'h'}, + {"dry-run", 0, 0, 'n'}, + {"cvs-exclude", 0, 0, 'C'}, + {"archive", 0, 0, 'a'}, + {"checksum", 0, 0, 'c'}, + {"backup", 0, 0, 'b'}, + {"update", 0, 0, 'u'}, + {"verbose", 0, 0, 'v'}, + {"recursive", 0, 0, 'r'}, + {"devices", 0, 0, 'D'}, + {"perms", 0, 0, 'p'}, + {"links", 0, 0, 'l'}, + {"owner", 0, 0, 'o'}, + {"group", 0, 0, 'g'}, + {"times", 0, 0, 't'}, + {"rsh", 1, 0, 'e'}, + {"suffix", 1, 0, OPT_SUFFIX}, + {"block-size", 1, 0, 'B'}, + {0,0,0,0}}; + +int main(int argc,char *argv[]) +{ + int pid, status, pid2, status2; + int opt; + int option_index; + char *shell_cmd = NULL; + char *shell_machine = NULL; + char *shell_path = NULL; + char *shell_user = NULL; + char *p; + int f_in,f_out; + struct file_list *flist; + char *local_name = NULL; + + starttime = time(NULL); + + while ((opt = getopt_long(argc, argv, + short_options, long_options, &option_index)) + != -1) { + switch (opt) + { + case OPT_VERSION: + printf("rsync version %s protocol version %d\n", + VERSION,PROTOCOL_VERSION); + exit(0); + + case OPT_SUFFIX: + backup_suffix = optarg; + break; + + case OPT_RSYNC_PATH: + rsync_path = optarg; + break; + + case 'I': + ignore_times = 1; + break; + + case 'x': + one_file_system=1; + break; + + case OPT_DELETE: + delete_mode = 1; + break; + + case OPT_EXCLUDE: + add_exclude(optarg); + break; + + case OPT_EXCLUDE_FROM: + add_exclude_file(optarg,1); + break; + + case 'h': + usage(stdout); + exit(0); + + case 'b': + make_backups=1; + break; + + case 'n': + dry_run=1; + break; + + case 'C': + cvs_exclude=1; + break; + + case 'u': + update_only=1; + break; + +#if SUPPORT_LINKS + case 'l': + preserve_links=1; + break; +#endif + + case 'p': + preserve_perms=1; + break; + + case 'o': + if (getuid() == 0) { + preserve_uid=1; + } else { + fprintf(stderr,"-o only allowed for root\n"); + exit(1); + } + break; + + case 'g': + preserve_gid=1; + break; + + case 'D': + if (getuid() == 0) { + preserve_devices=1; + } else { + fprintf(stderr,"-D only allowed for root\n"); + exit(1); + } + break; + + case 't': + preserve_times=1; + break; + + case 'c': + always_checksum=1; + break; + + case 'v': + verbose++; + break; + + case 'a': + recurse=1; +#if SUPPORT_LINKS + preserve_links=1; +#endif + preserve_perms=1; + preserve_times=1; + preserve_gid=1; + if (getuid() == 0) { + preserve_devices=1; + preserve_uid=1; + } + break; + + case OPT_SERVER: + am_server = 1; + break; + + case OPT_SENDER: + if (!am_server) { + usage(stderr); + exit(1); + } + sender = 1; + break; + + case 'r': + recurse = 1; + break; + + case 'e': + shell_cmd = optarg; + break; + + case 'B': + block_size = atoi(optarg); + break; + + default: + fprintf(stderr,"bad option -%c\n",opt); + exit(1); + } + } + + while (optind--) { + argc--; + argv++; + } + + if (dry_run) + verbose = MAX(verbose,1); + + if (am_server) { + int version = read_int(STDIN_FILENO); + if (version != PROTOCOL_VERSION) { + fprintf(stderr,"protocol version mismatch %d %d\n", + version,PROTOCOL_VERSION); + exit(1); + } + write_int(STDOUT_FILENO,PROTOCOL_VERSION); + write_flush(STDOUT_FILENO); + + if (sender) { + recv_exclude_list(STDIN_FILENO); + if (cvs_exclude) + add_cvs_excludes(); + do_server_sender(argc,argv); + } else { + do_server_recv(argc,argv); + } + exit(0); + } + + if (argc < 2) { + usage(stderr); + exit(1); + } + + p = strchr(argv[0],':'); + + if (p) { + sender = 0; + *p = 0; + shell_machine = argv[0]; + shell_path = p+1; + argc--; + argv++; + } else { + sender = 1; + + p = strchr(argv[argc-1],':'); + if (!p) { + local_server = 1; + } + + if (local_server) { + shell_machine = NULL; + shell_path = argv[argc-1]; + } else { + *p = 0; + shell_machine = argv[argc-1]; + shell_path = p+1; + } + argc--; + } + + if (shell_machine) { + p = strchr(shell_machine,'@'); + if (p) { + *p = 0; + shell_user = shell_machine; + shell_machine = p+1; + } + } + + if (verbose > 3) { + fprintf(stderr,"cmd=%s machine=%s user=%s path=%s\n", + shell_cmd?shell_cmd:"", + shell_machine?shell_machine:"", + shell_user?shell_user:"", + shell_path?shell_path:""); + } + + signal(SIGCHLD,SIG_IGN); + signal(SIGINT,sig_int); + + if (!sender && argc != 1) { + usage(stderr); + exit(1); + } + + pid = do_cmd(shell_cmd,shell_machine,shell_user,shell_path,&f_in,&f_out); + + write_int(f_out,PROTOCOL_VERSION); + write_flush(f_out); + { + int version = read_int(f_in); + if (version != PROTOCOL_VERSION) { + fprintf(stderr,"protocol version mismatch\n"); + exit(1); + } + } + + if (verbose > 3) + fprintf(stderr,"parent=%d child=%d sender=%d recurse=%d\n", + (int)getpid(),pid,sender,recurse); + + if (sender) { + if (cvs_exclude) + add_cvs_excludes(); + if (delete_mode) + send_exclude_list(f_out); + flist = send_file_list(f_out,recurse,argc,argv); + if (verbose > 3) + fprintf(stderr,"file list sent\n"); + send_files(flist,f_out,f_in); + if (verbose > 3) + fprintf(stderr,"waiting on %d\n",pid); + waitpid(pid, &status, 0); + report(-1); + exit(status); + } + + send_exclude_list(f_out); + + flist = recv_file_list(f_in); + if (!flist || flist->count == 0) { + fprintf(stderr,"nothing to do\n"); + exit(0); + } + + local_name = get_local_name(flist,argv[0]); + + if ((pid2=fork()) == 0) { + recv_files(f_in,flist,local_name); + if (verbose > 1) + fprintf(stderr,"receiver read %d\n",read_total()); + exit(0); + } + + generate_files(f_out,flist,local_name); + + waitpid(pid2, &status2, 0); + + report(f_in); + + waitpid(pid, &status, 0); + + return status | status2; +} diff --git a/match.c b/match.c new file mode 100644 index 00000000..1591fdc6 --- /dev/null +++ b/match.c @@ -0,0 +1,246 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "rsync.h" + +extern int verbose; +extern int am_server; + +typedef unsigned short tag; + +#define TABLESIZE (1<<16) +#define NULL_TAG ((tag)-1) + +static int false_alarms; +static int tag_hits; +static int matches; +static int data_transfer; + +static int total_false_alarms=0; +static int total_tag_hits=0; +static int total_matches=0; +static int total_data_transfer=0; + + +struct target { + tag t; + int i; +}; + +static struct target *targets=NULL; + +static tag *tag_table = NULL; + +#define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF) +#define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16) + +static int compare_targets(struct target *t1,struct target *t2) +{ + return(t1->t - t2->t); +} + + +static void build_hash_table(struct sum_struct *s) +{ + int i; + + if (!tag_table) + tag_table = (tag *)malloc(sizeof(tag)*TABLESIZE); + + targets = (struct target *)malloc(sizeof(targets[0])*s->count); + if (!tag_table || !targets) + out_of_memory("build_hash_table"); + + for (i=0;icount;i++) { + targets[i].i = i; + targets[i].t = gettag(s->sums[i].sum1); + } + + qsort(targets,s->count,sizeof(targets[0]),(int (*)())compare_targets); + + for (i=0;icount-1;i>=0;i--) { + tag_table[targets[i].t] = i; + } +} + + + +static off_t last_match; + + +static void matched(int f,struct sum_struct *s,char *buf,off_t len,int offset,int i) +{ + int n = offset - last_match; + + if (verbose > 2) + if (i != -1) + fprintf(stderr,"match at %d last_match=%d j=%d len=%d n=%d\n", + (int)offset,(int)last_match,i,(int)s->sums[i].len,n); + + if (n > 0) { + write_int(f,n); + write_buf(f,buf+last_match,n); + data_transfer += n; + } + write_int(f,-(i+1)); + if (i != -1) + last_match = offset + s->sums[i].len; + if (n > 0) + write_flush(f); +} + + +static void hash_search(int f,struct sum_struct *s,char *buf,off_t len) +{ + int offset,j,k; + int end; + char sum2[SUM_LENGTH]; + uint32 s1, s2, sum; + + if (verbose > 2) + fprintf(stderr,"hash search b=%d len=%d\n",s->n,(int)len); + + k = MIN(len, s->n); + sum = get_checksum1(buf, k); + s1 = sum & 0xFFFF; + s2 = sum >> 16; + if (verbose > 3) + fprintf(stderr, "sum=%.8x k=%d\n", sum, k); + + offset = 0; + + end = len + 1 - s->sums[s->count-1].len; + + if (verbose > 3) + fprintf(stderr,"hash search s->n=%d len=%d count=%d\n", + s->n,(int)len,s->count); + + do { + tag t = gettag2(s1,s2); + j = tag_table[t]; + if (verbose > 4) + fprintf(stderr,"offset=%d sum=%08x\n", + offset,sum); + + if (j != NULL_TAG) { + int done_csum2 = 0; + + sum = (s1 & 0xffff) | (s2 << 16); + tag_hits++; + do { + int i = targets[j].i; + + if (sum == s->sums[i].sum1) { + if (verbose > 3) + fprintf(stderr,"potential match at %d target=%d %d sum=%08x\n", + offset,j,i,sum); + + if (!done_csum2) { + get_checksum2(buf+offset,MIN(s->n,len-offset),sum2); + done_csum2 = 1; + } + if (memcmp(sum2,s->sums[i].sum2,SUM_LENGTH) == 0) { + matched(f,s,buf,len,offset,i); + offset += s->sums[i].len - 1; + k = MIN((len-offset), s->n); + sum = get_checksum1(buf+offset, k); + s1 = sum & 0xFFFF; + s2 = sum >> 16; + ++matches; + break; + } else { + false_alarms++; + } + } + j++; + } while (jcount && targets[j].t == t); + } + + /* Trim off the first byte from the checksum */ + s1 -= buf[offset]; + s2 -= k * buf[offset]; + + /* Add on the next byte (if there is one) to the checksum */ + if (k < (len-offset)) { + s1 += buf[offset+k]; + s2 += s1; + } else { + --k; + } + + if (verbose > 3) + fprintf(stderr,"s2:s1 = %.4x%.4x sum=%.8x k=%d offset=%d took %x added %x\n", + s2&0xffff, s1&0xffff, get_checksum1(buf+offset+1,k), + k, (int)offset, buf[offset], buf[offset+k]); + } while (++offset < end); + + matched(f,s,buf,len,len,-1); +} + + +void match_sums(int f,struct sum_struct *s,char *buf,off_t len) +{ + last_match = 0; + false_alarms = 0; + tag_hits = 0; + matches=0; + data_transfer=0; + + if (len > 0 && s->count>0) { + build_hash_table(s); + + if (verbose > 2) + fprintf(stderr,"built hash table\n"); + + hash_search(f,s,buf,len); + + if (verbose > 2) + fprintf(stderr,"done hash search\n"); + } else { + matched(f,s,buf,len,len,-1); + } + + if (targets) { + free(targets); + targets=NULL; + } + + if (verbose > 2) + fprintf(stderr, "false_alarms=%d tag_hits=%d matches=%d\n", + false_alarms, tag_hits, matches); + + total_tag_hits += tag_hits; + total_false_alarms += false_alarms; + total_matches += matches; + total_data_transfer += data_transfer; +} + +void match_report(void) +{ + if (verbose <= 1) + return; + + fprintf(am_server?stderr:stdout, + "total: matches=%d tag_hits=%d false_alarms=%d data=%d\n", + total_matches,total_tag_hits, + total_false_alarms,total_data_transfer); +} diff --git a/md4.c b/md4.c new file mode 100644 index 00000000..0a9d821f --- /dev/null +++ b/md4.c @@ -0,0 +1,263 @@ +/* + This code is from rfc1186. + + It has been modified to use the SIVAL() macro to make it + byte order and length independent, so we don't need the LOWBYTEFIRST define +*/ + + /* + ** ******************************************************************** + ** md4.c -- Implementation of MD4 Message Digest Algorithm ** + ** Updated: 2/16/90 by Ronald L. Rivest ** + ** (C) 1990 RSA Data Security, Inc. ** + ** ******************************************************************** + */ + + /* + ** To use MD4: + ** -- Include md4.h in your program + ** -- Declare an MDstruct MD to hold the state of the digest + ** computation. + ** -- Initialize MD using MDbegin(&MD) + ** -- For each full block (64 bytes) X you wish to process, call + ** MDupdate(&MD,X,512) + ** (512 is the number of bits in a full block.) + ** -- For the last block (less than 64 bytes) you wish to process, + ** MDupdate(&MD,X,n) + ** where n is the number of bits in the partial block. A partial + ** block terminates the computation, so every MD computation + ** should terminate by processing a partial block, even if it + ** has n = 0. + ** -- The message digest is available in MD.buffer[0] ... + ** MD.buffer[3]. (Least-significant byte of each word + ** should be output first.) + ** -- You can print out the digest using MDprint(&MD) + */ + +#define TRUE 1 +#define FALSE 0 + + /* Compile-time includes + */ + +#include "rsync.h" + + /* Compile-time declarations of MD4 "magic constants". + */ +#define I0 0x67452301 /* Initial values for MD buffer */ +#define I1 0xefcdab89 +#define I2 0x98badcfe +#define I3 0x10325476 +#define C2 013240474631 /* round 2 constant = sqrt(2) in octal */ +#define C3 015666365641 /* round 3 constant = sqrt(3) in octal */ + /* C2 and C3 are from Knuth, The Art of Programming, Volume 2 + ** (Seminumerical Algorithms), Second Edition (1981), Addison-Wesley. + ** Table 2, page 660. + */ + +#define fs1 3 /* round 1 shift amounts */ +#define fs2 7 +#define fs3 11 +#define fs4 19 +#define gs1 3 /* round 2 shift amounts */ +#define gs2 5 +#define gs3 9 +#define gs4 13 +#define hs1 3 /* round 3 shift amounts */ +#define hs2 9 +#define hs3 11 +#define hs4 15 + + /* Compile-time macro declarations for MD4. + ** Note: The "rot" operator uses the variable "tmp". + ** It assumes tmp is declared as unsigned int, so that the >> + ** operator will shift in zeros rather than extending the sign bit. + */ +#define f(X,Y,Z) ((X&Y) | ((~X)&Z)) +#define g(X,Y,Z) ((X&Y) | (X&Z) | (Y&Z)) +#define h(X,Y,Z) (X^Y^Z) +#define rot(X,S) (tmp=X,(tmp<>(32-S))) +#define ff(A,B,C,D,i,s) A = rot((A + f(B,C,D) + X[i]),s) +#define gg(A,B,C,D,i,s) A = rot((A + g(B,C,D) + X[i] + C2),s) +#define hh(A,B,C,D,i,s) A = rot((A + h(B,C,D) + X[i] + C3),s) + + /* MDbegin(MDp) + ** Initialize message digest buffer MDp. + ** This is a user-callable routine. + */ + void + MDbegin(MDp) + MDptr MDp; + { int i; + MDp->buffer[0] = I0; + MDp->buffer[1] = I1; + MDp->buffer[2] = I2; + MDp->buffer[3] = I3; + for (i=0;i<8;i++) MDp->count[i] = 0; + MDp->done = 0; + } + + /* MDreverse(X) + ** Reverse the byte-ordering of every int in X. + ** Assumes X is an array of 16 ints. + ** The macro revx reverses the byte-ordering of the next word of X. + */ + void MDreverse(X) + unsigned int32 *X; + { register unsigned int32 t; + register unsigned int i; + + for(i = 0; i < 16; i++) { + t = X[i]; + SIVAL(X,i*4,t); + } + } + + /* MDblock(MDp,X) + ** Update message digest buffer MDp->buffer using 16-word data block X. + ** Assumes all 16 words of X are full of data. + ** Does not update MDp->count. + ** This routine is not user-callable. + */ + static void + MDblock(MDp,X) + MDptr MDp; + unsigned int32 *X; + { + register unsigned int32 tmp, A, B, C, D; + MDreverse(X); + A = MDp->buffer[0]; + B = MDp->buffer[1]; + C = MDp->buffer[2]; + D = MDp->buffer[3]; + /* Update the message digest buffer */ + ff(A , B , C , D , 0 , fs1); /* Round 1 */ + ff(D , A , B , C , 1 , fs2); + ff(C , D , A , B , 2 , fs3); + ff(B , C , D , A , 3 , fs4); + ff(A , B , C , D , 4 , fs1); + ff(D , A , B , C , 5 , fs2); + ff(C , D , A , B , 6 , fs3); + ff(B , C , D , A , 7 , fs4); + ff(A , B , C , D , 8 , fs1); + ff(D , A , B , C , 9 , fs2); + ff(C , D , A , B , 10 , fs3); + ff(B , C , D , A , 11 , fs4); + ff(A , B , C , D , 12 , fs1); + ff(D , A , B , C , 13 , fs2); + ff(C , D , A , B , 14 , fs3); + ff(B , C , D , A , 15 , fs4); + gg(A , B , C , D , 0 , gs1); /* Round 2 */ + gg(D , A , B , C , 4 , gs2); + gg(C , D , A , B , 8 , gs3); + gg(B , C , D , A , 12 , gs4); + gg(A , B , C , D , 1 , gs1); + gg(D , A , B , C , 5 , gs2); + gg(C , D , A , B , 9 , gs3); + gg(B , C , D , A , 13 , gs4); + gg(A , B , C , D , 2 , gs1); + gg(D , A , B , C , 6 , gs2); + gg(C , D , A , B , 10 , gs3); + gg(B , C , D , A , 14 , gs4); + gg(A , B , C , D , 3 , gs1); + gg(D , A , B , C , 7 , gs2); + gg(C , D , A , B , 11 , gs3); + gg(B , C , D , A , 15 , gs4); + hh(A , B , C , D , 0 , hs1); /* Round 3 */ + hh(D , A , B , C , 8 , hs2); + hh(C , D , A , B , 4 , hs3); + hh(B , C , D , A , 12 , hs4); + hh(A , B , C , D , 2 , hs1); + hh(D , A , B , C , 10 , hs2); + hh(C , D , A , B , 6 , hs3); + hh(B , C , D , A , 14 , hs4); + hh(A , B , C , D , 1 , hs1); + hh(D , A , B , C , 9 , hs2); + hh(C , D , A , B , 5 , hs3); + hh(B , C , D , A , 13 , hs4); + hh(A , B , C , D , 3 , hs1); + hh(D , A , B , C , 11 , hs2); + hh(C , D , A , B , 7 , hs3); + hh(B , C , D , A , 15 , hs4); + MDp->buffer[0] += A; + MDp->buffer[1] += B; + MDp->buffer[2] += C; + MDp->buffer[3] += D; + } + + /* MDupdate(MDp,X,count) + ** Input: MDp -- an MDptr + ** X -- a pointer to an array of unsigned characters. + ** count -- the number of bits of X to use. + ** (if not a multiple of 8, uses high bits of last byte.) + ** Update MDp using the number of bits of X given by count. + ** This is the basic input routine for an MD4 user. + ** The routine completes the MD computation when count < 512, so + ** every MD computation should end with one call to MDupdate with a + ** count less than 512. A call with count 0 will be ignored if the + ** MD has already been terminated (done != 0), so an extra call with + ** count 0 can be given as a "courtesy close" to force termination + ** if desired. + */ + void + MDupdate(MDp,X,count) + MDptr MDp; + unsigned char *X; + unsigned int count; + { unsigned int32 i, tmp, bit, byte, mask; + unsigned char XX[64]; + unsigned char *p; + /* return with no error if this is a courtesy close with count + ** zero and MDp->done is true. + */ + if (count == 0 && MDp->done) return; + /* check to see if MD is already done and report error */ + if (MDp->done) + { printf("\nError: MDupdate MD already done."); return; } + /* Add count to MDp->count */ + tmp = count; + p = MDp->count; + while (tmp) + { tmp += *p; + *p++ = tmp; + tmp = tmp >> 8; + } + /* Process data */ + if (count == 512) + { /* Full block of data to handle */ + MDblock(MDp,(unsigned int *)X); + } + else if (count > 512) /* Check for count too large */ + { printf("\nError: MDupdate called with illegal count value %d." + ,count); + return; + } + else /* partial block -- must be last block so finish up */ + { /* Find out how many bytes and residual bits there are */ + byte = count >> 3; + bit = count & 7; + /* Copy X into XX since we need to modify it */ + for (i=0;i<=byte;i++) XX[i] = X[i]; + for (i=byte+1;i<64;i++) XX[i] = 0; + /* Add padding '1' bit and low-order zeros in last byte */ + mask = 1 << (7 - bit); + XX[byte] = (XX[byte] | mask) & ~( mask - 1); + /* If room for bit count, finish up with this block */ + if (byte <= 55) + { for (i=0;i<8;i++) XX[56+i] = MDp->count[i]; + MDblock(MDp,(unsigned int32 *)XX); + } + else /* need to do two blocks to finish up */ + { MDblock(MDp,(unsigned int32 *)XX); + for (i=0;i<56;i++) XX[i] = 0; + for (i=0;i<8;i++) XX[56+i] = MDp->count[i]; + MDblock(MDp,(unsigned int32 *)XX); + } + /* Set flag saying we're done with MD computation */ + MDp->done = 1; + } + } + + /* + ** End of md4.c + */ diff --git a/md4.h b/md4.h new file mode 100644 index 00000000..049c7ed9 --- /dev/null +++ b/md4.h @@ -0,0 +1,49 @@ +/* + This code is from rfc1186. +*/ + + /* + ** ******************************************************************** + ** md4.h -- Header file for implementation of ** + ** MD4 Message Digest Algorithm ** + ** Updated: 2/13/90 by Ronald L. Rivest ** + ** (C) 1990 RSA Data Security, Inc. ** + ** ******************************************************************** + */ + + /* MDstruct is the data structure for a message digest computation. + */ + typedef struct { + unsigned int32 buffer[4]; /* Holds 4-word result of MD computation */ + unsigned char count[8]; /* Number of bits processed so far */ + unsigned int done; /* Nonzero means MD computation finished */ + } MDstruct, *MDptr; + + /* MDbegin(MD) + + + + ** Input: MD -- an MDptr + ** Initialize the MDstruct prepatory to doing a message digest + ** computation. + */ + extern void MDbegin(); + + /* MDupdate(MD,X,count) + ** Input: MD -- an MDptr + ** X -- a pointer to an array of unsigned characters. + ** count -- the number of bits of X to use (an unsigned int). + ** Updates MD using the first "count" bits of X. + ** The array pointed to by X is not modified. + ** If count is not a multiple of 8, MDupdate uses high bits of + ** last byte. + ** This is the basic input routine for a user. + ** The routine terminates the MD computation when count < 512, so + ** every MD computation should end with one call to MDupdate with a + ** count less than 512. Zero is OK for a count. + */ + extern void MDupdate(); + + /* + ** End of md4.h + */ diff --git a/mkproto.awk b/mkproto.awk new file mode 100644 index 00000000..0eb03de6 --- /dev/null +++ b/mkproto.awk @@ -0,0 +1,41 @@ +# generate prototypes for Samba C code +# tridge, June 1996 + +BEGIN { + inheader=0; + print "/* This file is automatically generated with \"make proto\". DO NOT EDIT */" + print "" +} + +{ + if (inheader) { + if (match($0,"[)][ \t]*$")) { + inheader = 0; + printf "%s;\n",$0; + } else { + printf "%s\n",$0; + } + next; + } +} + +/^static|^extern/ || !/^[a-zA-Z]/ || /[;]/ { + next; +} + +!/^off_t|^unsigned|^mode_t|^DIR|^user|^int|^char|^uint|^struct|^BOOL|^void|^time/ { + next; +} + + +/[(].*[)][ \t]*$/ { + printf "%s;\n",$0; + next; +} + +/[(]/ { + inheader=1; + printf "%s\n",$0; + next; +} + diff --git a/rsync.c b/rsync.c new file mode 100644 index 00000000..874d8aa9 --- /dev/null +++ b/rsync.c @@ -0,0 +1,733 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "rsync.h" + +extern int verbose; +extern int am_server; +extern int always_checksum; +extern time_t starttime; + +extern char *backup_suffix; + +extern int block_size; +extern int update_only; +extern int make_backups; +extern int preserve_links; +extern int preserve_perms; +extern int preserve_devices; +extern int preserve_uid; +extern int preserve_gid; +extern int preserve_times; +extern int dry_run; +extern int ignore_times; +extern int recurse; +extern int delete_mode; +extern int cvs_exclude; + +/* + free a sums struct + */ +static void free_sums(struct sum_struct *s) +{ + if (s->sums) free(s->sums); + free(s); +} + + + +/* + send a sums struct down a fd + */ +static void send_sums(struct sum_struct *s,int f_out) +{ + int i; + + /* tell the other guy how many we are going to be doing and how many + bytes there are in the last chunk */ + write_int(f_out,s?s->count:0); + write_int(f_out,s?s->n:block_size); + write_int(f_out,s?s->remainder:0); + if (s) + for (i=0;icount;i++) { + write_int(f_out,s->sums[i].sum1); + write_buf(f_out,s->sums[i].sum2,SUM_LENGTH); + } + write_flush(f_out); +} + + +/* + generate a stream of signatures/checksums that describe a buffer + + generate approximately one checksum every n bytes + */ +static struct sum_struct *generate_sums(char *buf,off_t len,int n) +{ + int i; + struct sum_struct *s; + int count; + int block_len = n; + int remainder = (len%block_len); + off_t offset = 0; + + count = (len+(block_len-1))/block_len; + + s = (struct sum_struct *)malloc(sizeof(*s)); + if (!s) out_of_memory("generate_sums"); + + s->count = count; + s->remainder = remainder; + s->n = n; + s->flength = len; + + if (count==0) { + s->sums = NULL; + return s; + } + + if (verbose > 3) + fprintf(stderr,"count=%d rem=%d n=%d flength=%d\n", + s->count,s->remainder,s->n,(int)s->flength); + + s->sums = (struct sum_buf *)malloc(sizeof(s->sums[0])*s->count); + if (!s->sums) out_of_memory("generate_sums"); + + for (i=0;isums[i].sum1 = get_checksum1(buf,n1); + get_checksum2(buf,n1,s->sums[i].sum2); + + s->sums[i].offset = offset; + s->sums[i].len = n1; + s->sums[i].i = i; + + if (verbose > 3) + fprintf(stderr,"chunk[%d] offset=%d len=%d sum1=%08x\n", + i,(int)s->sums[i].offset,s->sums[i].len,s->sums[i].sum1); + + len -= n1; + buf += n1; + offset += n1; + } + + return s; +} + + +/* + receive the checksums for a buffer + */ +static struct sum_struct *receive_sums(int f) +{ + struct sum_struct *s; + int i; + off_t offset = 0; + int block_len; + + s = (struct sum_struct *)malloc(sizeof(*s)); + if (!s) out_of_memory("receive_sums"); + + s->count = read_int(f); + s->n = read_int(f); + s->remainder = read_int(f); + s->sums = NULL; + + if (verbose > 3) + fprintf(stderr,"count=%d n=%d rem=%d\n", + s->count,s->n,s->remainder); + + block_len = s->n; + + if (s->count == 0) + return(s); + + s->sums = (struct sum_buf *)malloc(sizeof(s->sums[0])*s->count); + if (!s->sums) out_of_memory("receive_sums"); + + for (i=0;icount;i++) { + s->sums[i].sum1 = read_int(f); + read_buf(f,s->sums[i].sum2,SUM_LENGTH); + + s->sums[i].offset = offset; + s->sums[i].i = i; + + if (i == s->count-1 && s->remainder != 0) { + s->sums[i].len = s->remainder; + } else { + s->sums[i].len = s->n; + } + offset += s->sums[i].len; + + if (verbose > 3) + fprintf(stderr,"chunk[%d] len=%d offset=%d sum1=%08x\n", + i,s->sums[i].len,(int)s->sums[i].offset,s->sums[i].sum1); + } + + s->flength = offset; + + return s; +} + + +static void set_perms(char *fname,struct file_struct *file,struct stat *st, + int report) +{ + int updated = 0; + struct stat st2; + + if (dry_run) return; + + if (!st) { + if (stat(fname,&st2) != 0) { + fprintf(stderr,"stat %s : %s\n",fname,strerror(errno)); + return; + } + st = &st2; + } + + if (preserve_times && st->st_mtime != file->modtime) { + updated = 1; + if (set_modtime(fname,file->modtime) != 0) { + fprintf(stderr,"failed to set times on %s : %s\n", + fname,strerror(errno)); + return; + } + } + +#ifdef HAVE_CHMOD + if (preserve_perms && st->st_mode != file->mode) { + updated = 1; + if (chmod(fname,file->mode) != 0) { + fprintf(stderr,"failed to set permissions on %s : %s\n", + fname,strerror(errno)); + return; + } + } +#endif + + if ((preserve_uid && st->st_uid != file->uid) || + (preserve_gid && st->st_gid != file->gid)) { + updated = 1; + if (chown(fname, + preserve_uid?file->uid:-1, + preserve_gid?file->gid:-1) != 0) { + if (verbose>1 || preserve_uid) + fprintf(stderr,"chown %s : %s\n",fname,strerror(errno)); + return; + } + } + + if (verbose > 1 && report) { + if (updated) + fprintf(am_server?stderr:stdout,"%s\n",fname); + else + fprintf(am_server?stderr:stdout,"%s is uptodate\n",fname); + } +} + + +void recv_generator(char *fname,struct file_list *flist,int i,int f_out) +{ + int fd; + struct stat st; + char *buf; + struct sum_struct *s; + char sum[SUM_LENGTH]; + int statret; + + if (verbose > 2) + fprintf(stderr,"recv_generator(%s)\n",fname); + + statret = lstat(fname,&st); + +#if SUPPORT_LINKS + if (preserve_links && S_ISLNK(flist->files[i].mode)) { + char lnk[MAXPATHLEN]; + int l; + if (statret == 0) { + l = readlink(fname,lnk,MAXPATHLEN-1); + if (l > 0) { + lnk[l] = 0; + if (strcmp(lnk,flist->files[i].link) == 0) { + if (verbose > 1) + fprintf(am_server?stderr:stdout,"%s is uptodate\n",fname); + return; + } + } + } + if (!dry_run) unlink(fname); + if (!dry_run && symlink(flist->files[i].link,fname) != 0) { + fprintf(stderr,"link %s -> %s : %s\n", + fname,flist->files[i].link,strerror(errno)); + } else { + if (verbose) + fprintf(am_server?stderr:stdout,"%s -> %s\n",fname,flist->files[i].link); + } + return; + } +#endif + +#ifdef HAVE_MKNOD + if (preserve_devices && + (S_ISCHR(flist->files[i].mode) || S_ISBLK(flist->files[i].mode))) { + if (statret != 0 || + st.st_mode != flist->files[i].mode || + st.st_rdev != flist->files[i].dev) { + if (!dry_run) unlink(fname); + if (verbose > 2) + fprintf(stderr,"mknod(%s,0%o,0x%x)\n", + fname,(int)flist->files[i].mode,(int)flist->files[i].dev); + if (!dry_run && + mknod(fname,flist->files[i].mode,flist->files[i].dev) != 0) { + fprintf(stderr,"mknod %s : %s\n",fname,strerror(errno)); + } else { + set_perms(fname,&flist->files[i],NULL,0); + if (verbose) + fprintf(am_server?stderr:stdout,"%s\n",fname); + } + } else { + set_perms(fname,&flist->files[i],&st,1); + } + return; + } +#endif + + if (!S_ISREG(flist->files[i].mode)) { + fprintf(stderr,"skipping non-regular file %s\n",fname); + return; + } + + if (statret == -1) { + if (errno == ENOENT) { + write_int(f_out,i); + if (!dry_run) send_sums(NULL,f_out); + } else { + if (verbose > 1) + fprintf(stderr,"recv_generator failed to open %s\n",fname); + } + return; + } + + if (!S_ISREG(st.st_mode)) { + fprintf(stderr,"%s : not a regular file\n",fname); + return; + } + + if (update_only && st.st_mtime >= flist->files[i].modtime) { + if (verbose > 1) + fprintf(stderr,"%s is newer\n",fname); + return; + } + + if (always_checksum && S_ISREG(st.st_mode)) { + file_checksum(fname,sum,st.st_size); + } + + if (st.st_size == flist->files[i].length && + ((!ignore_times && st.st_mtime == flist->files[i].modtime) || + (always_checksum && S_ISREG(st.st_mode) && + memcmp(sum,flist->files[i].sum,SUM_LENGTH) == 0))) { + set_perms(fname,&flist->files[i],&st,1); + return; + } + + if (dry_run) { + write_int(f_out,i); + return; + } + + /* open the file */ + fd = open(fname,O_RDONLY); + + if (fd == -1) { + fprintf(stderr,"failed to open %s : %s\n",fname,strerror(errno)); + return; + } + + if (st.st_size > 0) { + buf = map_file(fd,st.st_size); + if (!buf) { + fprintf(stderr,"mmap : %s\n",strerror(errno)); + close(fd); + return; + } + } else { + buf = NULL; + } + + if (verbose > 3) + fprintf(stderr,"mapped %s of size %d\n",fname,(int)st.st_size); + + s = generate_sums(buf,st.st_size,block_size); + + write_int(f_out,i); + send_sums(s,f_out); + write_flush(f_out); + + close(fd); + unmap_file(buf,st.st_size); + + free_sums(s); +} + + + +static void receive_data(int f_in,char *buf,int fd,char *fname) +{ + int i,n,remainder,len,count; + off_t offset = 0; + off_t offset2; + + count = read_int(f_in); + n = read_int(f_in); + remainder = read_int(f_in); + + for (i=read_int(f_in); i != 0; i=read_int(f_in)) { + if (i > 0) { + if (verbose > 3) + fprintf(stderr,"data recv %d at %d\n",i,(int)offset); + + if (read_write(f_in,fd,i) != i) { + fprintf(stderr,"write failed on %s : %s\n",fname,strerror(errno)); + exit(1); + } + offset += i; + } else { + i = -(i+1); + offset2 = i*n; + len = n; + if (i == count-1 && remainder != 0) + len = remainder; + + if (verbose > 3) + fprintf(stderr,"chunk[%d] of size %d at %d offset=%d\n", + i,len,(int)offset2,(int)offset); + + if (write(fd,buf+offset2,len) != len) { + fprintf(stderr,"write failed on %s : %s\n",fname,strerror(errno)); + exit(1); + } + offset += len; + } + } +} + + +static void delete_one(struct file_struct *f) +{ + if (!S_ISDIR(f->mode)) { + if (!dry_run && unlink(f->name) != 0) { + fprintf(stderr,"unlink %s : %s\n",f->name,strerror(errno)); + } else if (verbose) { + fprintf(stderr,"deleting %s\n",f->name); + } + } else { + if (!dry_run && rmdir(f->name) != 0) { + if (errno != ENOTEMPTY) + fprintf(stderr,"rmdir %s : %s\n",f->name,strerror(errno)); + } else if (verbose) { + fprintf(stderr,"deleting directory %s\n",f->name); + } + } +} + + +static void delete_files(struct file_list *flist) +{ + struct file_list *local_file_list; + char *dot="."; + int i; + + if (!(local_file_list = send_file_list(-1,recurse,1,&dot))) + return; + + for (i=local_file_list->count;i>=0;i--) { + if (!local_file_list->files[i].name) continue; + if (-1 == flist_find(flist,&local_file_list->files[i])) { + delete_one(&local_file_list->files[i]); + } + } +} + +static char *cleanup_fname = NULL; + +int sig_int(void) +{ + if (cleanup_fname) + unlink(cleanup_fname); + exit(1); +} + + +int recv_files(int f_in,struct file_list *flist,char *local_name) +{ + int fd1,fd2; + struct stat st; + char *fname; + char fnametmp[MAXPATHLEN]; + char *buf; + int i; + + if (verbose > 2) + fprintf(stderr,"recv_files(%d) starting\n",flist->count); + + if (recurse && delete_mode && !local_name && flist->count>0) { + delete_files(flist); + } + + while (1) + { + i = read_int(f_in); + if (i == -1) break; + + fname = flist->files[i].name; + + if (local_name) + fname = local_name; + + if (dry_run) { + if (!am_server && verbose) + printf("%s\n",fname); + continue; + } + + if (verbose > 2) + fprintf(stderr,"recv_files(%s)\n",fname); + + /* open the file */ + if ((fd1 = open(fname,O_RDONLY)) == -1 && + (fd1 = open(fname,O_RDONLY|O_CREAT,flist->files[i].mode)) == -1) { + fprintf(stderr,"recv_files failed to open %s\n",fname); + return -1; + } + + if (fstat(fd1,&st) != 0) { + fprintf(stderr,"fstat %s : %s\n",fname,strerror(errno)); + close(fd1); + return -1; + } + + if (!S_ISREG(st.st_mode)) { + fprintf(stderr,"%s : not a regular file\n",fname); + close(fd1); + return -1; + } + + if (st.st_size > 0) { + buf = map_file(fd1,st.st_size); + if (!buf) { + fprintf(stderr,"map_file failed\n"); + return -1; + } + } else { + buf = NULL; + } + + if (verbose > 2) + fprintf(stderr,"mapped %s of size %d\n",fname,(int)st.st_size); + + /* open tmp file */ + sprintf(fnametmp,"%s.XXXXXX",fname); + if (NULL == mktemp(fnametmp)) { + fprintf(stderr,"mktemp %s failed\n",fnametmp); + return -1; + } + fd2 = open(fnametmp,O_WRONLY|O_CREAT,st.st_mode); + if (fd2 == -1) { + fprintf(stderr,"open %s : %s\n",fnametmp,strerror(errno)); + return -1; + } + + cleanup_fname = fnametmp; + + if (!am_server && verbose) + printf("%s\n",fname); + + /* recv file data */ + receive_data(f_in,buf,fd2,fname); + + close(fd1); + close(fd2); + + if (verbose > 2) + fprintf(stderr,"renaming %s to %s\n",fnametmp,fname); + + if (make_backups) { + char fnamebak[MAXPATHLEN]; + sprintf(fnamebak,"%s%s",fname,backup_suffix); + if (rename(fname,fnamebak) != 0) { + fprintf(stderr,"rename %s %s : %s\n",fname,fnamebak,strerror(errno)); + exit(1); + } + } + + /* move tmp file over real file */ + if (rename(fnametmp,fname) != 0) { + fprintf(stderr,"rename %s -> %s : %s\n", + fnametmp,fname,strerror(errno)); + } + + cleanup_fname = NULL; + + unmap_file(buf,st.st_size); + + set_perms(fname,&flist->files[i],NULL,0); + } + + if (verbose > 2) + fprintf(stderr,"recv_files finished\n"); + + return 0; +} + + + +off_t send_files(struct file_list *flist,int f_out,int f_in) +{ + int fd; + struct sum_struct *s; + char *buf; + struct stat st; + char fname[MAXPATHLEN]; + off_t total=0; + int i; + + if (verbose > 2) + fprintf(stderr,"send_files starting\n"); + + while (1) + { + i = read_int(f_in); + if (i == -1) break; + + fname[0] = 0; + if (flist->files[i].dir) { + strcpy(fname,flist->files[i].dir); + strcat(fname,"/"); + } + strcat(fname,flist->files[i].name); + + if (verbose > 2) + fprintf(stderr,"send_files(%d,%s)\n",i,fname); + + if (dry_run) { + if (!am_server && verbose) + printf("%s\n",fname); + write_int(f_out,i); + continue; + } + + s = receive_sums(f_in); + if (!s) { + fprintf(stderr,"receive_sums failed\n"); + return -1; + } + + fd = open(fname,O_RDONLY); + if (fd == -1) { + fprintf(stderr,"send_files failed to open %s: %s\n", + fname,strerror(errno)); + continue; + } + + /* map the local file */ + if (fstat(fd,&st) != 0) { + fprintf(stderr,"fstat failed : %s\n",strerror(errno)); + return -1; + } + + if (st.st_size > 0) { + buf = map_file(fd,st.st_size); + if (!buf) { + fprintf(stderr,"map_file failed : %s\n",strerror(errno)); + return -1; + } + } else { + buf = NULL; + } + + if (verbose > 2) + fprintf(stderr,"send_files mapped %s of size %d\n", + fname,(int)st.st_size); + + write_int(f_out,i); + + write_int(f_out,s->count); + write_int(f_out,s->n); + write_int(f_out,s->remainder); + + if (verbose > 2) + fprintf(stderr,"calling match_sums %s\n",fname); + + if (!am_server && verbose) + printf("%s\n",fname); + + match_sums(f_out,s,buf,st.st_size); + write_flush(f_out); + + unmap_file(buf,st.st_size); + close(fd); + + free_sums(s); + + if (verbose > 2) + fprintf(stderr,"sender finished %s\n",fname); + + total += st.st_size; + } + + match_report(); + + write_int(f_out,-1); + write_flush(f_out); + + return total; +} + + + +void generate_files(int f,struct file_list *flist,char *local_name) +{ + int i; + + if (verbose > 2) + fprintf(stderr,"generator starting pid=%d count=%d\n", + (int)getpid(),flist->count); + + for (i = 0; i < flist->count; i++) { + if (!flist->files[i].name) continue; + if (S_ISDIR(flist->files[i].mode)) { + if (dry_run) continue; + if (mkdir(flist->files[i].name,flist->files[i].mode) != 0 && + errno != EEXIST) { + fprintf(stderr,"mkdir %s : %s\n", + flist->files[i].name,strerror(errno)); + } + continue; + } + recv_generator(local_name?local_name:flist->files[i].name, + flist,i,f); + } + write_int(f,-1); + write_flush(f); + if (verbose > 2) + fprintf(stderr,"generator wrote %d\n",write_total()); +} diff --git a/rsync.h b/rsync.h new file mode 100644 index 00000000..87b96730 --- /dev/null +++ b/rsync.h @@ -0,0 +1,228 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#define BLOCK_SIZE 700 +#define RSYNC_RSH_ENV "RSYNC_RSH" +#define RSYNC_RSH "rsh" +#define RSYNC_NAME "rsync" +#define BACKUP_SUFFIX "~" + +/* update this if you make incompatible changes */ +#define PROTOCOL_VERSION 9 + +/* block size to write files in */ +#define WRITE_BLOCK_SIZE (32*1024) + +#include "config.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#ifdef HAVE_SYS_PARAM_H +#include +#endif + +#ifdef STDC_HEADERS +# include +# include +#endif + +#ifdef HAVE_COMPAT_H +#include +#endif + +#ifdef HAVE_MALLOC_H +#include +#endif + +#ifdef TIME_WITH_SYS_TIME +#include +#include +#else +#ifdef HAVE_SYS_TIME_H +#include +#else +#include +#endif +#endif + +#ifdef HAVE_FCNTL_H +#include +#else +#ifdef HAVE_SYS_FCNTL_H +#include +#endif +#endif + +#include + +#include +#ifdef HAVE_SYS_WAIT_H +#include +#endif +#ifdef HAVE_CTYPE_H +#include +#endif +#ifdef HAVE_GRP_H +#include +#endif +#include + +#include +#ifdef HAVE_UTIME_H +#include +#endif + +#ifdef HAVE_FNMATCH +#include +#else +#include "lib/fnmatch.h" +#endif + +#ifdef HAVE_GETOPT_LONG +#include +#else +#include "lib/getopt.h" +#endif + + +#ifndef S_ISLNK +#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) +#endif + +#ifndef uchar +#define uchar unsigned char +#endif + +#ifndef int32 +#if (SIZEOF_INT == 4) +#define int32 int +#elif (SIZEOF_LONG == 4) +#define int32 long +#elif (SIZEOF_SHORT == 4) +#define int32 short +#endif +#endif + +#ifndef uint32 +#define uint32 unsigned int32 +#endif + + +#ifndef MIN +#define MIN(a,b) ((a)<(b)?(a):(b)) +#endif + +#ifndef MAX +#define MAX(a,b) ((a)>(b)?(a):(b)) +#endif + +/* the length of the md4 checksum */ +#define SUM_LENGTH 16 + +#ifndef MAXPATHLEN +#define MAXPATHLEN 1024 +#endif + +struct file_struct { + time_t modtime; + off_t length; + mode_t mode; + dev_t dev; + uid_t uid; + gid_t gid; + char *name; + char *dir; + char *link; + char sum[SUM_LENGTH]; +}; + +struct file_list { + int count; + int malloced; + struct file_struct *files; +}; + +struct sum_buf { + off_t offset; /* offset in file of this chunk */ + int len; /* length of chunk of file */ + int i; /* index of this chunk */ + uint32 sum1; /* simple checksum */ + char sum2[SUM_LENGTH]; /* md4 checksum */ +}; + +struct sum_struct { + off_t flength; /* total file length */ + int count; /* how many chunks */ + int remainder; /* flength % block_length */ + int n; /* block_length */ + struct sum_buf *sums; /* points to info for each chunk */ +}; + + +#include "byteorder.h" +#include "version.h" +#include "proto.h" +#include "md4.h" + +#if !HAVE_STRERROR +extern char *sys_errlist[]; +#define strerror(i) sys_errlist[i] +#endif + +#ifndef HAVE_STRCHR +# define strchr index +# define strrchr rindex +#endif + +#if HAVE_DIRENT_H +# include +#else +# define dirent direct +# if HAVE_SYS_NDIR_H +# include +# endif +# if HAVE_SYS_DIR_H +# include +# endif +# if HAVE_NDIR_H +# include +# endif +#endif + +#ifndef HAVE_ERRNO_DECL +extern int errno; +#endif + +#ifndef HAVE_BCOPY +#define bcopy(src,dest,n) memcpy(dest,src,n) +#endif + +#ifndef HAVE_BZERO +#define bzero(buf,n) memset(buf,0,n) +#endif + +#define SUPPORT_LINKS (HAVE_READLINK && defined(S_ISLNK)) + +#if !SUPPORT_LINKS +#define lstat stat +#endif diff --git a/tech_report.tex b/tech_report.tex new file mode 100644 index 00000000..e1ea5c72 --- /dev/null +++ b/tech_report.tex @@ -0,0 +1,310 @@ +\documentclass[a4paper]{article} +\begin{document} + + +\title{The rsync algorithm} + +\author{Andrew Tridgell \quad\quad Paul Mackerras\\ +Department of Computer Science \\ +Australian National University \\ +Canberra, ACT 0200, Australia} + +\maketitle + +\begin{abstract} + This report presents an algorithm for updating a file on one machine + to be identical to a file on another machine. We assume that the + two machines are connected by a low-bandwidth high-latency + bi-directional communications link. The algorithm identifies parts + of the source file which are identical to some part of the + destination file, and only sends those parts which cannot be matched + in this way. Effectively, the algorithm computes a set of + differences without having both files on the same machine. The + algorithm works best when the files are similar, but will also + function correctly and reasonably efficiently when the files are + quite different. +\end{abstract} + +\section{The problem} + +Imagine you have two files, $A$ and $B$, and you wish to update $B$ to be +the same as $A$. The obvious method is to copy $A$ onto $B$. + +Now imagine that the two files are on machines connected by a slow +communications link, for example a dial up IP link. If $A$ is large, +copying $A$ onto $B$ will be slow. To make it faster you could +compress $A$ before sending it, but that will usually only gain a +factor of 2 to 4. + +Now assume that $A$ and $B$ are quite similar, perhaps both derived +from the same original file. To really speed things up you would need +to take advantage of this similarity. A common method is to send just +the differences between $A$ and $B$ down the link and then use this +list of differences to reconstruct the file. + +The problem is that the normal methods for creating a set of +differences between two files rely on being able to read both files. +Thus they require that both files are available beforehand at one end +of the link. If they are not both available on the same machine, +these algorithms cannot be used (once you had copied the file over, +you wouldn't need the differences). This is the problem that rsync +addresses. + +The rsync algorithm efficiently computes which parts of a source file +match some part of an existing destination file. These parts need not +be sent across the link; all that is needed is a reference to the part +of the destination file. Only parts of the source file which are not +matched in this way need to be sent verbatim. The receiver can then +construct a copy of the source file using the references to parts of +the existing destination file and the verbatim material. + +Trivially, the data sent to the receiver can be compressed using any +of a range of common compression algorithms, for further speed +improvements. + +\section{The rsync algorithm} + +Suppose we have two general purpose computers $\alpha$ and $\beta$. +Computer $\alpha$ has access to a file $A$ and $\beta$ has access to +file $B$, where $A$ and $B$ are ``similar''. There is a slow +communications link between $\alpha$ and $\beta$. + +The rsync algorithm consists of the following steps: + +\begin{enumerate} +\item $\beta$ splits the file $B$ into a series of non-overlapping + fixed-sized blocks of size S bytes\footnote{We have found that + values of S between 500 and 1000 are quite good for most purposes}. + The last block may be shorter than $S$ bytes. + +\item For each of these blocks $\beta$ calculates two checksums: + a weak ``rolling'' 32-bit checksum (described below) and a strong + 128-bit MD4 checksum. + +\item $\beta$ sends these checksums to $\alpha$. + +\item $\alpha$ searches through $A$ to find all blocks of length $S$ + bytes (at any offset, not just multiples of $S$) that have the same + weak and strong checksum as one of the blocks of $B$. This can be + done in a single pass very quickly using a special property of the + rolling checksum described below. + +\item $\alpha$ sends $\beta$ a sequence of instructions for + constructing a copy of $A$. Each instruction is either a reference + to a block of $B$, or literal data. Literal data is sent only for + those sections of $A$ which did not match any of the blocks of $B$. +\end{enumerate} + +The end result is that $\beta$ gets a copy of $A$, but only the pieces +of $A$ that are not found in $B$ (plus a small amount of data for +checksums and block indexes) are sent over the link. The algorithm +also only requires one round trip, which minimises the impact of the +link latency. + +The most important details of the algorithm are the rolling checksum +and the associated multi-alternate search mechanism which allows the +all-offsets checksum search to proceed very quickly. These will be +discussed in greater detail below. + +\section{Rolling checksum} + +The weak rolling checksum used in the rsync algorithm needs to have +the property that it is very cheap to calculate the checksum of a +buffer $X_2 .. X_{n+1}$ given the checksum of buffer $X_1 .. X_n$ and +the values of the bytes $X_1$ and $X_{n+1}$. + +The weak checksum algorithm we used in our implementation was inspired +by Mark Adler's adler-32 checksum. Our checksum is defined by +$$ a(k,l) = (\sum_{i=k}^l X_i) \bmod M $$ +$$ b(k,l) = (\sum_{i=k}^l (l-i+1)X_i) \bmod M $$ +$$ s(k,l) = a(k,l) + 2^{16} b(k,l) $$ + +where $s(k,l)$ is the rolling checksum of the bytes $X_k \ldots X_l$. +For simplicity and speed, we use $M = 2^{16}$. + +The important property of this checksum is that successive values can +be computed very efficiently using the recurrence relations + +$$ a(k+1,l+1) = (a(k,l) - X_k + X_{l+1}) \bmod M $$ +$$ b(k+1,l+1) = (b(k,l) - (l-k+1) X_k + a(k+1,l+1)) \bmod M $$ + +Thus the checksum can be calculated for blocks of length S at all +possible offsets within a file in a ``rolling'' fashion, with very +little computation at each point. + +Despite its simplicity, this checksum was found to be quite adequate as +a first level check for a match of two file blocks. We have found in +practice that the probability of this checksum matching when the +blocks are not equal is quite low. This is important because the much +more expensive strong checksum must be calculated for each block where +the weak checksum matches. + +\section{Checksum searching} + +Once $\alpha$ has received the list of checksums of the blocks of $B$, +it must search $A$ for any blocks at any offset that match the +checksum of some block of $B$. The basic strategy is to compute the +32-bit rolling checksum for a block of length $S$ starting at each +byte of $A$ in turn, and for each checksum, search the list for a +match. To do this our implementation uses a +simple 3 level searching scheme. + +The first level uses a 16-bit hash of the 32-bit rolling checksum and +a $2^{16}$ entry hash table. The list of checksum values (i.e., the +checksums from the blocks of $B$) is sorted according to the 16-bit +hash of the 32-bit rolling checksum. Each entry in the hash table +points to the first element of the list for that hash value, or +contains a null value if no element of the list has that hash value. + +At each offset in the file the 32-bit rolling checksum and its 16-bit +hash are calculated. If the hash table entry for that hash value is +not a null value, the second level check is invoked. + +The second level check involves scanning the sorted checksum list +starting with the entry pointed to by the hash table entry, looking +for an entry whose 32-bit rolling checksum matches the current value. +The scan terminates when it reaches an entry whose 16-bit hash +differs. If this search finds a match, the third level check is +invoked. + +The third level check involves calculating the strong checksum for the +current offset in the file and comparing it with the strong checksum +value in the current list entry. If the two strong checksums match, +we assume that we have found a block of $A$ which matches a block of +$B$. In fact the blocks could be different, but the probability of +this is microscopic, and in practice this is a reasonable assumption. + +When a match is found, $\alpha$ sends $\beta$ the data in $A$ between +the current file offset and the end of the previous match, followed by +the index of the block in $B$ that matched. This data is sent +immediately a match is found, which allows us to overlap the +communication with further computation. + +If no match is found at a given offset in the file, the rolling +checksum is updated to the next offset and the search proceeds. If a +match is found, the search is restarted at the end of the matched +block. This strategy saves a considerable amount of computation for +the common case where the two files are nearly identical. In +addition, it would be a simple matter to encode the block indexes as +runs, for the common case where a portion of $A$ matches a series of +blocks of $B$ in order. + +\section{Pipelining} + +The above sections describe the process for constructing a copy of one +file on a remote system. If we have a several files to copy, we can +gain a considerable latency advantage by pipelining the process. + +This involves $\beta$ initiating two independent processes. One of the +processes generates and sends the checksums to $\alpha$ while the +other receives the difference information from $\alpha$ and +reconstructs the files. + +If the communications link is buffered then these two processes can +proceed independently and the link should be kept fully utilised in +both directions for most of the time. + +\section{Results} + +To test the algorithm, tar files were created of the Linux kernel +sources for two versions of the kernel. The two kernel versions were +1.99.10 and 2.0.0. These tar files are approximately 24MB in size and +are separated by 5 released patch levels. + +Out of the 2441 files in the 1.99.10 release, 291 files had changed in +the 2.0.0 release, 19 files had been removed and 25 files had been +added. + +A ``diff'' of the two tar files using the standard GNU diff utility +produced over 32 thousand lines of output totalling 2.1 MB. + +The following table shows the results for rsync between the two files +with a varying block size.\footnote{All the tests in this section were + carried out using rsync version 0.5} + +\vspace*{5mm} +\begin{tabular}{|l|l|l|l|l|l|l|} \hline +{\bf block} & {\bf matches} & {\bf tag} & {\bf false} & {\bf data} & {\bf written} & {\bf read} \\ +{\bf size} & & {\bf hits} & {\bf alarms} & & & \\ \hline \hline + +300 & 64247 & 3817434 & 948 & 5312200 & 5629158 & 1632284 \\ \hline +500 & 46989 & 620013 & 64 & 1091900 & 1283906 & 979384 \\ \hline +700 & 33255 & 571970 & 22 & 1307800 & 1444346 & 699564 \\ \hline +900 & 25686 & 525058 & 24 & 1469500 & 1575438 & 544124 \\ \hline +1100 & 20848 & 496844 & 21 & 1654500 & 1740838 & 445204 \\ \hline +\end{tabular} +\vspace*{5mm} + +In each case, the CPU time taken was less than the +time it takes to run ``diff'' on the two files.\footnote{The wall + clock time was approximately 2 minutes per run on a 50 MHz SPARC 10 + running SunOS, using rsh over loopback for communication. GNU diff + took about 4 minutes.} + +The columns in the table are as follows: + +\begin{description} +\item [block size] The size in bytes of the checksummed blocks. +\item [matches] The number of times a block of $B$ was found in $A$. +\item [tag hits] The number of times the 16 bit hash of the rolling + checksum matched a hash of one of the checksums from $B$. +\item [false alarms] The number of times the 32 bit rolling checksum + matched but the strong checksum didn't. +\item [data] The amount of file data transferred verbatim, in bytes. +\item [written] The total number of bytes written by $\alpha$ + including protocol overheads. This is almost all file data. +\item [read] The total number of bytes read by $\alpha$ including + protocol overheads. This is almost all checksum information. +\end{description} + +The results demonstrate that for block sizes above 300 bytes, only a +small fraction (around 5\%) of the file was transferred. The amount +transferred was also considerably less than the size of the diff file +that would have been transferred if the diff/patch method of updating +a remote file was used. + +The checksums themselves took up a considerable amount of space, +although much less than the size of the data transferred in each +case. Each pair of checksums consumes 20 bytes: 4 bytes for the +rolling checksum plus 16 bytes for the 128-bit MD4 checksum. + +The number of false alarms was less than $1/1000$ of the number of +true matches, indicating that the 32 bit rolling checksum is quite +good at screening out false matches. + +The number of tag hits indicates that the second level of the +checksum search algorithm was invoked about once every 50 +characters. This is quite high because the total number of blocks in +the file is a large fraction of the size of the tag hash table. For +smaller files we would expect the tag hit rate to be much closer to +the number of matches. For extremely large files, we should probably +increase the size of the hash table. + +The next table shows similar results for a much smaller set of files. +In this case the files were not packed into a tar file first. Rather, +rsync was invoked with an option to recursively descend the directory +tree. The files used were from two source releases of another software +package called Samba. The total source code size is 1.7 MB and the +diff between the two releases is 4155 lines long totalling 120 kB. + +\vspace*{5mm} +\begin{tabular}{|l|l|l|l|l|l|l|} \hline +{\bf block} & {\bf matches} & {\bf tag} & {\bf false} & {\bf data} & {\bf written} & {\bf read} \\ +{\bf size} & & {\bf hits} & {\bf alarms} & & & \\ \hline \hline + +300 & 3727 & 3899 & 0 & 129775 & 153999 & 83948 \\ \hline +500 & 2158 & 2325 & 0 & 171574 & 189330 & 50908 \\ \hline +700 & 1517 & 1649 & 0 & 195024 & 210144 & 36828 \\ \hline +900 & 1156 & 1281 & 0 & 222847 & 236471 & 29048 \\ \hline +1100 & 921 & 1049 & 0 & 250073 & 262725 & 23988 \\ \hline +\end{tabular} +\vspace*{5mm} + + +\section{Availability} + +An implementation of rsync which provides a convenient interface +similar to the common UNIX command rcp has been written and is +available for download from ftp://samba.anu.edu.au/pub/rsync. + +\end{document} diff --git a/util.c b/util.c new file mode 100644 index 00000000..ad4dec95 --- /dev/null +++ b/util.c @@ -0,0 +1,229 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* + Utilities used in rsync + + tridge, June 1996 + */ +#include "rsync.h" + +static int total_written = 0; +static int total_read = 0; + +extern int verbose; + +int write_total(void) +{ + return total_written; +} + +int read_total(void) +{ + return total_read; +} + +void write_int(int f,int x) +{ + char b[4]; + SIVAL(b,0,x); + if (write(f,b,4) != 4) { + fprintf(stderr,"write_int failed : %s\n",strerror(errno)); + exit(1); + } + total_written += 4; +} + +void write_buf(int f,char *buf,int len) +{ + if (write(f,buf,len) != len) { + fprintf(stderr,"write_buf failed : %s\n",strerror(errno)); + exit(1); + } + total_written += len; +} + +void write_flush(int f) +{ +} + + +int readfd(int fd,char *buffer,int N) +{ + int ret; + int total=0; + + while (total < N) + { + ret = read(fd,buffer + total,N - total); + + if (ret <= 0) + return total; + total += ret; + } + return total; +} + + +int read_int(int f) +{ + char b[4]; + if (readfd(f,b,4) != 4) { + if (verbose > 1) + fprintf(stderr,"Error reading %d bytes : %s\n",4,strerror(errno)); + exit(1); + } + total_read += 4; + return IVAL(b,0); +} + +void read_buf(int f,char *buf,int len) +{ + if (readfd(f,buf,len) != len) { + if (verbose > 1) + fprintf(stderr,"Error reading %d bytes : %s\n",len,strerror(errno)); + exit(1); + } + total_read += len; +} + + +char *map_file(int fd,off_t len) +{ + char *ret = (char *)mmap(NULL,len,PROT_READ,MAP_SHARED,fd,0); + return ret; +} + +void unmap_file(char *buf,off_t len) +{ + if (len > 0) + munmap(buf,len); +} + + +int read_write(int fd_in,int fd_out,int size) +{ + static char *buf=NULL; + static int bufsize = WRITE_BLOCK_SIZE; + int total=0; + + if (!buf) { + buf = (char *)malloc(bufsize); + if (!buf) out_of_memory("read_write"); + } + + while (total < size) { + int n = MIN(size-total,bufsize); + read_buf(fd_in,buf,n); + if (write(fd_out,buf,n) != n) + return total; + total += n; + } + return total; +} + + +/* this is taken from CVS */ +int piped_child(char **command,int *f_in,int *f_out) +{ + int pid; + int to_child_pipe[2]; + int from_child_pipe[2]; + + if (pipe(to_child_pipe) < 0 || + pipe(from_child_pipe) < 0) { + fprintf(stderr,"pipe: %s\n",strerror(errno)); + exit(1); + } + + + pid = fork(); + if (pid < 0) { + fprintf(stderr,"fork: %s\n",strerror(errno)); + exit(1); + } + + if (pid == 0) + { + if (dup2(to_child_pipe[0], STDIN_FILENO) < 0 || + close(to_child_pipe[1]) < 0 || + close(from_child_pipe[0]) < 0 || + dup2(from_child_pipe[1], STDOUT_FILENO) < 0) { + fprintf(stderr,"Failed to dup/close : %s\n",strerror(errno)); + exit(1); + } + execvp(command[0], command); + fprintf(stderr,"Failed to exec %s : %s\n", + command[0],strerror(errno)); + exit(1); + } + + if (close(from_child_pipe[1]) < 0 || + close(to_child_pipe[0]) < 0) { + fprintf(stderr,"Failed to close : %s\n",strerror(errno)); + exit(1); + } + + *f_in = from_child_pipe[0]; + *f_out = to_child_pipe[1]; + + return pid; +} + + +void out_of_memory(char *str) +{ + fprintf(stderr,"out of memory in %s\n",str); + exit(1); +} + + +#ifndef HAVE_STRDUP + char *strdup(char *s) +{ + int l = strlen(s) + 1; + char *ret = (char *)malloc(l); + if (ret) + strcpy(ret,s); + return ret; +} +#endif + + +int set_modtime(char *fname,time_t modtime) +{ +#ifdef HAVE_UTIME_H + struct utimbuf tbuf; + tbuf.actime = time(NULL); + tbuf.modtime = modtime; + return utime(fname,&tbuf); +#elif defined(HAVE_UTIME) + time_t t[2]; + t[0] = time(NULL); + t[1] = modtime; + return utime(fname,t); +#else + struct timeval t[2]; + t[0].tv_sec = time(NULL); + t[0].tv_usec = 0; + t[1].tv_sec = modtime; + t[1].tv_usec = 0; + return utimes(fname,t); +#endif +} diff --git a/version.h b/version.h new file mode 100644 index 00000000..02927cfe --- /dev/null +++ b/version.h @@ -0,0 +1 @@ +#define VERSION "0.9" -- 2.34.1