From: Andrew Tridgell Date: Sat, 22 Jun 1996 05:04:20 +0000 (+0000) Subject: Initial revision X-Git-Url: https://mattmccutchen.net/rsync/rsync.git/commitdiff_plain/c627d61324e9dcd5df833ee6236dd10415f5bac4 Initial revision --- c627d61324e9dcd5df833ee6236dd10415f5bac4 diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 00000000..5dfdd137 --- /dev/null +++ b/.cvsignore @@ -0,0 +1,50 @@ +Makefile +a +b +config.cache +config.h +config.log +config.status +dist.tar.gz +rsync +rsync-0.1 +rsync-0.1 +rsync-0.1 +rsync-0.1.tar.gz +rsync-0.2 +rsync-0.2 +rsync-0.2.tar.gz +rsync-0.3 +rsync-0.3 +rsync-0.3.tar.gz +rsync-0.4 +rsync-0.4 +rsync-0.4.tar.gz +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5.tar.gz +rsync-0.6 +rsync-0.7 +rsync-0.7 +rsync-0.8 +rsync-0.8 +rsync-0.8 +rsync-0.8 +rsync-0.8.tar.gz +rsync-0.9 +rsync-0.9.tar.gz +rsync-ERSION +rsync.aux +rsync.dvi +rsync.log +tech_report.aux +tech_report.dvi +tech_report.log +tech_report.ps +test diff --git a/.ignore b/.ignore new file mode 100644 index 00000000..dd4c59c1 --- /dev/null +++ b/.ignore @@ -0,0 +1,19 @@ +.#* +*.log +Makefile +config.h +*.o +CVS +.ignore +.cvsignore +*~ +rsync +config.status +config.cache +TAGS +config.log +test +*.gz +rsync-* +*.dvi +*.aux diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..a43ea212 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 00000000..9402dc8c --- /dev/null +++ b/Makefile.in @@ -0,0 +1,49 @@ +# Makefile for rsync. This is processed by configure to produce the final +# Makefile + +INSTALL_BIN=@prefix@/bin +INSTALL_MAN=@prefix@/man + +CCOPTFLAGS = -O + +LIBS=@LIBS@ +CC=@CC@ $(CCOPTFLAGS) + +INSTALLCMD=@INSTALL@ + + +SRC=@srcdir@ +SHELL=/bin/sh + + +.SUFFIXES: +.SUFFIXES: .c .o + +LIBOBJ=lib/getopt.o lib/fnmatch.o +OBJS=rsync.o exclude.o util.o md4.o main.o checksum.o match.o flist.o $(LIBOBJ) + +.c.o: + $(CC) $(CFLAGS) -c $*.c -o $*.o + +all: rsync + +install: all + ${INSTALLCMD} -m 755 rsync ${INSTALL_BIN} + ${INSTALLCMD} -m 644 rsync.1 ${INSTALL_MAN}/man1 + +rsync: $(OBJS) + $(CC) $(CFLAGS) -o rsync $(OBJS) $(LIBS) + +proto: + cat *.c | awk -f mkproto.awk > proto.h + +clean: + rm -f *~ $(OBJS) rsync config.cache config.log config.status + +dist: + tar --exclude-from .ignore -czf dist.tar.gz . + -mkdir rsync-$(VERSION) + (cd rsync-$(VERSION) ; tar xzf ../dist.tar.gz) + tar -czf rsync-$(VERSION).tar.gz rsync-$(VERSION) + rm -f dist.tar.gz + echo rsync-$(VERSION) >> .cvsignore diff --git a/README b/README new file mode 100644 index 00000000..1440c343 --- /dev/null +++ b/README @@ -0,0 +1,82 @@ +WHAT IS RSYNC? +-------------- + +rsync is a replacement for rcp that has many more features. + +rsyns uses the "rsync algorithm" which provides a very fast method for +bringing remote files into sync. It does this by sending just the +differences in the files across the link, without requiring that both +sets of files are present at one of the ends of the link beforehand. +At first glance this may seem impossible because the calculation of +diffs between two files normally requires local access to both +files. + +A technical report describing the rsync algorithm is included with +this package. + + +USAGE +----- + +Basically you use rsync just like rcp, but rsync has many additional options. + +Here is a brief description of available options: + +-v, --verbose increase verbosity +-c, --checksum always checksum +-a, --archive archive mode (same as -rlptDog) +-r, --recursive recurse into directories +-b, --backup make backups (default ~ extension) +-u, --update update only (don't overwrite newer files) +-l, --links preserve soft links +-p, --perms preserve permissions +-o, --owner preserve owner (root only) +-g, --group preserve group +-D, --devices preserve devices (root only) +-t, --times preserve times +-n, --dry-run show what would have been transferred +-x, --one-file-system don't cross filesystem boundaries +-B, --block-size SIZE checksum blocking size +-e, --rsh COMMAND specify rsh replacement + --rsync-path PATH specify path to rsync on the remote machine +-C, --cvs-exclude auto ignore files in the same way CVS does + --delete delete files that don't exist on the sending side +-I, --ignore-times don't exclude files that match length and time + --exclude FILE exclude file FILE + --exclude-from FILE exclude files listed in FILE + --suffix SUFFIX override backup suffix + --version print version number + + + +SETUP +----- + +Rsync uses rsh or ssh for communication. It does not need to be setuid +and requires no special privilages for installation. It does not +require a inetd entry or a daemon. You must, however, have a working +rsh or ssh system. Using ssh is recommended for its security and +compression features. + +To install rsync, first run the "configure" script. This will create a +Makefile and config.h appropriate for your system. Then type +"make". + +Once built put a copy of rsync in your search path on the local and +remote systems (or use "make install"). That's it! + + +COPYRIGHT +--------- + +Rsync was written by Andrew Tridgell and Paul Mackerras, and is +available under the GPL. + +Andrew.Tridgell@anu.edu.au +paulus@cs.anu.edu.au + + +AVAILABILITY +------------ + +The main ftp site for rsync is ftp://samba.anu.edu.au/pub/rsync diff --git a/byteorder.h b/byteorder.h new file mode 100644 index 00000000..5e0a171b --- /dev/null +++ b/byteorder.h @@ -0,0 +1,54 @@ +/* + simple byteorder handling + Copyright (C) Andrew Tridgell 1992-1995 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#undef CAREFUL_ALIGNMENT + +/* we know that the x86 can handle misalignment and has the "right" + byteorder */ +#ifdef __i386__ +#define CAREFUL_ALIGNMENT 0 +#endif + +#ifndef CAREFUL_ALIGNMENT +#define CAREFUL_ALIGNMENT 1 +#endif + +#define CVAL(buf,pos) (((unsigned char *)(buf))[pos]) +#define PVAL(buf,pos) ((unsigned)CVAL(buf,pos)) +#define SCVAL(buf,pos,val) (CVAL(buf,pos) = (val)) + + +#if CAREFUL_ALIGNMENT +#define SVAL(buf,pos) (PVAL(buf,pos)|PVAL(buf,(pos)+1)<<8) +#define IVAL(buf,pos) (SVAL(buf,pos)|SVAL(buf,(pos)+2)<<16) +#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) +#define SIVALX(buf,pos,val) (SSVALX(buf,pos,val&0xFFFF),SSVALX(buf,pos+2,val>>16)) +#define SIVAL(buf,pos,val) SIVALX((buf),(pos),((uint32)(val))) +#else +/* this handles things for architectures like the 386 that can handle + alignment errors */ +/* + WARNING: This section is dependent on the length of int32 + being correct. set CAREFUL_ALIGNMENT if it is not. +*/ +#define IVAL(buf,pos) (*(uint32 *)((char *)(buf) + (pos))) +#define SIVAL(buf,pos,val) IVAL(buf,pos)=((uint32)(val)) +#endif + + diff --git a/checksum.c b/checksum.c new file mode 100644 index 00000000..cdd554fd --- /dev/null +++ b/checksum.c @@ -0,0 +1,78 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "rsync.h" + + +/* + a simple 32 bit checksum that can be upadted from either end + (inspired by Mark Adler's Adler-32 checksum) + */ +uint32 get_checksum1(char *buf,int len) +{ + int i; + uint32 s1, s2; + + s1 = s2 = 0; + for (i = 0; i < len; i++) { + s1 += buf[i]; + s2 += s1; + } + return (s1 & 0xffff) + (s2 << 16); +} + + +void get_checksum2(char *buf,int len,char *sum) +{ + char buf2[64]; + int i; + MDstruct MD; + + MDbegin(&MD); + for(i = 0; i + 64 <= len; i += 64) { + bcopy(buf+i,buf2,64); + MDupdate(&MD, buf2, 512); + } + bcopy(buf+i,buf2,len-i); + MDupdate(&MD, buf2, (len-i)*8); + SIVAL(sum,0,MD.buffer[0]); + SIVAL(sum,4,MD.buffer[1]); + SIVAL(sum,8,MD.buffer[2]); + SIVAL(sum,12,MD.buffer[3]); +} + +void file_checksum(char *fname,char *sum,off_t size) +{ + char *buf; + int fd; + bzero(sum,SUM_LENGTH); + + fd = open(fname,O_RDONLY); + if (fd == -1) return; + + buf = map_file(fd,size); + if (!buf) { + close(fd); + return; + } + + get_checksum2(buf,size,sum); + close(fd); + unmap_file(buf,size); +} diff --git a/configure.in b/configure.in new file mode 100644 index 00000000..9a04b264 --- /dev/null +++ b/configure.in @@ -0,0 +1,48 @@ +dnl Process this file with autoconf to produce a configure script. +AC_INIT(byteorder.h) +AC_CONFIG_HEADER(config.h) + +dnl Checks for programs. +AC_PROG_CC +AC_PROG_INSTALL +AC_SUBST(SHELL) + +AC_HEADER_DIRENT +AC_HEADER_STDC +AC_HEADER_TIME +AC_HEADER_SYS_WAIT +AC_CHECK_HEADERS(sys/fcntl.h fcntl.h sys/time.h unistd.h utime.h grp.h) +AC_CHECK_HEADERS(compat.h sys/param.h ctype.h sys/wait.h) + +AC_CHECK_SIZEOF(int) +AC_CHECK_SIZEOF(long) +AC_CHECK_SIZEOF(short) + +AC_C_INLINE + +AC_TYPE_SIGNAL +AC_TYPE_UID_T +AC_TYPE_MODE_T +AC_TYPE_OFF_T +AC_TYPE_SIZE_T +AC_TYPE_PID_T +AC_STRUCT_ST_RDEV + +echo -n "checking for errno in errno.h... " +AC_TRY_COMPILE([#include ],[int i = errno], +echo yes; AC_DEFINE(HAVE_ERRNO_DECL), +echo no) + +AC_FUNC_MEMCMP +AC_FUNC_MMAP +AC_FUNC_UTIME_NULL +AC_CHECK_FUNCS(waitpid strtok pipe getcwd mkdir strdup strerror chown chmod mknod) +AC_CHECK_FUNCS(fchmod fstat strchr bcopy bzero readlink utime utimes getopt_long) + +echo -n "checking for working fnmatch... " +AC_TRY_RUN([#include +main() { exit(fnmatch("*.o", "x.o", 0) == 0? 0: 1); }], +echo yes;AC_DEFINE(HAVE_FNMATCH), +echo no) + +AC_OUTPUT(Makefile) diff --git a/exclude.c b/exclude.c new file mode 100644 index 00000000..b5b230b9 --- /dev/null +++ b/exclude.c @@ -0,0 +1,202 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* + a lot of this stuff was derived from GNU tar + */ + +#include "rsync.h" + +extern int verbose; + +static char **exclude_list = NULL; + +static int is_regex(char *str) +{ + return strchr(str, '*') || strchr(str, '[') || strchr(str, '?'); +} + + +static int check_one_exclude(char *name,char *pattern) +{ + char *str; + char *p; + + if (!strchr(pattern,'/') && (p=strrchr(name,'/'))) + name = p+1; + + if (!name[0]) return 0; + + if (is_regex(pattern)) { + if (fnmatch(pattern, name, 0) == 0) + return 1; + } else { + int l1 = strlen(name); + int l2 = strlen(pattern); + if (l2 <= l1 && + strcmp(name+(l1-l2),pattern) == 0 && + (l1==l2 || name[l1-(l2+1)] == '/')) + return 1; + } + + return 0; +} + + +int check_exclude(char *name,char **local_exclude_list) +{ + int n; + + if (exclude_list) { + for (n=0; exclude_list[n]; n++) + if (check_one_exclude(name,exclude_list[n])) + return 1; + } + + if (local_exclude_list) { + for (n=0; local_exclude_list[n]; n++) + if (check_one_exclude(name,local_exclude_list[n])) + return 1; + } + + return 0; +} + + +void add_exclude_list(char *pattern,char ***list) +{ + int len=0; + if (list && *list) + for (; (*list)[len]; len++) ; + + if (strcmp(pattern,"!") == 0) { + if (verbose > 2) + fprintf(stderr,"clearing exclude list\n"); + while ((len)--) + free((*list)[len]); + free((*list)); + *list = NULL; + return; + } + + if (!*list) { + *list = (char **)malloc(sizeof(char *)*2); + } else { + *list = (char **)realloc(*list,sizeof(char *)*(len+2)); + } + + if (!*list || !((*list)[len] = strdup(pattern))) + out_of_memory("add_exclude"); + + if (verbose > 2) + fprintf(stderr,"add_exclude(%s)\n",pattern); + + (*list)[len+1] = NULL; +} + +void add_exclude(char *pattern) +{ + add_exclude_list(pattern,&exclude_list); +} + +char **make_exclude_list(char *fname,char **list1,int fatal) +{ + char **list=list1; + FILE *f = fopen(fname,"r"); + char line[MAXPATHLEN]; + if (!f) { + if (fatal) { + fprintf(stderr,"%s : %s\n",fname,strerror(errno)); + exit(1); + } + return list; + } + + while (fgets(line,MAXPATHLEN,f)) { + int l = strlen(line); + if (l && line[l-1] == '\n') l--; + line[l] = 0; + if (line[0]) add_exclude_list(line,&list); + } + fclose(f); + return list; +} + + +void add_exclude_file(char *fname,int fatal) +{ + exclude_list = make_exclude_list(fname,exclude_list,fatal); +} + + +void send_exclude_list(int f) +{ + int i; + if (exclude_list) + for (i=0;exclude_list[i];i++) { + int l = strlen(exclude_list[i]); + if (l == 0) continue; + write_int(f,l); + write_buf(f,exclude_list[i],l); + } + write_int(f,0); +} + + +void recv_exclude_list(int f) +{ + char line[MAXPATHLEN]; + int l; + while ((l=read_int(f))) { + read_buf(f,line,l); + line[l] = 0; + add_exclude(line); + } +} + + +static char *cvs_ignore_list[] = { + "RCS","SCCS","CVS","CVS.adm","RCSLOG","cvslog.*", + "tags","TAGS",".make.state",".nse_depinfo", + "*~", "#*", ".#*", ",*", "*.old", "*.bak", "*.BAK", "*.orig", + "*.rej", ".del-*", "*.a", "*.o", "*.obj", "*.so", "*.Z", "*.elc", "*.ln", + "core",NULL}; + + + +void add_cvs_excludes(void) +{ + char fname[MAXPATHLEN]; + char *p; + int i; + + for (i=0; cvs_ignore_list[i]; i++) + add_exclude(cvs_ignore_list[i]); + + if ((p=getenv("HOME"))) { + sprintf(fname,"%s/.cvsignore",p); + add_exclude_file(fname,0); + } + + if ((p=getenv("CVSIGNORE"))) { + char *tok; + for (tok=strtok(p," "); tok; tok=strtok(NULL," ")) + add_exclude(tok); + } +} diff --git a/flist.c b/flist.c new file mode 100644 index 00000000..8bf964d2 --- /dev/null +++ b/flist.c @@ -0,0 +1,524 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* generate and receive file lists */ + +#include "rsync.h" + +extern int verbose; +extern int am_server; +extern int always_checksum; +extern off_t total_size; + +extern int cvs_exclude; + +extern int one_file_system; +extern int make_backups; +extern int preserve_links; +extern int preserve_perms; +extern int preserve_devices; +extern int preserve_uid; +extern int preserve_gid; +extern int preserve_times; + +static char **local_exclude_list = NULL; + +static void clean_fname(char *name); + + +/* + This function is used to check if a file should be included/excluded + from the list of files based on its name and type etc + */ +static int match_file_name(char *fname,struct stat *st) +{ + if (check_exclude(fname,local_exclude_list)) { + if (verbose > 2) + fprintf(stderr,"excluding file %s\n",fname); + return 0; + } + return 1; +} + +/* used by the one_file_system code */ +static dev_t filesystem_dev; + +static void set_filesystem(char *fname) +{ + struct stat st; + if (lstat(fname,&st) != 0) return; + filesystem_dev = st.st_dev; +} + + +static void send_directory(int f,struct file_list *flist,char *dir); + +static char *flist_dir = NULL; + +static void send_file_entry(struct file_struct *file,int f) +{ + if (f == -1) return; + + write_int(f,strlen(file->name)); + write_buf(f,file->name,strlen(file->name)); + write_int(f,(int)file->modtime); + write_int(f,(int)file->length); + write_int(f,(int)file->mode); + if (preserve_uid) + write_int(f,(int)file->uid); + if (preserve_gid) + write_int(f,(int)file->gid); + if (preserve_devices) { + if (verbose > 2) + fprintf(stderr,"dev=0x%x\n",(int)file->dev); + write_int(f,(int)file->dev); + } + +#if SUPPORT_LINKS + if (preserve_links && S_ISLNK(file->mode)) { + write_int(f,strlen(file->link)); + write_buf(f,file->link,strlen(file->link)); + } +#endif + + if (always_checksum) { + write_buf(f,file->sum,SUM_LENGTH); + } +} + + +static struct file_struct *make_file(int recurse,char *fname) +{ + static struct file_struct file; + struct stat st; + char sum[SUM_LENGTH]; + + bzero(sum,SUM_LENGTH); + + if (lstat(fname,&st) != 0) { + fprintf(stderr,"%s: %s\n", + fname,strerror(errno)); + return NULL; + } + + if (S_ISDIR(st.st_mode) && !recurse) { + fprintf(stderr,"skipping directory %s\n",fname); + return NULL; + } + + if (one_file_system && st.st_dev != filesystem_dev) + return NULL; + + if (!match_file_name(fname,&st)) + return NULL; + + if (verbose > 2) + fprintf(stderr,"make_file(%s)\n",fname); + + file.name = strdup(fname); + file.modtime = st.st_mtime; + file.length = st.st_size; + file.mode = st.st_mode; + file.uid = st.st_uid; + file.gid = st.st_gid; +#ifdef HAVE_ST_RDEV + file.dev = st.st_rdev; +#endif + +#if SUPPORT_LINKS + if (S_ISLNK(st.st_mode)) { + int l; + char lnk[MAXPATHLEN]; + if ((l=readlink(fname,lnk,MAXPATHLEN-1)) == -1) { + fprintf(stderr,"readlink %s : %s\n",fname,strerror(errno)); + return NULL; + } + lnk[l] = 0; + file.link = strdup(lnk); + } +#endif + + if (always_checksum && S_ISREG(st.st_mode)) { + file_checksum(fname,file.sum,st.st_size); + } + + if (flist_dir) + file.dir = strdup(flist_dir); + else + file.dir = NULL; + + if (!S_ISDIR(st.st_mode)) + total_size += st.st_size; + + return &file; +} + + + +static void send_file_name(int f,struct file_list *flist, + int recurse,char *fname) +{ + struct file_struct *file; + + file = make_file(recurse,fname); + + if (!file) return; + + if (flist->count >= flist->malloced) { + flist->malloced += 100; + flist->files = (struct file_struct *)realloc(flist->files, + sizeof(flist->files[0])* + flist->malloced); + if (!flist->files) + out_of_memory("send_file_name"); + } + + if (strcmp(file->name,".") && strcmp(file->name,"/")) { + flist->files[flist->count++] = *file; + send_file_entry(file,f); + } + + if (S_ISDIR(file->mode) && recurse) { + char **last_exclude_list = local_exclude_list; + send_directory(f,flist,file->name); + local_exclude_list = last_exclude_list; + return; + } +} + + + +static void send_directory(int f,struct file_list *flist,char *dir) +{ + DIR *d; + struct dirent *di; + char fname[MAXPATHLEN]; + int l; + char *p; + + d = opendir(dir); + if (!d) { + fprintf(stderr,"%s: %s\n", + dir,strerror(errno)); + return; + } + + strcpy(fname,dir); + l = strlen(fname); + if (fname[l-1] != '/') + strcat(fname,"/"); + p = fname + strlen(fname); + + if (cvs_exclude) { + strcpy(p,".cvsignore"); + local_exclude_list = make_exclude_list(fname,NULL,0); + } + + for (di=readdir(d); di; di=readdir(d)) { + if (strcmp(di->d_name,".")==0 || + strcmp(di->d_name,"..")==0) + continue; + strcpy(p,di->d_name); + send_file_name(f,flist,1,fname); + } + + closedir(d); +} + + + +struct file_list *send_file_list(int f,int recurse,int argc,char *argv[]) +{ + int i,l; + struct stat st; + char *p,*dir; + char dbuf[MAXPATHLEN]; + struct file_list *flist; + + if (verbose && recurse) { + fprintf(am_server?stderr:stdout,"building file list ... "); + fflush(am_server?stderr:stdout); + } + + flist = (struct file_list *)malloc(sizeof(flist[0])); + if (!flist) out_of_memory("send_file_list"); + + flist->count=0; + flist->malloced = 100; + flist->files = (struct file_struct *)malloc(sizeof(flist->files[0])* + flist->malloced); + if (!flist->files) out_of_memory("send_file_list"); + + for (i=0;i 2) + fprintf(stderr,"recv_file_list starting\n"); + + flist = (struct file_list *)malloc(sizeof(flist[0])); + if (!flist) + goto oom; + + flist->count=0; + flist->malloced=100; + flist->files = (struct file_struct *)malloc(sizeof(flist->files[0])* + flist->malloced); + if (!flist->files) + goto oom; + + + for (l=read_int(f); l; l=read_int(f)) { + int i = flist->count; + + if (i >= flist->malloced) { + flist->malloced += 100; + flist->files =(struct file_struct *)realloc(flist->files, + sizeof(flist->files[0])* + flist->malloced); + if (!flist->files) + goto oom; + } + + flist->files[i].name = (char *)malloc(l+1); + if (!flist->files[i].name) + goto oom; + + read_buf(f,flist->files[i].name,l); + flist->files[i].name[l] = 0; + flist->files[i].modtime = (time_t)read_int(f); + flist->files[i].length = (off_t)read_int(f); + flist->files[i].mode = (mode_t)read_int(f); + if (preserve_uid) + flist->files[i].uid = (uid_t)read_int(f); + if (preserve_gid) + flist->files[i].gid = (gid_t)read_int(f); + if (preserve_devices) + flist->files[i].dev = (dev_t)read_int(f); + +#if SUPPORT_LINKS + if (preserve_links && S_ISLNK(flist->files[i].mode)) { + int l = read_int(f); + flist->files[i].link = (char *)malloc(l+1); + read_buf(f,flist->files[i].link,l); + flist->files[i].link[l] = 0; + } +#endif + + if (always_checksum) + read_buf(f,flist->files[i].sum,SUM_LENGTH); + + if (S_ISREG(flist->files[i].mode)) + total_size += flist->files[i].length; + + flist->count++; + + if (verbose > 2) + fprintf(stderr,"recv_file_name(%s)\n",flist->files[i].name); + } + + + if (verbose > 2) + fprintf(stderr,"received %d names\n",flist->count); + + clean_flist(flist); + + return flist; + +oom: + out_of_memory("recv_file_list"); + return NULL; /* not reached */ +} + + +static int flist_compare(struct file_struct *f1,struct file_struct *f2) +{ + if (!f1->name && !f2->name) return 0; + if (!f1->name) return -1; + if (!f2->name) return 1; + return strcmp(f1->name,f2->name); +} + + +int flist_find(struct file_list *flist,struct file_struct *f) +{ + int low=0,high=flist->count; + + while (low != high) { + int mid = (low+high)/2; + int ret = flist_compare(&flist->files[mid],f); + if (ret == 0) return mid; + if (ret > 0) + high=mid; + else + low=mid+1; + } + if (flist_compare(&flist->files[low],f) == 0) + return low; + return -1; +} + + +static void clean_fname(char *name) +{ + char *p; + int l; + int modified = 1; + + if (!name) return; + + while (modified) { + modified = 0; + + if ((p=strstr(name,"/./"))) { + modified = 1; + while (*p) { + p[0] = p[2]; + p++; + } + } + + if ((p=strstr(name,"//"))) { + modified = 1; + while (*p) { + p[0] = p[1]; + p++; + } + } + + if (strncmp(p=name,"./",2) == 0) { + modified = 1; + while (*p) { + p[0] = p[2]; + p++; + } + } + + l = strlen(p=name); + if (l > 1 && p[l-1] == '/') { + modified = 1; + p[l-1] = 0; + } + } +} + + +/* + * This routine ensures we don't have any duplicate names in our file list. + * duplicate names can cause corruption because of the pipelining + */ +void clean_flist(struct file_list *flist) +{ + int i; + + if (!flist || flist->count == 0) + return; + + for (i=0;icount;i++) { + clean_fname(flist->files[i].name); + } + + qsort(flist->files,flist->count, + sizeof(flist->files[0]), + (int (*)())flist_compare); + + for (i=1;icount;i++) { + if (flist->files[i].name && + strcmp(flist->files[i].name,flist->files[i-1].name) == 0) { + if (verbose > 1 && !am_server) + fprintf(stderr,"removing duplicate name %s from file list\n", + flist->files[i].name); + free(flist->files[i-1].name); + flist->files[i-1].name = NULL; + } + } +} + diff --git a/install-sh b/install-sh new file mode 100755 index 00000000..58719246 --- /dev/null +++ b/install-sh @@ -0,0 +1,238 @@ +#! /bin/sh +# +# install - install a program, script, or datafile +# This comes from X11R5. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. +# + + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + + +# put in absolute paths if you don't have them in your path; or use env. vars. + +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +transformbasename="" +transform_arg="" +instcmd="$mvprog" +chmodcmd="$chmodprog 0755" +chowncmd="" +chgrpcmd="" +stripcmd="" +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src="" +dst="" +dir_arg="" + +while [ x"$1" != x ]; do + case $1 in + -c) instcmd="$cpprog" + shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + -s) stripcmd="$stripprog" + shift + continue;; + + -t=*) transformarg=`echo $1 | sed 's/-t=//'` + shift + continue;; + + -b=*) transformbasename=`echo $1 | sed 's/-b=//'` + shift + continue;; + + *) if [ x"$src" = x ] + then + src=$1 + else + # this colon is to work around a 386BSD /bin/sh bug + : + dst=$1 + fi + shift + continue;; + esac +done + +if [ x"$src" = x ] +then + echo "install: no input file specified" + exit 1 +else + true +fi + +if [ x"$dir_arg" != x ]; then + dst=$src + src="" + + if [ -d $dst ]; then + instcmd=: + else + instcmd=mkdir + fi +else + +# Waiting for this to be detected by the "$instcmd $src $dsttmp" command +# might cause directories to be created, which would be especially bad +# if $src (and thus $dsttmp) contains '*'. + + if [ -f $src -o -d $src ] + then + true + else + echo "install: $src does not exist" + exit 1 + fi + + if [ x"$dst" = x ] + then + echo "install: no destination specified" + exit 1 + else + true + fi + +# If destination is a directory, append the input filename; if your system +# does not like double slashes in filenames, you may need to add some logic + + if [ -d $dst ] + then + dst="$dst"/`basename $src` + else + true + fi +fi + +## this sed command emulates the dirname command +dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + +# Make sure that the destination directory exists. +# this part is taken from Noah Friedman's mkinstalldirs script + +# Skip lots of stat calls in the usual case. +if [ ! -d "$dstdir" ]; then +defaultIFS=' +' +IFS="${IFS-${defaultIFS}}" + +oIFS="${IFS}" +# Some sh's can't handle IFS=/ for some reason. +IFS='%' +set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` +IFS="${oIFS}" + +pathcomp='' + +while [ $# -ne 0 ] ; do + pathcomp="${pathcomp}${1}" + shift + + if [ ! -d "${pathcomp}" ] ; + then + $mkdirprog "${pathcomp}" + else + true + fi + + pathcomp="${pathcomp}/" +done +fi + +if [ x"$dir_arg" != x ] +then + $doit $instcmd $dst && + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi +else + +# If we're going to rename the final executable, determine the name now. + + if [ x"$transformarg" = x ] + then + dstfile=`basename $dst` + else + dstfile=`basename $dst $transformbasename | + sed $transformarg`$transformbasename + fi + +# don't allow the sed command to completely eliminate the filename + + if [ x"$dstfile" = x ] + then + dstfile=`basename $dst` + else + true + fi + +# Make a temp file name in the proper directory. + + dsttmp=$dstdir/#inst.$$# + +# Move or copy the file name to the temp name + + $doit $instcmd $src $dsttmp && + + trap "rm -f ${dsttmp}" 0 && + +# and set any options; do chmod last to preserve setuid bits + +# If any of these fail, we abort the whole thing. If we want to +# ignore errors from any of these, just make sure not to ignore +# errors from the above "$doit $instcmd $src $dsttmp" command. + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && + +# Now rename the file to the real destination. + + $doit $rmcmd -f $dstdir/$dstfile && + $doit $mvcmd $dsttmp $dstdir/$dstfile + +fi && + + +exit 0 diff --git a/lib/.cvsignore b/lib/.cvsignore new file mode 100644 index 00000000..33bab952 --- /dev/null +++ b/lib/.cvsignore @@ -0,0 +1,47 @@ +Makefile +a +b +config.cache +config.h +config.log +config.status +dist.tar.gz +rsync +rsync-0.1 +rsync-0.1 +rsync-0.1 +rsync-0.1.tar.gz +rsync-0.2 +rsync-0.2 +rsync-0.2.tar.gz +rsync-0.3 +rsync-0.3 +rsync-0.3.tar.gz +rsync-0.4 +rsync-0.4 +rsync-0.4.tar.gz +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5 +rsync-0.5.tar.gz +rsync-ERSION +rsync.aux +rsync.dvi +rsync.log +tech_report.aux +tech_report.dvi +tech_report.log +tech_report.ps +test +rsync-0.6 +rsync-0.7 +rsync-0.7 +rsync-0.8 +rsync-0.8 +rsync-0.8 +rsync-0.8 diff --git a/lib/fnmatch.c b/lib/fnmatch.c new file mode 100644 index 00000000..6befc3b9 --- /dev/null +++ b/lib/fnmatch.c @@ -0,0 +1,204 @@ +#include "../rsync.h" +#ifndef HAVE_FNMATCH + +/* Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc. + +NOTE: The canonical source of this file is maintained with the GNU C Library. +Bugs can be reported to bug-glibc@prep.ai.mit.edu. + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#if defined (STDC_HEADERS) || !defined (isascii) +#define ISASCII(c) 1 +#else +#define ISASCII(c) isascii(c) +#endif + +#define ISUPPER(c) (ISASCII (c) && isupper (c)) + + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if !defined(__GNU_LIBRARY__) && !defined(STDC_HEADERS) +extern int errno; +#endif + +/* Match STRING against the filename pattern PATTERN, returning zero if + it matches, nonzero if not. */ +int +fnmatch (pattern, string, flags) + const char *pattern; + const char *string; + int flags; +{ + register const char *p = pattern, *n = string; + register char c; + +/* Note that this evalutes C many times. */ +#define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c)) + + while ((c = *p++) != '\0') + { + c = FOLD (c); + + switch (c) + { + case '?': + if (*n == '\0') + return FNM_NOMATCH; + else if ((flags & FNM_FILE_NAME) && *n == '/') + return FNM_NOMATCH; + else if ((flags & FNM_PERIOD) && *n == '.' && + (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/'))) + return FNM_NOMATCH; + break; + + case '\\': + if (!(flags & FNM_NOESCAPE)) + { + c = *p++; + c = FOLD (c); + } + if (FOLD (*n) != c) + return FNM_NOMATCH; + break; + + case '*': + if ((flags & FNM_PERIOD) && *n == '.' && + (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/'))) + return FNM_NOMATCH; + + for (c = *p++; c == '?' || c == '*'; c = *p++, ++n) + if (((flags & FNM_FILE_NAME) && *n == '/') || + (c == '?' && *n == '\0')) + return FNM_NOMATCH; + + if (c == '\0') + return 0; + + { + char c1 = (!(flags & FNM_NOESCAPE) && c == '\\') ? *p : c; + c1 = FOLD (c1); + for (--p; *n != '\0'; ++n) + if ((c == '[' || FOLD (*n) == c1) && + fnmatch (p, n, flags & ~FNM_PERIOD) == 0) + return 0; + return FNM_NOMATCH; + } + + case '[': + { + /* Nonzero if the sense of the character class is inverted. */ + register int not; + + if (*n == '\0') + return FNM_NOMATCH; + + if ((flags & FNM_PERIOD) && *n == '.' && + (n == string || ((flags & FNM_FILE_NAME) && n[-1] == '/'))) + return FNM_NOMATCH; + + not = (*p == '!' || *p == '^'); + if (not) + ++p; + + c = *p++; + for (;;) + { + register char cstart = c, cend = c; + + if (!(flags & FNM_NOESCAPE) && c == '\\') + cstart = cend = *p++; + + cstart = cend = FOLD (cstart); + + if (c == '\0') + /* [ (unterminated) loses. */ + return FNM_NOMATCH; + + c = *p++; + c = FOLD (c); + + if ((flags & FNM_FILE_NAME) && c == '/') + /* [/] can never match. */ + return FNM_NOMATCH; + + if (c == '-' && *p != ']') + { + cend = *p++; + if (!(flags & FNM_NOESCAPE) && cend == '\\') + cend = *p++; + if (cend == '\0') + return FNM_NOMATCH; + cend = FOLD (cend); + + c = *p++; + } + + if (FOLD (*n) >= cstart && FOLD (*n) <= cend) + goto matched; + + if (c == ']') + break; + } + if (!not) + return FNM_NOMATCH; + break; + + matched:; + /* Skip the rest of the [...] that already matched. */ + while (c != ']') + { + if (c == '\0') + /* [... (unterminated) loses. */ + return FNM_NOMATCH; + + c = *p++; + if (!(flags & FNM_NOESCAPE) && c == '\\') + /* XXX 1003.2d11 is unclear if this is right. */ + ++p; + } + if (not) + return FNM_NOMATCH; + } + break; + + default: + if (c != FOLD (*n)) + return FNM_NOMATCH; + } + + ++n; + } + + if (*n == '\0') + return 0; + + if ((flags & FNM_LEADING_DIR) && *n == '/') + /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ + return 0; + + return FNM_NOMATCH; +} + +#else /* HAVE_FNMATCH */ +void fnmatch_dummy(void) {} +#endif diff --git a/lib/fnmatch.h b/lib/fnmatch.h new file mode 100644 index 00000000..ea420e26 --- /dev/null +++ b/lib/fnmatch.h @@ -0,0 +1,69 @@ +/* Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc. + +NOTE: The canonical source of this file is maintained with the GNU C Library. +Bugs can be reported to bug-glibc@prep.ai.mit.edu. + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _FNMATCH_H + +#define _FNMATCH_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined (__cplusplus) || (defined (__STDC__) && __STDC__) +#undef __P +#define __P(protos) protos +#else /* Not C++ or ANSI C. */ +#undef __P +#define __P(protos) () +/* We can get away without defining `const' here only because in this file + it is used only inside the prototype for `fnmatch', which is elided in + non-ANSI C where `const' is problematical. */ +#endif /* C++ or ANSI C. */ + + +/* We #undef these before defining them because some losing systems + (HP-UX A.08.07 for example) define these in . */ +#undef FNM_PATHNAME +#undef FNM_NOESCAPE +#undef FNM_PERIOD + +/* Bits set in the FLAGS argument to `fnmatch'. */ +#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */ +#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */ +#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */ + +#if !defined (_POSIX_C_SOURCE) || _POSIX_C_SOURCE < 2 || defined (_GNU_SOURCE) +#define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */ +#define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */ +#define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */ +#endif + +/* Value returned by `fnmatch' if STRING does not match PATTERN. */ +#define FNM_NOMATCH 1 + +/* Match STRING against the filename pattern PATTERN, + returning zero if it matches, FNM_NOMATCH if not. */ +extern int fnmatch __P ((const char *__pattern, const char *__string, + int __flags)); + +#ifdef __cplusplus +} +#endif + +#endif /* fnmatch.h */ diff --git a/lib/getopt.c b/lib/getopt.c new file mode 100644 index 00000000..2270b568 --- /dev/null +++ b/lib/getopt.c @@ -0,0 +1,751 @@ +#include "../rsync.h" +#ifndef HAVE_GETOPT_LONG + +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu + before changing it! + + Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +#define _NO_PROTO +#endif + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if defined (_LIBC) || !defined (__GNU_LIBRARY__) + +/* This is for other GNU distributions with internationalized messages. + The GNU C Library itself does not yet support such messages. */ +#if HAVE_LIBINTL_H +# include +#else +# define gettext(msgid) (msgid) +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = NULL; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* XXX 1003.2 says this must be 1 before any call. */ +int optind = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return EOF with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +#include +#define my_index strchr +#else + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +char *getenv (); + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +#if !defined (__STDC__) || !__STDC__ +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +#endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +static const char * +_getopt_initialize (optstring) + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind = 1; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns `EOF'. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + optarg = NULL; + + if (optind == 0) + optstring = _getopt_initialize (optstring); + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc + && (argv[optind][0] != '-' || argv[optind][1] == '\0')) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return EOF; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if ((argv[optind][0] != '-' || argv[optind][1] == '\0')) + { + if (ordering == REQUIRE_ORDER) + return EOF; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if (nameend - nextchar == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, gettext ("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + gettext ("%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + gettext ("%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + gettext ("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, gettext ("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, gettext ("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { + if (posixly_correct) + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, gettext ("%s: illegal option -- %c\n"), + argv[0], c); + else + fprintf (stderr, gettext ("%s: invalid option -- %c\n"), + argv[0], c); + } + optopt = c; + return '?'; + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, + gettext ("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +int +getopt_long (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +#endif /* _LIBC or not __GNU_LIBRARY__. */ + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == EOF) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ +#else /* HAVE_GETOPT_LONG */ +void getopt_dummy(void) {} +#endif diff --git a/lib/getopt.h b/lib/getopt.h new file mode 100644 index 00000000..4ac33b71 --- /dev/null +++ b/lib/getopt.h @@ -0,0 +1,129 @@ +/* Declarations for getopt. + Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _GETOPT_H +#define _GETOPT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +#if defined (__STDC__) && __STDC__ + const char *name; +#else + char *name; +#endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +#if defined (__STDC__) && __STDC__ +#ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int argc, char *const *argv, const char *shortopts); +#else /* not __GNU_LIBRARY__ */ +extern int getopt (); +#endif /* __GNU_LIBRARY__ */ +extern int getopt_long (int argc, char *const *argv, const char *shortopts, + const struct option *longopts, int *longind); +extern int getopt_long_only (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind, + int long_only); +#else /* not __STDC__ */ +extern int getopt (); +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _GETOPT_H */ diff --git a/main.c b/main.c new file mode 100644 index 00000000..feda8118 --- /dev/null +++ b/main.c @@ -0,0 +1,718 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "rsync.h" + +int verbose = 0; +int always_checksum = 0; +time_t starttime; +off_t total_size = 0; +int block_size=BLOCK_SIZE; + +char *backup_suffix = BACKUP_SUFFIX; + +static char *rsync_path = RSYNC_NAME; + +int make_backups = 0; +int preserve_links = 0; +int preserve_perms = 0; +int preserve_devices = 0; +int preserve_uid = 0; +int preserve_gid = 0; +int preserve_times = 0; +int update_only = 0; +int cvs_exclude = 0; +int dry_run=0; +int local_server=0; +int ignore_times=0; +int delete_mode=0; +int one_file_system=0; + +int am_server = 0; +static int sender = 0; +int recurse = 0; + +static void usage(FILE *f); + +static void report(int f) +{ + int in,out,tsize; + time_t t = time(NULL); + + if (!verbose) return; + + if (am_server && sender) { + write_int(f,read_total()); + write_int(f,write_total()); + write_int(f,total_size); + write_flush(f); + return; + } + + if (sender) { + in = read_total(); + out = write_total(); + tsize = (int)total_size; + } else { + in = read_int(f); + out = read_int(f); + tsize = read_int(f); + } + + printf("wrote %d bytes read %d bytes %g bytes/sec\n", + out,in,(in+out)/(0.5 + (t-starttime))); + printf("total size is %d speedup is %g\n", + tsize,(1.0*tsize)/(in+out)); +} + + +static void server_options(char **args,int *argc) +{ + int ac = *argc; + static char argstr[50]; + static char bsize[30]; + int i, x; + + args[ac++] = "--server"; + + if (!sender) + args[ac++] = "--sender"; + + x = 1; + argstr[0] = '-'; + for (i=0;i 3) { + fprintf(stderr,"cmd="); + for (i=0;icount > 1) { + fprintf(stderr,"ERROR: destination must be a directory when copying more than 1 file\n"); + exit(1); + } + return name; + } + + if (flist->count == 1) + return name; + + if (!name) + return NULL; + + if (mkdir(name,0777) != 0) { + fprintf(stderr,"mkdir %s : %s\n",name,strerror(errno)); + exit(1); + } + + if (chdir(name) != 0) { + fprintf(stderr,"chdir %s : %s\n",name,strerror(errno)); + exit(1); + } + + return NULL; +} + + + + +void do_server_sender(int argc,char *argv[]) +{ + int i; + char *dir = argv[0]; + struct file_list *flist; + + if (verbose > 2) + fprintf(stderr,"server_sender starting pid=%d\n",(int)getpid()); + + if (chdir(dir) != 0) { + fprintf(stderr,"chdir %s: %s\n",dir,strerror(errno)); + exit(1); + } + argc--; + argv++; + + if (strcmp(dir,".")) { + int l = strlen(dir); + for (i=0;i 2) + fprintf(stderr,"server_recv(%d) starting pid=%d\n",argc,(int)getpid()); + + if (argc > 0) { + dir = argv[0]; + argc--; + argv++; + if (chdir(dir) != 0) { + fprintf(stderr,"chdir %s : %s\n",dir,strerror(errno)); + exit(1); + } + } + + if (delete_mode) + recv_exclude_list(STDIN_FILENO); + + flist = recv_file_list(STDIN_FILENO); + if (!flist || flist->count == 0) { + fprintf(stderr,"nothing to do\n"); + exit(1); + } + + if (argc > 0) { + if (strcmp(dir,".")) { + argv[0] += strlen(dir); + if (argv[0][0] == '/') argv[0]++; + } + local_name = get_local_name(flist,argv[0]); + } + + if ((pid=fork()) == 0) { + recv_files(STDIN_FILENO,flist,local_name); + if (verbose > 2) + fprintf(stderr,"receiver read %d\n",read_total()); + exit(0); + } + + generate_files(STDOUT_FILENO,flist,local_name); + + waitpid(pid, &status, 0); + exit(status); +} + + +static void usage(FILE *f) +{ + fprintf(f,"rsync version %s Copyright Andrew Tridgell and Paul Mackerras\n\n", + VERSION); + fprintf(f,"Usage:\t%s [options] src user@host:dest\nOR",RSYNC_NAME); + fprintf(f,"\t%s [options] user@host:src dest\n\n",RSYNC_NAME); + fprintf(f,"Options:\n"); + fprintf(f,"-v, --verbose increase verbosity\n"); + fprintf(f,"-c, --checksum always checksum\n"); + fprintf(f,"-a, --archive archive mode (same as -rlptDog)\n"); + fprintf(f,"-r, --recursive recurse into directories\n"); + fprintf(f,"-b, --backup make backups (default ~ extension)\n"); + fprintf(f,"-u, --update update only (don't overwrite newer files)\n"); + fprintf(f,"-l, --links preserve soft links\n"); + fprintf(f,"-p, --perms preserve permissions\n"); + fprintf(f,"-o, --owner preserve owner (root only)\n"); + fprintf(f,"-g, --group preserve group\n"); + fprintf(f,"-D, --devices preserve devices (root only)\n"); + fprintf(f,"-t, --times preserve times\n"); + fprintf(f,"-n, --dry-run show what would have been transferred\n"); + fprintf(f,"-x, --one-file-system don't cross filesystem boundaries\n"); + fprintf(f,"-B, --block-size SIZE checksum blocking size\n"); + fprintf(f,"-e, --rsh COMMAND specify rsh replacement\n"); + fprintf(f," --rsync-path PATH specify path to rsync on the remote machine\n"); + fprintf(f,"-C, --cvs-exclude auto ignore files in the same way CVS does\n"); + fprintf(f," --delete delete files that don't exist on the sending side\n"); + fprintf(f,"-I, --ignore-times don't exclude files that match length and time\n"); + fprintf(f," --exclude FILE exclude file FILE\n"); + fprintf(f," --exclude-from FILE exclude files listed in FILE\n"); + fprintf(f," --suffix SUFFIX override backup suffix\n"); + fprintf(f," --version print version number\n"); + + fprintf(f,"\n"); + fprintf(f,"the backup suffix defaults to %s\n",BACKUP_SUFFIX); + fprintf(f,"the block size defaults to %d\n",BLOCK_SIZE); +} + +enum {OPT_VERSION,OPT_SUFFIX,OPT_SENDER,OPT_SERVER,OPT_EXCLUDE, + OPT_EXCLUDE_FROM,OPT_DELETE,OPT_RSYNC_PATH}; + +static char *short_options = "oblpguDCtcahvrIxne:B:"; + +static struct option long_options[] = { + {"version", 0, 0, OPT_VERSION}, + {"server", 0, 0, OPT_SERVER}, + {"sender", 0, 0, OPT_SENDER}, + {"delete", 0, 0, OPT_DELETE}, + {"exclude", 1, 0, OPT_EXCLUDE}, + {"exclude-from",1, 0, OPT_EXCLUDE_FROM}, + {"rsync-path", 1, 0, OPT_RSYNC_PATH}, + {"one-file-system",0, 0, 'x'}, + {"ignore-times",0, 0, 'I'}, + {"help", 0, 0, 'h'}, + {"dry-run", 0, 0, 'n'}, + {"cvs-exclude", 0, 0, 'C'}, + {"archive", 0, 0, 'a'}, + {"checksum", 0, 0, 'c'}, + {"backup", 0, 0, 'b'}, + {"update", 0, 0, 'u'}, + {"verbose", 0, 0, 'v'}, + {"recursive", 0, 0, 'r'}, + {"devices", 0, 0, 'D'}, + {"perms", 0, 0, 'p'}, + {"links", 0, 0, 'l'}, + {"owner", 0, 0, 'o'}, + {"group", 0, 0, 'g'}, + {"times", 0, 0, 't'}, + {"rsh", 1, 0, 'e'}, + {"suffix", 1, 0, OPT_SUFFIX}, + {"block-size", 1, 0, 'B'}, + {0,0,0,0}}; + +int main(int argc,char *argv[]) +{ + int pid, status, pid2, status2; + int opt; + int option_index; + char *shell_cmd = NULL; + char *shell_machine = NULL; + char *shell_path = NULL; + char *shell_user = NULL; + char *p; + int f_in,f_out; + struct file_list *flist; + char *local_name = NULL; + + starttime = time(NULL); + + while ((opt = getopt_long(argc, argv, + short_options, long_options, &option_index)) + != -1) { + switch (opt) + { + case OPT_VERSION: + printf("rsync version %s protocol version %d\n", + VERSION,PROTOCOL_VERSION); + exit(0); + + case OPT_SUFFIX: + backup_suffix = optarg; + break; + + case OPT_RSYNC_PATH: + rsync_path = optarg; + break; + + case 'I': + ignore_times = 1; + break; + + case 'x': + one_file_system=1; + break; + + case OPT_DELETE: + delete_mode = 1; + break; + + case OPT_EXCLUDE: + add_exclude(optarg); + break; + + case OPT_EXCLUDE_FROM: + add_exclude_file(optarg,1); + break; + + case 'h': + usage(stdout); + exit(0); + + case 'b': + make_backups=1; + break; + + case 'n': + dry_run=1; + break; + + case 'C': + cvs_exclude=1; + break; + + case 'u': + update_only=1; + break; + +#if SUPPORT_LINKS + case 'l': + preserve_links=1; + break; +#endif + + case 'p': + preserve_perms=1; + break; + + case 'o': + if (getuid() == 0) { + preserve_uid=1; + } else { + fprintf(stderr,"-o only allowed for root\n"); + exit(1); + } + break; + + case 'g': + preserve_gid=1; + break; + + case 'D': + if (getuid() == 0) { + preserve_devices=1; + } else { + fprintf(stderr,"-D only allowed for root\n"); + exit(1); + } + break; + + case 't': + preserve_times=1; + break; + + case 'c': + always_checksum=1; + break; + + case 'v': + verbose++; + break; + + case 'a': + recurse=1; +#if SUPPORT_LINKS + preserve_links=1; +#endif + preserve_perms=1; + preserve_times=1; + preserve_gid=1; + if (getuid() == 0) { + preserve_devices=1; + preserve_uid=1; + } + break; + + case OPT_SERVER: + am_server = 1; + break; + + case OPT_SENDER: + if (!am_server) { + usage(stderr); + exit(1); + } + sender = 1; + break; + + case 'r': + recurse = 1; + break; + + case 'e': + shell_cmd = optarg; + break; + + case 'B': + block_size = atoi(optarg); + break; + + default: + fprintf(stderr,"bad option -%c\n",opt); + exit(1); + } + } + + while (optind--) { + argc--; + argv++; + } + + if (dry_run) + verbose = MAX(verbose,1); + + if (am_server) { + int version = read_int(STDIN_FILENO); + if (version != PROTOCOL_VERSION) { + fprintf(stderr,"protocol version mismatch %d %d\n", + version,PROTOCOL_VERSION); + exit(1); + } + write_int(STDOUT_FILENO,PROTOCOL_VERSION); + write_flush(STDOUT_FILENO); + + if (sender) { + recv_exclude_list(STDIN_FILENO); + if (cvs_exclude) + add_cvs_excludes(); + do_server_sender(argc,argv); + } else { + do_server_recv(argc,argv); + } + exit(0); + } + + if (argc < 2) { + usage(stderr); + exit(1); + } + + p = strchr(argv[0],':'); + + if (p) { + sender = 0; + *p = 0; + shell_machine = argv[0]; + shell_path = p+1; + argc--; + argv++; + } else { + sender = 1; + + p = strchr(argv[argc-1],':'); + if (!p) { + local_server = 1; + } + + if (local_server) { + shell_machine = NULL; + shell_path = argv[argc-1]; + } else { + *p = 0; + shell_machine = argv[argc-1]; + shell_path = p+1; + } + argc--; + } + + if (shell_machine) { + p = strchr(shell_machine,'@'); + if (p) { + *p = 0; + shell_user = shell_machine; + shell_machine = p+1; + } + } + + if (verbose > 3) { + fprintf(stderr,"cmd=%s machine=%s user=%s path=%s\n", + shell_cmd?shell_cmd:"", + shell_machine?shell_machine:"", + shell_user?shell_user:"", + shell_path?shell_path:""); + } + + signal(SIGCHLD,SIG_IGN); + signal(SIGINT,sig_int); + + if (!sender && argc != 1) { + usage(stderr); + exit(1); + } + + pid = do_cmd(shell_cmd,shell_machine,shell_user,shell_path,&f_in,&f_out); + + write_int(f_out,PROTOCOL_VERSION); + write_flush(f_out); + { + int version = read_int(f_in); + if (version != PROTOCOL_VERSION) { + fprintf(stderr,"protocol version mismatch\n"); + exit(1); + } + } + + if (verbose > 3) + fprintf(stderr,"parent=%d child=%d sender=%d recurse=%d\n", + (int)getpid(),pid,sender,recurse); + + if (sender) { + if (cvs_exclude) + add_cvs_excludes(); + if (delete_mode) + send_exclude_list(f_out); + flist = send_file_list(f_out,recurse,argc,argv); + if (verbose > 3) + fprintf(stderr,"file list sent\n"); + send_files(flist,f_out,f_in); + if (verbose > 3) + fprintf(stderr,"waiting on %d\n",pid); + waitpid(pid, &status, 0); + report(-1); + exit(status); + } + + send_exclude_list(f_out); + + flist = recv_file_list(f_in); + if (!flist || flist->count == 0) { + fprintf(stderr,"nothing to do\n"); + exit(0); + } + + local_name = get_local_name(flist,argv[0]); + + if ((pid2=fork()) == 0) { + recv_files(f_in,flist,local_name); + if (verbose > 1) + fprintf(stderr,"receiver read %d\n",read_total()); + exit(0); + } + + generate_files(f_out,flist,local_name); + + waitpid(pid2, &status2, 0); + + report(f_in); + + waitpid(pid, &status, 0); + + return status | status2; +} diff --git a/match.c b/match.c new file mode 100644 index 00000000..1591fdc6 --- /dev/null +++ b/match.c @@ -0,0 +1,246 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "rsync.h" + +extern int verbose; +extern int am_server; + +typedef unsigned short tag; + +#define TABLESIZE (1<<16) +#define NULL_TAG ((tag)-1) + +static int false_alarms; +static int tag_hits; +static int matches; +static int data_transfer; + +static int total_false_alarms=0; +static int total_tag_hits=0; +static int total_matches=0; +static int total_data_transfer=0; + + +struct target { + tag t; + int i; +}; + +static struct target *targets=NULL; + +static tag *tag_table = NULL; + +#define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF) +#define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16) + +static int compare_targets(struct target *t1,struct target *t2) +{ + return(t1->t - t2->t); +} + + +static void build_hash_table(struct sum_struct *s) +{ + int i; + + if (!tag_table) + tag_table = (tag *)malloc(sizeof(tag)*TABLESIZE); + + targets = (struct target *)malloc(sizeof(targets[0])*s->count); + if (!tag_table || !targets) + out_of_memory("build_hash_table"); + + for (i=0;icount;i++) { + targets[i].i = i; + targets[i].t = gettag(s->sums[i].sum1); + } + + qsort(targets,s->count,sizeof(targets[0]),(int (*)())compare_targets); + + for (i=0;icount-1;i>=0;i--) { + tag_table[targets[i].t] = i; + } +} + + + +static off_t last_match; + + +static void matched(int f,struct sum_struct *s,char *buf,off_t len,int offset,int i) +{ + int n = offset - last_match; + + if (verbose > 2) + if (i != -1) + fprintf(stderr,"match at %d last_match=%d j=%d len=%d n=%d\n", + (int)offset,(int)last_match,i,(int)s->sums[i].len,n); + + if (n > 0) { + write_int(f,n); + write_buf(f,buf+last_match,n); + data_transfer += n; + } + write_int(f,-(i+1)); + if (i != -1) + last_match = offset + s->sums[i].len; + if (n > 0) + write_flush(f); +} + + +static void hash_search(int f,struct sum_struct *s,char *buf,off_t len) +{ + int offset,j,k; + int end; + char sum2[SUM_LENGTH]; + uint32 s1, s2, sum; + + if (verbose > 2) + fprintf(stderr,"hash search b=%d len=%d\n",s->n,(int)len); + + k = MIN(len, s->n); + sum = get_checksum1(buf, k); + s1 = sum & 0xFFFF; + s2 = sum >> 16; + if (verbose > 3) + fprintf(stderr, "sum=%.8x k=%d\n", sum, k); + + offset = 0; + + end = len + 1 - s->sums[s->count-1].len; + + if (verbose > 3) + fprintf(stderr,"hash search s->n=%d len=%d count=%d\n", + s->n,(int)len,s->count); + + do { + tag t = gettag2(s1,s2); + j = tag_table[t]; + if (verbose > 4) + fprintf(stderr,"offset=%d sum=%08x\n", + offset,sum); + + if (j != NULL_TAG) { + int done_csum2 = 0; + + sum = (s1 & 0xffff) | (s2 << 16); + tag_hits++; + do { + int i = targets[j].i; + + if (sum == s->sums[i].sum1) { + if (verbose > 3) + fprintf(stderr,"potential match at %d target=%d %d sum=%08x\n", + offset,j,i,sum); + + if (!done_csum2) { + get_checksum2(buf+offset,MIN(s->n,len-offset),sum2); + done_csum2 = 1; + } + if (memcmp(sum2,s->sums[i].sum2,SUM_LENGTH) == 0) { + matched(f,s,buf,len,offset,i); + offset += s->sums[i].len - 1; + k = MIN((len-offset), s->n); + sum = get_checksum1(buf+offset, k); + s1 = sum & 0xFFFF; + s2 = sum >> 16; + ++matches; + break; + } else { + false_alarms++; + } + } + j++; + } while (jcount && targets[j].t == t); + } + + /* Trim off the first byte from the checksum */ + s1 -= buf[offset]; + s2 -= k * buf[offset]; + + /* Add on the next byte (if there is one) to the checksum */ + if (k < (len-offset)) { + s1 += buf[offset+k]; + s2 += s1; + } else { + --k; + } + + if (verbose > 3) + fprintf(stderr,"s2:s1 = %.4x%.4x sum=%.8x k=%d offset=%d took %x added %x\n", + s2&0xffff, s1&0xffff, get_checksum1(buf+offset+1,k), + k, (int)offset, buf[offset], buf[offset+k]); + } while (++offset < end); + + matched(f,s,buf,len,len,-1); +} + + +void match_sums(int f,struct sum_struct *s,char *buf,off_t len) +{ + last_match = 0; + false_alarms = 0; + tag_hits = 0; + matches=0; + data_transfer=0; + + if (len > 0 && s->count>0) { + build_hash_table(s); + + if (verbose > 2) + fprintf(stderr,"built hash table\n"); + + hash_search(f,s,buf,len); + + if (verbose > 2) + fprintf(stderr,"done hash search\n"); + } else { + matched(f,s,buf,len,len,-1); + } + + if (targets) { + free(targets); + targets=NULL; + } + + if (verbose > 2) + fprintf(stderr, "false_alarms=%d tag_hits=%d matches=%d\n", + false_alarms, tag_hits, matches); + + total_tag_hits += tag_hits; + total_false_alarms += false_alarms; + total_matches += matches; + total_data_transfer += data_transfer; +} + +void match_report(void) +{ + if (verbose <= 1) + return; + + fprintf(am_server?stderr:stdout, + "total: matches=%d tag_hits=%d false_alarms=%d data=%d\n", + total_matches,total_tag_hits, + total_false_alarms,total_data_transfer); +} diff --git a/md4.c b/md4.c new file mode 100644 index 00000000..0a9d821f --- /dev/null +++ b/md4.c @@ -0,0 +1,263 @@ +/* + This code is from rfc1186. + + It has been modified to use the SIVAL() macro to make it + byte order and length independent, so we don't need the LOWBYTEFIRST define +*/ + + /* + ** ******************************************************************** + ** md4.c -- Implementation of MD4 Message Digest Algorithm ** + ** Updated: 2/16/90 by Ronald L. Rivest ** + ** (C) 1990 RSA Data Security, Inc. ** + ** ******************************************************************** + */ + + /* + ** To use MD4: + ** -- Include md4.h in your program + ** -- Declare an MDstruct MD to hold the state of the digest + ** computation. + ** -- Initialize MD using MDbegin(&MD) + ** -- For each full block (64 bytes) X you wish to process, call + ** MDupdate(&MD,X,512) + ** (512 is the number of bits in a full block.) + ** -- For the last block (less than 64 bytes) you wish to process, + ** MDupdate(&MD,X,n) + ** where n is the number of bits in the partial block. A partial + ** block terminates the computation, so every MD computation + ** should terminate by processing a partial block, even if it + ** has n = 0. + ** -- The message digest is available in MD.buffer[0] ... + ** MD.buffer[3]. (Least-significant byte of each word + ** should be output first.) + ** -- You can print out the digest using MDprint(&MD) + */ + +#define TRUE 1 +#define FALSE 0 + + /* Compile-time includes + */ + +#include "rsync.h" + + /* Compile-time declarations of MD4 "magic constants". + */ +#define I0 0x67452301 /* Initial values for MD buffer */ +#define I1 0xefcdab89 +#define I2 0x98badcfe +#define I3 0x10325476 +#define C2 013240474631 /* round 2 constant = sqrt(2) in octal */ +#define C3 015666365641 /* round 3 constant = sqrt(3) in octal */ + /* C2 and C3 are from Knuth, The Art of Programming, Volume 2 + ** (Seminumerical Algorithms), Second Edition (1981), Addison-Wesley. + ** Table 2, page 660. + */ + +#define fs1 3 /* round 1 shift amounts */ +#define fs2 7 +#define fs3 11 +#define fs4 19 +#define gs1 3 /* round 2 shift amounts */ +#define gs2 5 +#define gs3 9 +#define gs4 13 +#define hs1 3 /* round 3 shift amounts */ +#define hs2 9 +#define hs3 11 +#define hs4 15 + + /* Compile-time macro declarations for MD4. + ** Note: The "rot" operator uses the variable "tmp". + ** It assumes tmp is declared as unsigned int, so that the >> + ** operator will shift in zeros rather than extending the sign bit. + */ +#define f(X,Y,Z) ((X&Y) | ((~X)&Z)) +#define g(X,Y,Z) ((X&Y) | (X&Z) | (Y&Z)) +#define h(X,Y,Z) (X^Y^Z) +#define rot(X,S) (tmp=X,(tmp<>(32-S))) +#define ff(A,B,C,D,i,s) A = rot((A + f(B,C,D) + X[i]),s) +#define gg(A,B,C,D,i,s) A = rot((A + g(B,C,D) + X[i] + C2),s) +#define hh(A,B,C,D,i,s) A = rot((A + h(B,C,D) + X[i] + C3),s) + + /* MDbegin(MDp) + ** Initialize message digest buffer MDp. + ** This is a user-callable routine. + */ + void + MDbegin(MDp) + MDptr MDp; + { int i; + MDp->buffer[0] = I0; + MDp->buffer[1] = I1; + MDp->buffer[2] = I2; + MDp->buffer[3] = I3; + for (i=0;i<8;i++) MDp->count[i] = 0; + MDp->done = 0; + } + + /* MDreverse(X) + ** Reverse the byte-ordering of every int in X. + ** Assumes X is an array of 16 ints. + ** The macro revx reverses the byte-ordering of the next word of X. + */ + void MDreverse(X) + unsigned int32 *X; + { register unsigned int32 t; + register unsigned int i; + + for(i = 0; i < 16; i++) { + t = X[i]; + SIVAL(X,i*4,t); + } + } + + /* MDblock(MDp,X) + ** Update message digest buffer MDp->buffer using 16-word data block X. + ** Assumes all 16 words of X are full of data. + ** Does not update MDp->count. + ** This routine is not user-callable. + */ + static void + MDblock(MDp,X) + MDptr MDp; + unsigned int32 *X; + { + register unsigned int32 tmp, A, B, C, D; + MDreverse(X); + A = MDp->buffer[0]; + B = MDp->buffer[1]; + C = MDp->buffer[2]; + D = MDp->buffer[3]; + /* Update the message digest buffer */ + ff(A , B , C , D , 0 , fs1); /* Round 1 */ + ff(D , A , B , C , 1 , fs2); + ff(C , D , A , B , 2 , fs3); + ff(B , C , D , A , 3 , fs4); + ff(A , B , C , D , 4 , fs1); + ff(D , A , B , C , 5 , fs2); + ff(C , D , A , B , 6 , fs3); + ff(B , C , D , A , 7 , fs4); + ff(A , B , C , D , 8 , fs1); + ff(D , A , B , C , 9 , fs2); + ff(C , D , A , B , 10 , fs3); + ff(B , C , D , A , 11 , fs4); + ff(A , B , C , D , 12 , fs1); + ff(D , A , B , C , 13 , fs2); + ff(C , D , A , B , 14 , fs3); + ff(B , C , D , A , 15 , fs4); + gg(A , B , C , D , 0 , gs1); /* Round 2 */ + gg(D , A , B , C , 4 , gs2); + gg(C , D , A , B , 8 , gs3); + gg(B , C , D , A , 12 , gs4); + gg(A , B , C , D , 1 , gs1); + gg(D , A , B , C , 5 , gs2); + gg(C , D , A , B , 9 , gs3); + gg(B , C , D , A , 13 , gs4); + gg(A , B , C , D , 2 , gs1); + gg(D , A , B , C , 6 , gs2); + gg(C , D , A , B , 10 , gs3); + gg(B , C , D , A , 14 , gs4); + gg(A , B , C , D , 3 , gs1); + gg(D , A , B , C , 7 , gs2); + gg(C , D , A , B , 11 , gs3); + gg(B , C , D , A , 15 , gs4); + hh(A , B , C , D , 0 , hs1); /* Round 3 */ + hh(D , A , B , C , 8 , hs2); + hh(C , D , A , B , 4 , hs3); + hh(B , C , D , A , 12 , hs4); + hh(A , B , C , D , 2 , hs1); + hh(D , A , B , C , 10 , hs2); + hh(C , D , A , B , 6 , hs3); + hh(B , C , D , A , 14 , hs4); + hh(A , B , C , D , 1 , hs1); + hh(D , A , B , C , 9 , hs2); + hh(C , D , A , B , 5 , hs3); + hh(B , C , D , A , 13 , hs4); + hh(A , B , C , D , 3 , hs1); + hh(D , A , B , C , 11 , hs2); + hh(C , D , A , B , 7 , hs3); + hh(B , C , D , A , 15 , hs4); + MDp->buffer[0] += A; + MDp->buffer[1] += B; + MDp->buffer[2] += C; + MDp->buffer[3] += D; + } + + /* MDupdate(MDp,X,count) + ** Input: MDp -- an MDptr + ** X -- a pointer to an array of unsigned characters. + ** count -- the number of bits of X to use. + ** (if not a multiple of 8, uses high bits of last byte.) + ** Update MDp using the number of bits of X given by count. + ** This is the basic input routine for an MD4 user. + ** The routine completes the MD computation when count < 512, so + ** every MD computation should end with one call to MDupdate with a + ** count less than 512. A call with count 0 will be ignored if the + ** MD has already been terminated (done != 0), so an extra call with + ** count 0 can be given as a "courtesy close" to force termination + ** if desired. + */ + void + MDupdate(MDp,X,count) + MDptr MDp; + unsigned char *X; + unsigned int count; + { unsigned int32 i, tmp, bit, byte, mask; + unsigned char XX[64]; + unsigned char *p; + /* return with no error if this is a courtesy close with count + ** zero and MDp->done is true. + */ + if (count == 0 && MDp->done) return; + /* check to see if MD is already done and report error */ + if (MDp->done) + { printf("\nError: MDupdate MD already done."); return; } + /* Add count to MDp->count */ + tmp = count; + p = MDp->count; + while (tmp) + { tmp += *p; + *p++ = tmp; + tmp = tmp >> 8; + } + /* Process data */ + if (count == 512) + { /* Full block of data to handle */ + MDblock(MDp,(unsigned int *)X); + } + else if (count > 512) /* Check for count too large */ + { printf("\nError: MDupdate called with illegal count value %d." + ,count); + return; + } + else /* partial block -- must be last block so finish up */ + { /* Find out how many bytes and residual bits there are */ + byte = count >> 3; + bit = count & 7; + /* Copy X into XX since we need to modify it */ + for (i=0;i<=byte;i++) XX[i] = X[i]; + for (i=byte+1;i<64;i++) XX[i] = 0; + /* Add padding '1' bit and low-order zeros in last byte */ + mask = 1 << (7 - bit); + XX[byte] = (XX[byte] | mask) & ~( mask - 1); + /* If room for bit count, finish up with this block */ + if (byte <= 55) + { for (i=0;i<8;i++) XX[56+i] = MDp->count[i]; + MDblock(MDp,(unsigned int32 *)XX); + } + else /* need to do two blocks to finish up */ + { MDblock(MDp,(unsigned int32 *)XX); + for (i=0;i<56;i++) XX[i] = 0; + for (i=0;i<8;i++) XX[56+i] = MDp->count[i]; + MDblock(MDp,(unsigned int32 *)XX); + } + /* Set flag saying we're done with MD computation */ + MDp->done = 1; + } + } + + /* + ** End of md4.c + */ diff --git a/md4.h b/md4.h new file mode 100644 index 00000000..049c7ed9 --- /dev/null +++ b/md4.h @@ -0,0 +1,49 @@ +/* + This code is from rfc1186. +*/ + + /* + ** ******************************************************************** + ** md4.h -- Header file for implementation of ** + ** MD4 Message Digest Algorithm ** + ** Updated: 2/13/90 by Ronald L. Rivest ** + ** (C) 1990 RSA Data Security, Inc. ** + ** ******************************************************************** + */ + + /* MDstruct is the data structure for a message digest computation. + */ + typedef struct { + unsigned int32 buffer[4]; /* Holds 4-word result of MD computation */ + unsigned char count[8]; /* Number of bits processed so far */ + unsigned int done; /* Nonzero means MD computation finished */ + } MDstruct, *MDptr; + + /* MDbegin(MD) + + + + ** Input: MD -- an MDptr + ** Initialize the MDstruct prepatory to doing a message digest + ** computation. + */ + extern void MDbegin(); + + /* MDupdate(MD,X,count) + ** Input: MD -- an MDptr + ** X -- a pointer to an array of unsigned characters. + ** count -- the number of bits of X to use (an unsigned int). + ** Updates MD using the first "count" bits of X. + ** The array pointed to by X is not modified. + ** If count is not a multiple of 8, MDupdate uses high bits of + ** last byte. + ** This is the basic input routine for a user. + ** The routine terminates the MD computation when count < 512, so + ** every MD computation should end with one call to MDupdate with a + ** count less than 512. Zero is OK for a count. + */ + extern void MDupdate(); + + /* + ** End of md4.h + */ diff --git a/mkproto.awk b/mkproto.awk new file mode 100644 index 00000000..0eb03de6 --- /dev/null +++ b/mkproto.awk @@ -0,0 +1,41 @@ +# generate prototypes for Samba C code +# tridge, June 1996 + +BEGIN { + inheader=0; + print "/* This file is automatically generated with \"make proto\". DO NOT EDIT */" + print "" +} + +{ + if (inheader) { + if (match($0,"[)][ \t]*$")) { + inheader = 0; + printf "%s;\n",$0; + } else { + printf "%s\n",$0; + } + next; + } +} + +/^static|^extern/ || !/^[a-zA-Z]/ || /[;]/ { + next; +} + +!/^off_t|^unsigned|^mode_t|^DIR|^user|^int|^char|^uint|^struct|^BOOL|^void|^time/ { + next; +} + + +/[(].*[)][ \t]*$/ { + printf "%s;\n",$0; + next; +} + +/[(]/ { + inheader=1; + printf "%s\n",$0; + next; +} + diff --git a/rsync.c b/rsync.c new file mode 100644 index 00000000..874d8aa9 --- /dev/null +++ b/rsync.c @@ -0,0 +1,733 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "rsync.h" + +extern int verbose; +extern int am_server; +extern int always_checksum; +extern time_t starttime; + +extern char *backup_suffix; + +extern int block_size; +extern int update_only; +extern int make_backups; +extern int preserve_links; +extern int preserve_perms; +extern int preserve_devices; +extern int preserve_uid; +extern int preserve_gid; +extern int preserve_times; +extern int dry_run; +extern int ignore_times; +extern int recurse; +extern int delete_mode; +extern int cvs_exclude; + +/* + free a sums struct + */ +static void free_sums(struct sum_struct *s) +{ + if (s->sums) free(s->sums); + free(s); +} + + + +/* + send a sums struct down a fd + */ +static void send_sums(struct sum_struct *s,int f_out) +{ + int i; + + /* tell the other guy how many we are going to be doing and how many + bytes there are in the last chunk */ + write_int(f_out,s?s->count:0); + write_int(f_out,s?s->n:block_size); + write_int(f_out,s?s->remainder:0); + if (s) + for (i=0;icount;i++) { + write_int(f_out,s->sums[i].sum1); + write_buf(f_out,s->sums[i].sum2,SUM_LENGTH); + } + write_flush(f_out); +} + + +/* + generate a stream of signatures/checksums that describe a buffer + + generate approximately one checksum every n bytes + */ +static struct sum_struct *generate_sums(char *buf,off_t len,int n) +{ + int i; + struct sum_struct *s; + int count; + int block_len = n; + int remainder = (len%block_len); + off_t offset = 0; + + count = (len+(block_len-1))/block_len; + + s = (struct sum_struct *)malloc(sizeof(*s)); + if (!s) out_of_memory("generate_sums"); + + s->count = count; + s->remainder = remainder; + s->n = n; + s->flength = len; + + if (count==0) { + s->sums = NULL; + return s; + } + + if (verbose > 3) + fprintf(stderr,"count=%d rem=%d n=%d flength=%d\n", + s->count,s->remainder,s->n,(int)s->flength); + + s->sums = (struct sum_buf *)malloc(sizeof(s->sums[0])*s->count); + if (!s->sums) out_of_memory("generate_sums"); + + for (i=0;isums[i].sum1 = get_checksum1(buf,n1); + get_checksum2(buf,n1,s->sums[i].sum2); + + s->sums[i].offset = offset; + s->sums[i].len = n1; + s->sums[i].i = i; + + if (verbose > 3) + fprintf(stderr,"chunk[%d] offset=%d len=%d sum1=%08x\n", + i,(int)s->sums[i].offset,s->sums[i].len,s->sums[i].sum1); + + len -= n1; + buf += n1; + offset += n1; + } + + return s; +} + + +/* + receive the checksums for a buffer + */ +static struct sum_struct *receive_sums(int f) +{ + struct sum_struct *s; + int i; + off_t offset = 0; + int block_len; + + s = (struct sum_struct *)malloc(sizeof(*s)); + if (!s) out_of_memory("receive_sums"); + + s->count = read_int(f); + s->n = read_int(f); + s->remainder = read_int(f); + s->sums = NULL; + + if (verbose > 3) + fprintf(stderr,"count=%d n=%d rem=%d\n", + s->count,s->n,s->remainder); + + block_len = s->n; + + if (s->count == 0) + return(s); + + s->sums = (struct sum_buf *)malloc(sizeof(s->sums[0])*s->count); + if (!s->sums) out_of_memory("receive_sums"); + + for (i=0;icount;i++) { + s->sums[i].sum1 = read_int(f); + read_buf(f,s->sums[i].sum2,SUM_LENGTH); + + s->sums[i].offset = offset; + s->sums[i].i = i; + + if (i == s->count-1 && s->remainder != 0) { + s->sums[i].len = s->remainder; + } else { + s->sums[i].len = s->n; + } + offset += s->sums[i].len; + + if (verbose > 3) + fprintf(stderr,"chunk[%d] len=%d offset=%d sum1=%08x\n", + i,s->sums[i].len,(int)s->sums[i].offset,s->sums[i].sum1); + } + + s->flength = offset; + + return s; +} + + +static void set_perms(char *fname,struct file_struct *file,struct stat *st, + int report) +{ + int updated = 0; + struct stat st2; + + if (dry_run) return; + + if (!st) { + if (stat(fname,&st2) != 0) { + fprintf(stderr,"stat %s : %s\n",fname,strerror(errno)); + return; + } + st = &st2; + } + + if (preserve_times && st->st_mtime != file->modtime) { + updated = 1; + if (set_modtime(fname,file->modtime) != 0) { + fprintf(stderr,"failed to set times on %s : %s\n", + fname,strerror(errno)); + return; + } + } + +#ifdef HAVE_CHMOD + if (preserve_perms && st->st_mode != file->mode) { + updated = 1; + if (chmod(fname,file->mode) != 0) { + fprintf(stderr,"failed to set permissions on %s : %s\n", + fname,strerror(errno)); + return; + } + } +#endif + + if ((preserve_uid && st->st_uid != file->uid) || + (preserve_gid && st->st_gid != file->gid)) { + updated = 1; + if (chown(fname, + preserve_uid?file->uid:-1, + preserve_gid?file->gid:-1) != 0) { + if (verbose>1 || preserve_uid) + fprintf(stderr,"chown %s : %s\n",fname,strerror(errno)); + return; + } + } + + if (verbose > 1 && report) { + if (updated) + fprintf(am_server?stderr:stdout,"%s\n",fname); + else + fprintf(am_server?stderr:stdout,"%s is uptodate\n",fname); + } +} + + +void recv_generator(char *fname,struct file_list *flist,int i,int f_out) +{ + int fd; + struct stat st; + char *buf; + struct sum_struct *s; + char sum[SUM_LENGTH]; + int statret; + + if (verbose > 2) + fprintf(stderr,"recv_generator(%s)\n",fname); + + statret = lstat(fname,&st); + +#if SUPPORT_LINKS + if (preserve_links && S_ISLNK(flist->files[i].mode)) { + char lnk[MAXPATHLEN]; + int l; + if (statret == 0) { + l = readlink(fname,lnk,MAXPATHLEN-1); + if (l > 0) { + lnk[l] = 0; + if (strcmp(lnk,flist->files[i].link) == 0) { + if (verbose > 1) + fprintf(am_server?stderr:stdout,"%s is uptodate\n",fname); + return; + } + } + } + if (!dry_run) unlink(fname); + if (!dry_run && symlink(flist->files[i].link,fname) != 0) { + fprintf(stderr,"link %s -> %s : %s\n", + fname,flist->files[i].link,strerror(errno)); + } else { + if (verbose) + fprintf(am_server?stderr:stdout,"%s -> %s\n",fname,flist->files[i].link); + } + return; + } +#endif + +#ifdef HAVE_MKNOD + if (preserve_devices && + (S_ISCHR(flist->files[i].mode) || S_ISBLK(flist->files[i].mode))) { + if (statret != 0 || + st.st_mode != flist->files[i].mode || + st.st_rdev != flist->files[i].dev) { + if (!dry_run) unlink(fname); + if (verbose > 2) + fprintf(stderr,"mknod(%s,0%o,0x%x)\n", + fname,(int)flist->files[i].mode,(int)flist->files[i].dev); + if (!dry_run && + mknod(fname,flist->files[i].mode,flist->files[i].dev) != 0) { + fprintf(stderr,"mknod %s : %s\n",fname,strerror(errno)); + } else { + set_perms(fname,&flist->files[i],NULL,0); + if (verbose) + fprintf(am_server?stderr:stdout,"%s\n",fname); + } + } else { + set_perms(fname,&flist->files[i],&st,1); + } + return; + } +#endif + + if (!S_ISREG(flist->files[i].mode)) { + fprintf(stderr,"skipping non-regular file %s\n",fname); + return; + } + + if (statret == -1) { + if (errno == ENOENT) { + write_int(f_out,i); + if (!dry_run) send_sums(NULL,f_out); + } else { + if (verbose > 1) + fprintf(stderr,"recv_generator failed to open %s\n",fname); + } + return; + } + + if (!S_ISREG(st.st_mode)) { + fprintf(stderr,"%s : not a regular file\n",fname); + return; + } + + if (update_only && st.st_mtime >= flist->files[i].modtime) { + if (verbose > 1) + fprintf(stderr,"%s is newer\n",fname); + return; + } + + if (always_checksum && S_ISREG(st.st_mode)) { + file_checksum(fname,sum,st.st_size); + } + + if (st.st_size == flist->files[i].length && + ((!ignore_times && st.st_mtime == flist->files[i].modtime) || + (always_checksum && S_ISREG(st.st_mode) && + memcmp(sum,flist->files[i].sum,SUM_LENGTH) == 0))) { + set_perms(fname,&flist->files[i],&st,1); + return; + } + + if (dry_run) { + write_int(f_out,i); + return; + } + + /* open the file */ + fd = open(fname,O_RDONLY); + + if (fd == -1) { + fprintf(stderr,"failed to open %s : %s\n",fname,strerror(errno)); + return; + } + + if (st.st_size > 0) { + buf = map_file(fd,st.st_size); + if (!buf) { + fprintf(stderr,"mmap : %s\n",strerror(errno)); + close(fd); + return; + } + } else { + buf = NULL; + } + + if (verbose > 3) + fprintf(stderr,"mapped %s of size %d\n",fname,(int)st.st_size); + + s = generate_sums(buf,st.st_size,block_size); + + write_int(f_out,i); + send_sums(s,f_out); + write_flush(f_out); + + close(fd); + unmap_file(buf,st.st_size); + + free_sums(s); +} + + + +static void receive_data(int f_in,char *buf,int fd,char *fname) +{ + int i,n,remainder,len,count; + off_t offset = 0; + off_t offset2; + + count = read_int(f_in); + n = read_int(f_in); + remainder = read_int(f_in); + + for (i=read_int(f_in); i != 0; i=read_int(f_in)) { + if (i > 0) { + if (verbose > 3) + fprintf(stderr,"data recv %d at %d\n",i,(int)offset); + + if (read_write(f_in,fd,i) != i) { + fprintf(stderr,"write failed on %s : %s\n",fname,strerror(errno)); + exit(1); + } + offset += i; + } else { + i = -(i+1); + offset2 = i*n; + len = n; + if (i == count-1 && remainder != 0) + len = remainder; + + if (verbose > 3) + fprintf(stderr,"chunk[%d] of size %d at %d offset=%d\n", + i,len,(int)offset2,(int)offset); + + if (write(fd,buf+offset2,len) != len) { + fprintf(stderr,"write failed on %s : %s\n",fname,strerror(errno)); + exit(1); + } + offset += len; + } + } +} + + +static void delete_one(struct file_struct *f) +{ + if (!S_ISDIR(f->mode)) { + if (!dry_run && unlink(f->name) != 0) { + fprintf(stderr,"unlink %s : %s\n",f->name,strerror(errno)); + } else if (verbose) { + fprintf(stderr,"deleting %s\n",f->name); + } + } else { + if (!dry_run && rmdir(f->name) != 0) { + if (errno != ENOTEMPTY) + fprintf(stderr,"rmdir %s : %s\n",f->name,strerror(errno)); + } else if (verbose) { + fprintf(stderr,"deleting directory %s\n",f->name); + } + } +} + + +static void delete_files(struct file_list *flist) +{ + struct file_list *local_file_list; + char *dot="."; + int i; + + if (!(local_file_list = send_file_list(-1,recurse,1,&dot))) + return; + + for (i=local_file_list->count;i>=0;i--) { + if (!local_file_list->files[i].name) continue; + if (-1 == flist_find(flist,&local_file_list->files[i])) { + delete_one(&local_file_list->files[i]); + } + } +} + +static char *cleanup_fname = NULL; + +int sig_int(void) +{ + if (cleanup_fname) + unlink(cleanup_fname); + exit(1); +} + + +int recv_files(int f_in,struct file_list *flist,char *local_name) +{ + int fd1,fd2; + struct stat st; + char *fname; + char fnametmp[MAXPATHLEN]; + char *buf; + int i; + + if (verbose > 2) + fprintf(stderr,"recv_files(%d) starting\n",flist->count); + + if (recurse && delete_mode && !local_name && flist->count>0) { + delete_files(flist); + } + + while (1) + { + i = read_int(f_in); + if (i == -1) break; + + fname = flist->files[i].name; + + if (local_name) + fname = local_name; + + if (dry_run) { + if (!am_server && verbose) + printf("%s\n",fname); + continue; + } + + if (verbose > 2) + fprintf(stderr,"recv_files(%s)\n",fname); + + /* open the file */ + if ((fd1 = open(fname,O_RDONLY)) == -1 && + (fd1 = open(fname,O_RDONLY|O_CREAT,flist->files[i].mode)) == -1) { + fprintf(stderr,"recv_files failed to open %s\n",fname); + return -1; + } + + if (fstat(fd1,&st) != 0) { + fprintf(stderr,"fstat %s : %s\n",fname,strerror(errno)); + close(fd1); + return -1; + } + + if (!S_ISREG(st.st_mode)) { + fprintf(stderr,"%s : not a regular file\n",fname); + close(fd1); + return -1; + } + + if (st.st_size > 0) { + buf = map_file(fd1,st.st_size); + if (!buf) { + fprintf(stderr,"map_file failed\n"); + return -1; + } + } else { + buf = NULL; + } + + if (verbose > 2) + fprintf(stderr,"mapped %s of size %d\n",fname,(int)st.st_size); + + /* open tmp file */ + sprintf(fnametmp,"%s.XXXXXX",fname); + if (NULL == mktemp(fnametmp)) { + fprintf(stderr,"mktemp %s failed\n",fnametmp); + return -1; + } + fd2 = open(fnametmp,O_WRONLY|O_CREAT,st.st_mode); + if (fd2 == -1) { + fprintf(stderr,"open %s : %s\n",fnametmp,strerror(errno)); + return -1; + } + + cleanup_fname = fnametmp; + + if (!am_server && verbose) + printf("%s\n",fname); + + /* recv file data */ + receive_data(f_in,buf,fd2,fname); + + close(fd1); + close(fd2); + + if (verbose > 2) + fprintf(stderr,"renaming %s to %s\n",fnametmp,fname); + + if (make_backups) { + char fnamebak[MAXPATHLEN]; + sprintf(fnamebak,"%s%s",fname,backup_suffix); + if (rename(fname,fnamebak) != 0) { + fprintf(stderr,"rename %s %s : %s\n",fname,fnamebak,strerror(errno)); + exit(1); + } + } + + /* move tmp file over real file */ + if (rename(fnametmp,fname) != 0) { + fprintf(stderr,"rename %s -> %s : %s\n", + fnametmp,fname,strerror(errno)); + } + + cleanup_fname = NULL; + + unmap_file(buf,st.st_size); + + set_perms(fname,&flist->files[i],NULL,0); + } + + if (verbose > 2) + fprintf(stderr,"recv_files finished\n"); + + return 0; +} + + + +off_t send_files(struct file_list *flist,int f_out,int f_in) +{ + int fd; + struct sum_struct *s; + char *buf; + struct stat st; + char fname[MAXPATHLEN]; + off_t total=0; + int i; + + if (verbose > 2) + fprintf(stderr,"send_files starting\n"); + + while (1) + { + i = read_int(f_in); + if (i == -1) break; + + fname[0] = 0; + if (flist->files[i].dir) { + strcpy(fname,flist->files[i].dir); + strcat(fname,"/"); + } + strcat(fname,flist->files[i].name); + + if (verbose > 2) + fprintf(stderr,"send_files(%d,%s)\n",i,fname); + + if (dry_run) { + if (!am_server && verbose) + printf("%s\n",fname); + write_int(f_out,i); + continue; + } + + s = receive_sums(f_in); + if (!s) { + fprintf(stderr,"receive_sums failed\n"); + return -1; + } + + fd = open(fname,O_RDONLY); + if (fd == -1) { + fprintf(stderr,"send_files failed to open %s: %s\n", + fname,strerror(errno)); + continue; + } + + /* map the local file */ + if (fstat(fd,&st) != 0) { + fprintf(stderr,"fstat failed : %s\n",strerror(errno)); + return -1; + } + + if (st.st_size > 0) { + buf = map_file(fd,st.st_size); + if (!buf) { + fprintf(stderr,"map_file failed : %s\n",strerror(errno)); + return -1; + } + } else { + buf = NULL; + } + + if (verbose > 2) + fprintf(stderr,"send_files mapped %s of size %d\n", + fname,(int)st.st_size); + + write_int(f_out,i); + + write_int(f_out,s->count); + write_int(f_out,s->n); + write_int(f_out,s->remainder); + + if (verbose > 2) + fprintf(stderr,"calling match_sums %s\n",fname); + + if (!am_server && verbose) + printf("%s\n",fname); + + match_sums(f_out,s,buf,st.st_size); + write_flush(f_out); + + unmap_file(buf,st.st_size); + close(fd); + + free_sums(s); + + if (verbose > 2) + fprintf(stderr,"sender finished %s\n",fname); + + total += st.st_size; + } + + match_report(); + + write_int(f_out,-1); + write_flush(f_out); + + return total; +} + + + +void generate_files(int f,struct file_list *flist,char *local_name) +{ + int i; + + if (verbose > 2) + fprintf(stderr,"generator starting pid=%d count=%d\n", + (int)getpid(),flist->count); + + for (i = 0; i < flist->count; i++) { + if (!flist->files[i].name) continue; + if (S_ISDIR(flist->files[i].mode)) { + if (dry_run) continue; + if (mkdir(flist->files[i].name,flist->files[i].mode) != 0 && + errno != EEXIST) { + fprintf(stderr,"mkdir %s : %s\n", + flist->files[i].name,strerror(errno)); + } + continue; + } + recv_generator(local_name?local_name:flist->files[i].name, + flist,i,f); + } + write_int(f,-1); + write_flush(f); + if (verbose > 2) + fprintf(stderr,"generator wrote %d\n",write_total()); +} diff --git a/rsync.h b/rsync.h new file mode 100644 index 00000000..87b96730 --- /dev/null +++ b/rsync.h @@ -0,0 +1,228 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#define BLOCK_SIZE 700 +#define RSYNC_RSH_ENV "RSYNC_RSH" +#define RSYNC_RSH "rsh" +#define RSYNC_NAME "rsync" +#define BACKUP_SUFFIX "~" + +/* update this if you make incompatible changes */ +#define PROTOCOL_VERSION 9 + +/* block size to write files in */ +#define WRITE_BLOCK_SIZE (32*1024) + +#include "config.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#ifdef HAVE_SYS_PARAM_H +#include +#endif + +#ifdef STDC_HEADERS +# include +# include +#endif + +#ifdef HAVE_COMPAT_H +#include +#endif + +#ifdef HAVE_MALLOC_H +#include +#endif + +#ifdef TIME_WITH_SYS_TIME +#include +#include +#else +#ifdef HAVE_SYS_TIME_H +#include +#else +#include +#endif +#endif + +#ifdef HAVE_FCNTL_H +#include +#else +#ifdef HAVE_SYS_FCNTL_H +#include +#endif +#endif + +#include + +#include +#ifdef HAVE_SYS_WAIT_H +#include +#endif +#ifdef HAVE_CTYPE_H +#include +#endif +#ifdef HAVE_GRP_H +#include +#endif +#include + +#include +#ifdef HAVE_UTIME_H +#include +#endif + +#ifdef HAVE_FNMATCH +#include +#else +#include "lib/fnmatch.h" +#endif + +#ifdef HAVE_GETOPT_LONG +#include +#else +#include "lib/getopt.h" +#endif + + +#ifndef S_ISLNK +#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) +#endif + +#ifndef uchar +#define uchar unsigned char +#endif + +#ifndef int32 +#if (SIZEOF_INT == 4) +#define int32 int +#elif (SIZEOF_LONG == 4) +#define int32 long +#elif (SIZEOF_SHORT == 4) +#define int32 short +#endif +#endif + +#ifndef uint32 +#define uint32 unsigned int32 +#endif + + +#ifndef MIN +#define MIN(a,b) ((a)<(b)?(a):(b)) +#endif + +#ifndef MAX +#define MAX(a,b) ((a)>(b)?(a):(b)) +#endif + +/* the length of the md4 checksum */ +#define SUM_LENGTH 16 + +#ifndef MAXPATHLEN +#define MAXPATHLEN 1024 +#endif + +struct file_struct { + time_t modtime; + off_t length; + mode_t mode; + dev_t dev; + uid_t uid; + gid_t gid; + char *name; + char *dir; + char *link; + char sum[SUM_LENGTH]; +}; + +struct file_list { + int count; + int malloced; + struct file_struct *files; +}; + +struct sum_buf { + off_t offset; /* offset in file of this chunk */ + int len; /* length of chunk of file */ + int i; /* index of this chunk */ + uint32 sum1; /* simple checksum */ + char sum2[SUM_LENGTH]; /* md4 checksum */ +}; + +struct sum_struct { + off_t flength; /* total file length */ + int count; /* how many chunks */ + int remainder; /* flength % block_length */ + int n; /* block_length */ + struct sum_buf *sums; /* points to info for each chunk */ +}; + + +#include "byteorder.h" +#include "version.h" +#include "proto.h" +#include "md4.h" + +#if !HAVE_STRERROR +extern char *sys_errlist[]; +#define strerror(i) sys_errlist[i] +#endif + +#ifndef HAVE_STRCHR +# define strchr index +# define strrchr rindex +#endif + +#if HAVE_DIRENT_H +# include +#else +# define dirent direct +# if HAVE_SYS_NDIR_H +# include +# endif +# if HAVE_SYS_DIR_H +# include +# endif +# if HAVE_NDIR_H +# include +# endif +#endif + +#ifndef HAVE_ERRNO_DECL +extern int errno; +#endif + +#ifndef HAVE_BCOPY +#define bcopy(src,dest,n) memcpy(dest,src,n) +#endif + +#ifndef HAVE_BZERO +#define bzero(buf,n) memset(buf,0,n) +#endif + +#define SUPPORT_LINKS (HAVE_READLINK && defined(S_ISLNK)) + +#if !SUPPORT_LINKS +#define lstat stat +#endif diff --git a/tech_report.tex b/tech_report.tex new file mode 100644 index 00000000..e1ea5c72 --- /dev/null +++ b/tech_report.tex @@ -0,0 +1,310 @@ +\documentclass[a4paper]{article} +\begin{document} + + +\title{The rsync algorithm} + +\author{Andrew Tridgell \quad\quad Paul Mackerras\\ +Department of Computer Science \\ +Australian National University \\ +Canberra, ACT 0200, Australia} + +\maketitle + +\begin{abstract} + This report presents an algorithm for updating a file on one machine + to be identical to a file on another machine. We assume that the + two machines are connected by a low-bandwidth high-latency + bi-directional communications link. The algorithm identifies parts + of the source file which are identical to some part of the + destination file, and only sends those parts which cannot be matched + in this way. Effectively, the algorithm computes a set of + differences without having both files on the same machine. The + algorithm works best when the files are similar, but will also + function correctly and reasonably efficiently when the files are + quite different. +\end{abstract} + +\section{The problem} + +Imagine you have two files, $A$ and $B$, and you wish to update $B$ to be +the same as $A$. The obvious method is to copy $A$ onto $B$. + +Now imagine that the two files are on machines connected by a slow +communications link, for example a dial up IP link. If $A$ is large, +copying $A$ onto $B$ will be slow. To make it faster you could +compress $A$ before sending it, but that will usually only gain a +factor of 2 to 4. + +Now assume that $A$ and $B$ are quite similar, perhaps both derived +from the same original file. To really speed things up you would need +to take advantage of this similarity. A common method is to send just +the differences between $A$ and $B$ down the link and then use this +list of differences to reconstruct the file. + +The problem is that the normal methods for creating a set of +differences between two files rely on being able to read both files. +Thus they require that both files are available beforehand at one end +of the link. If they are not both available on the same machine, +these algorithms cannot be used (once you had copied the file over, +you wouldn't need the differences). This is the problem that rsync +addresses. + +The rsync algorithm efficiently computes which parts of a source file +match some part of an existing destination file. These parts need not +be sent across the link; all that is needed is a reference to the part +of the destination file. Only parts of the source file which are not +matched in this way need to be sent verbatim. The receiver can then +construct a copy of the source file using the references to parts of +the existing destination file and the verbatim material. + +Trivially, the data sent to the receiver can be compressed using any +of a range of common compression algorithms, for further speed +improvements. + +\section{The rsync algorithm} + +Suppose we have two general purpose computers $\alpha$ and $\beta$. +Computer $\alpha$ has access to a file $A$ and $\beta$ has access to +file $B$, where $A$ and $B$ are ``similar''. There is a slow +communications link between $\alpha$ and $\beta$. + +The rsync algorithm consists of the following steps: + +\begin{enumerate} +\item $\beta$ splits the file $B$ into a series of non-overlapping + fixed-sized blocks of size S bytes\footnote{We have found that + values of S between 500 and 1000 are quite good for most purposes}. + The last block may be shorter than $S$ bytes. + +\item For each of these blocks $\beta$ calculates two checksums: + a weak ``rolling'' 32-bit checksum (described below) and a strong + 128-bit MD4 checksum. + +\item $\beta$ sends these checksums to $\alpha$. + +\item $\alpha$ searches through $A$ to find all blocks of length $S$ + bytes (at any offset, not just multiples of $S$) that have the same + weak and strong checksum as one of the blocks of $B$. This can be + done in a single pass very quickly using a special property of the + rolling checksum described below. + +\item $\alpha$ sends $\beta$ a sequence of instructions for + constructing a copy of $A$. Each instruction is either a reference + to a block of $B$, or literal data. Literal data is sent only for + those sections of $A$ which did not match any of the blocks of $B$. +\end{enumerate} + +The end result is that $\beta$ gets a copy of $A$, but only the pieces +of $A$ that are not found in $B$ (plus a small amount of data for +checksums and block indexes) are sent over the link. The algorithm +also only requires one round trip, which minimises the impact of the +link latency. + +The most important details of the algorithm are the rolling checksum +and the associated multi-alternate search mechanism which allows the +all-offsets checksum search to proceed very quickly. These will be +discussed in greater detail below. + +\section{Rolling checksum} + +The weak rolling checksum used in the rsync algorithm needs to have +the property that it is very cheap to calculate the checksum of a +buffer $X_2 .. X_{n+1}$ given the checksum of buffer $X_1 .. X_n$ and +the values of the bytes $X_1$ and $X_{n+1}$. + +The weak checksum algorithm we used in our implementation was inspired +by Mark Adler's adler-32 checksum. Our checksum is defined by +$$ a(k,l) = (\sum_{i=k}^l X_i) \bmod M $$ +$$ b(k,l) = (\sum_{i=k}^l (l-i+1)X_i) \bmod M $$ +$$ s(k,l) = a(k,l) + 2^{16} b(k,l) $$ + +where $s(k,l)$ is the rolling checksum of the bytes $X_k \ldots X_l$. +For simplicity and speed, we use $M = 2^{16}$. + +The important property of this checksum is that successive values can +be computed very efficiently using the recurrence relations + +$$ a(k+1,l+1) = (a(k,l) - X_k + X_{l+1}) \bmod M $$ +$$ b(k+1,l+1) = (b(k,l) - (l-k+1) X_k + a(k+1,l+1)) \bmod M $$ + +Thus the checksum can be calculated for blocks of length S at all +possible offsets within a file in a ``rolling'' fashion, with very +little computation at each point. + +Despite its simplicity, this checksum was found to be quite adequate as +a first level check for a match of two file blocks. We have found in +practice that the probability of this checksum matching when the +blocks are not equal is quite low. This is important because the much +more expensive strong checksum must be calculated for each block where +the weak checksum matches. + +\section{Checksum searching} + +Once $\alpha$ has received the list of checksums of the blocks of $B$, +it must search $A$ for any blocks at any offset that match the +checksum of some block of $B$. The basic strategy is to compute the +32-bit rolling checksum for a block of length $S$ starting at each +byte of $A$ in turn, and for each checksum, search the list for a +match. To do this our implementation uses a +simple 3 level searching scheme. + +The first level uses a 16-bit hash of the 32-bit rolling checksum and +a $2^{16}$ entry hash table. The list of checksum values (i.e., the +checksums from the blocks of $B$) is sorted according to the 16-bit +hash of the 32-bit rolling checksum. Each entry in the hash table +points to the first element of the list for that hash value, or +contains a null value if no element of the list has that hash value. + +At each offset in the file the 32-bit rolling checksum and its 16-bit +hash are calculated. If the hash table entry for that hash value is +not a null value, the second level check is invoked. + +The second level check involves scanning the sorted checksum list +starting with the entry pointed to by the hash table entry, looking +for an entry whose 32-bit rolling checksum matches the current value. +The scan terminates when it reaches an entry whose 16-bit hash +differs. If this search finds a match, the third level check is +invoked. + +The third level check involves calculating the strong checksum for the +current offset in the file and comparing it with the strong checksum +value in the current list entry. If the two strong checksums match, +we assume that we have found a block of $A$ which matches a block of +$B$. In fact the blocks could be different, but the probability of +this is microscopic, and in practice this is a reasonable assumption. + +When a match is found, $\alpha$ sends $\beta$ the data in $A$ between +the current file offset and the end of the previous match, followed by +the index of the block in $B$ that matched. This data is sent +immediately a match is found, which allows us to overlap the +communication with further computation. + +If no match is found at a given offset in the file, the rolling +checksum is updated to the next offset and the search proceeds. If a +match is found, the search is restarted at the end of the matched +block. This strategy saves a considerable amount of computation for +the common case where the two files are nearly identical. In +addition, it would be a simple matter to encode the block indexes as +runs, for the common case where a portion of $A$ matches a series of +blocks of $B$ in order. + +\section{Pipelining} + +The above sections describe the process for constructing a copy of one +file on a remote system. If we have a several files to copy, we can +gain a considerable latency advantage by pipelining the process. + +This involves $\beta$ initiating two independent processes. One of the +processes generates and sends the checksums to $\alpha$ while the +other receives the difference information from $\alpha$ and +reconstructs the files. + +If the communications link is buffered then these two processes can +proceed independently and the link should be kept fully utilised in +both directions for most of the time. + +\section{Results} + +To test the algorithm, tar files were created of the Linux kernel +sources for two versions of the kernel. The two kernel versions were +1.99.10 and 2.0.0. These tar files are approximately 24MB in size and +are separated by 5 released patch levels. + +Out of the 2441 files in the 1.99.10 release, 291 files had changed in +the 2.0.0 release, 19 files had been removed and 25 files had been +added. + +A ``diff'' of the two tar files using the standard GNU diff utility +produced over 32 thousand lines of output totalling 2.1 MB. + +The following table shows the results for rsync between the two files +with a varying block size.\footnote{All the tests in this section were + carried out using rsync version 0.5} + +\vspace*{5mm} +\begin{tabular}{|l|l|l|l|l|l|l|} \hline +{\bf block} & {\bf matches} & {\bf tag} & {\bf false} & {\bf data} & {\bf written} & {\bf read} \\ +{\bf size} & & {\bf hits} & {\bf alarms} & & & \\ \hline \hline + +300 & 64247 & 3817434 & 948 & 5312200 & 5629158 & 1632284 \\ \hline +500 & 46989 & 620013 & 64 & 1091900 & 1283906 & 979384 \\ \hline +700 & 33255 & 571970 & 22 & 1307800 & 1444346 & 699564 \\ \hline +900 & 25686 & 525058 & 24 & 1469500 & 1575438 & 544124 \\ \hline +1100 & 20848 & 496844 & 21 & 1654500 & 1740838 & 445204 \\ \hline +\end{tabular} +\vspace*{5mm} + +In each case, the CPU time taken was less than the +time it takes to run ``diff'' on the two files.\footnote{The wall + clock time was approximately 2 minutes per run on a 50 MHz SPARC 10 + running SunOS, using rsh over loopback for communication. GNU diff + took about 4 minutes.} + +The columns in the table are as follows: + +\begin{description} +\item [block size] The size in bytes of the checksummed blocks. +\item [matches] The number of times a block of $B$ was found in $A$. +\item [tag hits] The number of times the 16 bit hash of the rolling + checksum matched a hash of one of the checksums from $B$. +\item [false alarms] The number of times the 32 bit rolling checksum + matched but the strong checksum didn't. +\item [data] The amount of file data transferred verbatim, in bytes. +\item [written] The total number of bytes written by $\alpha$ + including protocol overheads. This is almost all file data. +\item [read] The total number of bytes read by $\alpha$ including + protocol overheads. This is almost all checksum information. +\end{description} + +The results demonstrate that for block sizes above 300 bytes, only a +small fraction (around 5\%) of the file was transferred. The amount +transferred was also considerably less than the size of the diff file +that would have been transferred if the diff/patch method of updating +a remote file was used. + +The checksums themselves took up a considerable amount of space, +although much less than the size of the data transferred in each +case. Each pair of checksums consumes 20 bytes: 4 bytes for the +rolling checksum plus 16 bytes for the 128-bit MD4 checksum. + +The number of false alarms was less than $1/1000$ of the number of +true matches, indicating that the 32 bit rolling checksum is quite +good at screening out false matches. + +The number of tag hits indicates that the second level of the +checksum search algorithm was invoked about once every 50 +characters. This is quite high because the total number of blocks in +the file is a large fraction of the size of the tag hash table. For +smaller files we would expect the tag hit rate to be much closer to +the number of matches. For extremely large files, we should probably +increase the size of the hash table. + +The next table shows similar results for a much smaller set of files. +In this case the files were not packed into a tar file first. Rather, +rsync was invoked with an option to recursively descend the directory +tree. The files used were from two source releases of another software +package called Samba. The total source code size is 1.7 MB and the +diff between the two releases is 4155 lines long totalling 120 kB. + +\vspace*{5mm} +\begin{tabular}{|l|l|l|l|l|l|l|} \hline +{\bf block} & {\bf matches} & {\bf tag} & {\bf false} & {\bf data} & {\bf written} & {\bf read} \\ +{\bf size} & & {\bf hits} & {\bf alarms} & & & \\ \hline \hline + +300 & 3727 & 3899 & 0 & 129775 & 153999 & 83948 \\ \hline +500 & 2158 & 2325 & 0 & 171574 & 189330 & 50908 \\ \hline +700 & 1517 & 1649 & 0 & 195024 & 210144 & 36828 \\ \hline +900 & 1156 & 1281 & 0 & 222847 & 236471 & 29048 \\ \hline +1100 & 921 & 1049 & 0 & 250073 & 262725 & 23988 \\ \hline +\end{tabular} +\vspace*{5mm} + + +\section{Availability} + +An implementation of rsync which provides a convenient interface +similar to the common UNIX command rcp has been written and is +available for download from ftp://samba.anu.edu.au/pub/rsync. + +\end{document} diff --git a/util.c b/util.c new file mode 100644 index 00000000..ad4dec95 --- /dev/null +++ b/util.c @@ -0,0 +1,229 @@ +/* + Copyright (C) Andrew Tridgell 1996 + Copyright (C) Paul Mackerras 1996 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* + Utilities used in rsync + + tridge, June 1996 + */ +#include "rsync.h" + +static int total_written = 0; +static int total_read = 0; + +extern int verbose; + +int write_total(void) +{ + return total_written; +} + +int read_total(void) +{ + return total_read; +} + +void write_int(int f,int x) +{ + char b[4]; + SIVAL(b,0,x); + if (write(f,b,4) != 4) { + fprintf(stderr,"write_int failed : %s\n",strerror(errno)); + exit(1); + } + total_written += 4; +} + +void write_buf(int f,char *buf,int len) +{ + if (write(f,buf,len) != len) { + fprintf(stderr,"write_buf failed : %s\n",strerror(errno)); + exit(1); + } + total_written += len; +} + +void write_flush(int f) +{ +} + + +int readfd(int fd,char *buffer,int N) +{ + int ret; + int total=0; + + while (total < N) + { + ret = read(fd,buffer + total,N - total); + + if (ret <= 0) + return total; + total += ret; + } + return total; +} + + +int read_int(int f) +{ + char b[4]; + if (readfd(f,b,4) != 4) { + if (verbose > 1) + fprintf(stderr,"Error reading %d bytes : %s\n",4,strerror(errno)); + exit(1); + } + total_read += 4; + return IVAL(b,0); +} + +void read_buf(int f,char *buf,int len) +{ + if (readfd(f,buf,len) != len) { + if (verbose > 1) + fprintf(stderr,"Error reading %d bytes : %s\n",len,strerror(errno)); + exit(1); + } + total_read += len; +} + + +char *map_file(int fd,off_t len) +{ + char *ret = (char *)mmap(NULL,len,PROT_READ,MAP_SHARED,fd,0); + return ret; +} + +void unmap_file(char *buf,off_t len) +{ + if (len > 0) + munmap(buf,len); +} + + +int read_write(int fd_in,int fd_out,int size) +{ + static char *buf=NULL; + static int bufsize = WRITE_BLOCK_SIZE; + int total=0; + + if (!buf) { + buf = (char *)malloc(bufsize); + if (!buf) out_of_memory("read_write"); + } + + while (total < size) { + int n = MIN(size-total,bufsize); + read_buf(fd_in,buf,n); + if (write(fd_out,buf,n) != n) + return total; + total += n; + } + return total; +} + + +/* this is taken from CVS */ +int piped_child(char **command,int *f_in,int *f_out) +{ + int pid; + int to_child_pipe[2]; + int from_child_pipe[2]; + + if (pipe(to_child_pipe) < 0 || + pipe(from_child_pipe) < 0) { + fprintf(stderr,"pipe: %s\n",strerror(errno)); + exit(1); + } + + + pid = fork(); + if (pid < 0) { + fprintf(stderr,"fork: %s\n",strerror(errno)); + exit(1); + } + + if (pid == 0) + { + if (dup2(to_child_pipe[0], STDIN_FILENO) < 0 || + close(to_child_pipe[1]) < 0 || + close(from_child_pipe[0]) < 0 || + dup2(from_child_pipe[1], STDOUT_FILENO) < 0) { + fprintf(stderr,"Failed to dup/close : %s\n",strerror(errno)); + exit(1); + } + execvp(command[0], command); + fprintf(stderr,"Failed to exec %s : %s\n", + command[0],strerror(errno)); + exit(1); + } + + if (close(from_child_pipe[1]) < 0 || + close(to_child_pipe[0]) < 0) { + fprintf(stderr,"Failed to close : %s\n",strerror(errno)); + exit(1); + } + + *f_in = from_child_pipe[0]; + *f_out = to_child_pipe[1]; + + return pid; +} + + +void out_of_memory(char *str) +{ + fprintf(stderr,"out of memory in %s\n",str); + exit(1); +} + + +#ifndef HAVE_STRDUP + char *strdup(char *s) +{ + int l = strlen(s) + 1; + char *ret = (char *)malloc(l); + if (ret) + strcpy(ret,s); + return ret; +} +#endif + + +int set_modtime(char *fname,time_t modtime) +{ +#ifdef HAVE_UTIME_H + struct utimbuf tbuf; + tbuf.actime = time(NULL); + tbuf.modtime = modtime; + return utime(fname,&tbuf); +#elif defined(HAVE_UTIME) + time_t t[2]; + t[0] = time(NULL); + t[1] = modtime; + return utime(fname,t); +#else + struct timeval t[2]; + t[0].tv_sec = time(NULL); + t[0].tv_usec = 0; + t[1].tv_sec = modtime; + t[1].tv_usec = 0; + return utimes(fname,t); +#endif +} diff --git a/version.h b/version.h new file mode 100644 index 00000000..02927cfe --- /dev/null +++ b/version.h @@ -0,0 +1 @@ +#define VERSION "0.9"