Import patchsync version 2

[utils/utils.git] / patchsync
diff --git a/patchsync b/patchsync

index 8c20a1d..07e2f0f 100755 (executable)
--- a/patchsync
+++ b/patchsync
@@ -1,93 +1,445 @@
  #!/bin/bash
-# patchsync: Synchronize a branch with a patch (e.g., acls.diff and rsync-acl).
-# usage: patchsync <staging> {'' | --dry-run} [branch | patch]
-# <staging>: the staging directory, containing a shell script "settings" and an optional rsync filter file "filter"
-#     "settings" must contain:
-#         - $trunk, $patch, $branch paths interpreted relative to the staging dir
-#         - shell function do_diff: trunk, branch => patch
-#             e.g., for rsync: diff -uprN trunk branch | sed -re 's/^(\+\+\+|---) ([^\t]+).*$/\1 \2/' | grep -v '^diff' >patch
-#         - shell function do_patch: branch copied from trunk, patch => branch
-#             e.g., for rsync: patch --no-backup-if-mismatch -d branch/ -p1 <patch
-# --dry-run: don't note anything
-# branch | patch: overwrite the specified thing in case of a conflict (e.g., on the first run)
+# patchsync: Synchronizes a trunk, a branch, and a patch containing the
+# differences between them.
+# Version 2
+# -- Matt McCutchen
+#
+# usage: patchsync [--dry-run] <staging> [branch | patch]
+#
+# Patchsync is invoked on a "staging directory", which holds some configuration
+# (including the locations of the trunk, patch, and branch it is to synchronize)
+# and some synchronization state.  It determines whether each of the trunk,
+# patch, and branch has changed since the last successful synchronization and
+# updates the patch or branch as appropriate:
+# 
+# Changed since last sync   Patchsync's behavior
+# -------------------------------------------------
+# Nothing                   Do nothing
+# Trunk only                Update branch
+# Patch but not branch      Update branch
+# Branch but not patch      Update patch
+# Branch and patch          Complain about conflict
+#
+# <staging>: path to the staging directory
+#
+# --dry-run: show what would happen without actually modifying the trunk, patch,
+#   branch, or synchronization state
+#
+# {branch | patch}: force patchsync to update the specified thing from the
+#   others instead of deciding automatically; you can use this argument to
+#   revert or to resolve a conflict
+#
+# CAVEAT: Patchsync might make a mess if the trunk, patch, or branch is
+# modified in a way not hidden by the filters while patchsync is running!
+#
+# CAVEAT: Patchsync only notices creations, deletions, and modifications of
+# regular files in the trunk and branch, not other changes like empty directory
+# creations.  If you make a change like that to the trunk, you can force
+# patchsync to update the branch.
+#
+# Staging directory format: A staging directory contains the following items:
+#   "trunk", trunk directory or symlink to it
+#   "patch", patch regular file or symlink to it
+#   "branch", branch directory or symlink to it
+#     [Why symlinks?  Expose as much as possible to tools like symlinks(8).]
+#   "settings", shell script defining the following shell functions:
+#     - do_diff <trunk> <branch> <write-patch>: diff the specified trunk and
+#         branch and write the patch to the specified file; define it to use
+#         your favorite diff format
+#       - example: exitoneok diff -urN $1 $2 \
+#                    | sed -re 's/^(\+\+\+|---) ([^\t]+).*$/\1 \2/' \
+#                    | exitoneok grep -v '^diff' >$3
+#     - do_patch <patch> <convert-trunk-to-branch>: apply the patch to the
+#         specified trunk; define it to understand your favorite diff format
+#       - example: patch --no-backup-if-mismatch -d $2/ -p1 <$1
+#     - Note: patchsync runs these functions under "pipefail", but the
+#         "set -e" it uses does not propagate into the functions.  Patchsync
+#         provides an "exitoneok" function you can use to treat an exit code of
+#         1 as 0.  You might want to && successive commands together.
+#     - There are several possible ways to handle failed hunks.  The simplest
+#         and safest is to make do_patch fail, but that's inconvenient for the
+#         user, who must investigate the *.rej files in the staging directory
+#         and either fix the patch or fix the branch and force updating the
+#         patch.  One could make do_patch succeed, but if the user then modifies
+#         the branch, the failed hunks will merely be dropped from the patch,
+#         which is probably unacceptable.  The clever way is to let do_patch
+#         succeed but make do_diff fail if any *.rej files exist in the branch.
+#   "filters" (optional): rsync filters to use when accessing the trunk and
+#     branch; hide filters apply to reading, protect filters to writing;
+#     hint: you probably want to hide and protect build outputs
+#
+# Other usage: patchsync --new <trunk> <patch> <branch> <staging>
+# Mostly sets up a new staging directory for the given trunk, branch, and patch
+# at the given location.  You still have to provide settings, and filters if
+# you want them.
+# - If one of the patch or branch exists, the other will be calculated when
+#   you first synchronize.
+# - If both exist, you will get a conflict when you first synchronize and you
+#   will need to specify which to update.
+# - If neither exists, you get an empty patch and a branch identical to the trunk.
+
+# Disable branch/.patchsync support because it's a bad idea in general, and the
+# cyclic symlink confuses Eclipse in particular. -- Matt 2006.11.30
  
  set -e
+trap "echo 'Patchsync encountered an unexpected error!  ABORTING!' 1>&2; exit 2;" ERR
+set -o errtrace
+set -o pipefail
+
+# Make sure we have rsync.
+type rsync >/dev/null 2>&1 || \
+       { echo "Patchsync requires rsync, but there's no rsync on your path!" 1>&2; exit 1; }
+# If a cp2 is available, use it; otherwise define our own.
+type cp2 >/dev/null 2>&1 || function cp2 { rsync -rltE --chmod=ugo=rwx "$@"; }
+
+function exitoneok {  # Run "$@" and succeed when it exits 0 or 1 (e.g. diff reporting differences).
+       "$@" || [ "$?" -eq 1 ]
+}
+
+# wdpp_from <B> ==> the shortest relative prefix-path from directory B to the current directory
+# (prefix-path means it ends in a slash unless it's the empty string, which means '.')
+# "patchsync" uses this to link-dest when copying the branch out.
+# "patchsync --new" uses it to reverse the staging dir path when creating symlinks.
+function wdpp_from {
+       AtoB="$1"
+       # Start with symlink-followed absolute prefix-paths without the initial slash.
+       # NOT bash builtin pwd; it tells us how we got here, not where we are
+       pA="$(/bin/pwd)/"
+       pA="${pA#/}"
+       pB="$( (cd "$AtoB" && /bin/pwd) )/"
+       pB="${pB#/}"
+       # Lop off the longest common prefix of components that we can.
+       # While first components are equal...
+       # (Empty correctly doesn't equal remaining)
+       while { [ -n "$pA" ] || [ -n "$pB" ]; } && [ "${pA%%/*}" == "${pB%%/*}" ]; do
+               # Remove them.
+               pA="${pA#*/}"
+               pB="${pB#*/}"
+       done
+       ans="$pA"
+       # Translate remaining components of $pB to ../s
+       while [ -n "$pB" ]; do
+               ans="$ans../"
+               pB="${pB#*/}"
+       done
+       # Double check; add dot to the end to enforce ending in a slash and handle empty ans
+       (cd "$AtoB" && [ "$ans." -ef /proc/self/fd/3 ]) 3<.
+       # Yay
+       echo "$ans"
+}
+
+function hash_file {
+       # Print only the SHA-1 digest of file $1: drop everything from the first space on.
+       sha1sum -b "$1" | sed -e 's/ .*$//'
+}
+
+function patchsync_sync {
+
+if [ "$1" == --dry-run ]; then
+       echo "Dry run mode."
+       dryrun=1
+       shift
+fi
  
  staging="$1"
-if ! [ -r "$staging/settings" ]; then
+if [ -r "$staging/settings" ]; then
+       echo "Using staging dir $staging"
+else
         echo "Specify a staging directory containing a settings file!" 1>&2
         exit 1
  fi
-cd "$staging"
-
-dryrun="$2"
-whichtochange="$3"
-CP2t=(cp2 --del)
-CP2in=("${CP2t[@]}" --filter='. filter' --delete-excluded)
-CP2out=("${CP2t[@]}" --filter='. filter' --no-t --checksum) # be nice to mtimes
-export LC_COLLATE="C"
+cd "$staging" || { echo "Failed to enter staging dir!" 1>&2; exit 1; }
+shift
  
  . settings
+type do_diff >/dev/null 2>&1 || { echo "do_diff is not defined!" 1>&2; exit 1; }
+type do_patch >/dev/null 2>&1 || { echo "do_patch is not defined!" 1>&2; exit 1; }
  
-echo "Copying in, checking for changes..."
-"${CP2in[@]}" --link-dest=../trunk-save -i "$trunk/" trunk/
-diff -r trunk{-save,} >/dev/null || trunkch=ch
-"${CP2in[@]}" --link-dest=../branch-save -i "$branch/" branch/
-diff -r branch{-save,} >/dev/null || branchch=ch
-cp2 --link-dest=../patch-save -i "$patch" patch
-diff -r patch{-save,} >/dev/null || patchch=ch
-echo "Done."
-
-if [ $trunkch ] || [ $branchch ] || [ $patchch ]; then
-       # Something changed.
-       # Update either branch or patch, whichever didn't change.
-       # If trunk changes, update branch, not patch.
-       if [ "$whichtochange" == 'branch' ] || ! [ $branchch ]; then
-               echo "Updating branch..."
-               "${CP2t[@]}" trunk/ branch/
-               do_patch
-               echo "Done."
-               copyout=1
-       elif [ "$whichtochange" == 'patch' ] || ! [ $patchch ]; then
-               echo "Updating patch..."
-               do_diff
-               echo "Done."
-               copyout=1
+whichtoupdate="$1"
+# patchsync --new doesn't need this any more except for identical-branch
+#if [ -z "$whichtoupdate" ] && [ -s whichtoupdate ]; then
+#      # Hook for patchsync --new
+#      whichtoupdate="$(< whichtoupdate)"
+#      echo "Updating $whichtoupdate according to staging dir."
+#el
+if [ -n "$whichtoupdate" ]; then
+       echo "Updating $whichtoupdate according to command line argument."
+else
+       echo "Synchronizing."
+fi
+
+filteropts=()
+! [ -e filters ] || filteropts=("${filteropts[@]}" --filter='. filters')
+# 'R *' or 'S *' disables filtering on the staging dir side.
+
+COPYIN=(cp2 --del --filter='R *' "${filteropts[@]}")
+COPYOUT=(cp2 --del --filter='S *' "${filteropts[@]}" --no-t --checksum) # be nice to mtimes
+
+# hash_dir foo/ ==> a hash code covering all of the shown files in foo/
+function hash_dir {
+       # Itemize the dir, extract filenames, hash the files, and hash the list of
+       # hashes.
+       "${COPYIN[@]}" -i -n $1 nonexistent/ \
+               | sed -n -e '/^>f/{ s/^[^ ]* //; p }' \
+               | (cd $1 && xargs --no-run-if-empty --delimiter='\n' sha1sum -b) \
+               | hash_file /dev/stdin
+}
+
+echo "Checking for changes..."
+hash_dir trunk/ >trunk-new-hash
+cmp trunk-{save,new}-hash &>/dev/null || { trunkch=1; echo "Trunk has changed"; }
+hash_file patch >patch-new-hash
+cmp patch-{save,new}-hash &>/dev/null || { patchch=1; echo "Patch has changed"; }
+hash_dir branch/ >branch-new-hash
+cmp branch-{save,new}-hash &>/dev/null || { branchch=1; echo "Branch has changed"; }
+
+# If we're in synchronization mode, decide what to update.
+if [ -z "$whichtoupdate" ] && [[ -n $trunkch || -n $branchch || -n $patchch ]]; then
+       if [ -e identical-branch-flag ] && ! [ $patchch ] && ! [ $branchch ]; then
+               # We still want to create an identical branch.
+               whichtoupdate=identical-branch
+       elif ! [ $branchch ]; then
+               # Trunk, patch, or both changed.  Update branch.
+               whichtoupdate=branch
+       elif ! [ $patchch ]; then
+               # Branch changed, and trunk may have also changed.  Update patch.
+               whichtoupdate=patch
         else
-               echo "Conflict: both branch and patch changed!"
-               echo "Run patchsync <staging> <dry-run> [branch | patch] to"
-               echo "update the specified area from the others."
-               exit 1
+               # Branch and patch both changed.  A message appears later.
+               whichtoupdate=conflict
         fi
-else
-       # Easy case
-       echo "Nothing changed."
+       #echo "Synchronization will update $whichtoupdate."
  fi
  
-if [ $copyout ]; then
-if ! [ $dryrun ]; then
-       ! [ -e lock ] || { echo "Locked!  Please fix!"; exit 1; }
-       echo "patchsync lock file pid $$ date $(date)" >lock
+if [ -n "$whichtoupdate" ]; then
+
+# Always show what would happen if patch-new and branch-new were copied out.
+# (If there was a problem creating one of them, patchsync would have just
+# deleted it.)  But only actually copy them out and update synchronization
+# state if no error.
+error=
+
+# Don't let stuff from an old run confuse us.
+rm -rf patch-new branch-new
+
+function prepare_branch {
+       echo "Preparing updated branch..."
+       # No link-dest because we will modify and then link-dest when copying out
+       "${COPYIN[@]}" trunk/ branch-new/
+       do_patch patch branch-new || \
+               { error=1; echo "Failed to prepare updated branch!" 1>&2; rm -rf branch-new; }
+}
+
+function prepare_patch {
+       echo "Preparing updated patch..."
+       # Link-dest is fine because these are temporary read-only copies
+       "${COPYIN[@]}" --link-dest=../trunk/ trunk/ trunk-tmp/
+       "${COPYIN[@]}" --link-dest=../branch/ branch/ branch-tmp/
+       do_diff trunk-tmp branch-tmp patch-new || \
+               { error=1; echo "Failed to prepare updated patch!" 1>&2; rm -rf patch-new; }
+       rm -rf trunk-tmp branch-tmp
+}
+
+case $whichtoupdate in
+(identical-branch)
+       echo "Creating identical branch..."
+       # No link-dest because we will link-dest when copying out
+       "${COPYIN[@]}" trunk/ branch-new/
+       echo "Creating empty patch..."
+       do_diff branch-new branch-new patch-new || \
+               { error=1; echo "Failed to create empty patch!" 1>&2; rm -rf patch-new; }
+       ;;
+(branch)
+       prepare_branch
+       ;;
+(patch)
+       prepare_patch
+       ;;
+(conflict)
+       error=1
+       cat <<EOF 1>&2
+CONFLICT: both branch and patch changed!
+Run patchsync <staging> {branch | patch} to
+update the specified thing from the others.
+I'll leave updated copies of both branch
+and patch in the staging directory to help
+you decide which way you want to update.
+EOF
+       prepare_branch
+       prepare_patch
+       ;;
+(*)
+       echo "Internal error, whichtoupdate should not be $whichtoupdate!" 1>&2
+       exit 1
+       ;;
+esac
+
+if ! [ $error ] && ! [ $dryrun ]; then
+       # Disable locking for now...
+       # ! [ -e lock ] || { echo "Staging dir is locked!  Delete the file \`lock' if the other instance of patchsync is gone." 1>&2; exit 1; }
+       # echo "patchsync lock file pid $$ date $(date)" >lock
         
         echo "Copying out..."
-       "${CP2out[@]}" -i branch/ "$branch/"
-       cp2 -i --checksum patch "$patch"
-       echo "Done."
-       echo "Noting..."
-       for i in trunk branch patch; do
-               rm -rf $i-save
-               mv $i{,-save}
+       ! [ -e branch-new ] || {
+               hash_dir branch-new/ >branch-new-hash
+               "${COPYOUT[@]}" -i --link-dest="$(wdpp_from branch/)branch-new/" branch-new/ branch/
+               rm -rf branch-new
+       }
+       ! [ -e patch-new ] || {
+               hash_file patch-new >patch-new-hash
+               # Leave an identical patch untouched (be nice to mtimes).
+               # Don't use rsync because we might have to write through a symlink.
+               cmp -s patch-new patch || { echo "> patch"; cp --preserve=timestamps patch-new patch; }
+               rm -f patch-new
+       }
+       
+       echo "Remembering synchronized state for next time..."
+       for i in trunk patch branch; do
+               mv $i-new-hash $i-save-hash
         done
-       echo "Done."
         
-       rm lock
+       # rm lock
  else
-       echo "Dry run; no action.  You can inspect the results if you want."
-       echo "Fake copying out..."
-       "${CP2out[@]}" -in branch/ "$branch/"
-       cp2 -in --checksum patch "$patch"
-       echo "Done."
+       echo "Would copy out as follows:"
+       ! [ -e branch-new ] || {
+               hash_dir branch-new/ >branch-new-hash
+               "${COPYOUT[@]}" -n -i --link-dest="$(wdpp_from branch/)branch-new/" branch-new/ branch/
+               #rm -rf branch-new
+       }
+       ! [ -e patch-new ] || cmp -s patch-new patch || {
+               hash_file patch-new >patch-new-hash
+               # Only announce the patch when its content would actually change.
+               echo "> patch"
+               #cp --preserve=timestamps patch-new patch
+               #rm -f patch-new
+       }
+       echo "Would remember synchronized state for next time."
+       echo "I'm leaving \"new\" files in the staging dir so you can inspect them."
+fi
+
+else # whichtoupdate
+       # Easy case
+       echo "Nothing changed."
+       rm -f {trunk,patch,branch}-new-hash
  fi
+
+if [ $error ]; then
+       echo "Synchronization failed." 1>&2
+       exit 1
+else
+       echo "Synchronization finished."
+       if [ -e identical-branch-flag ]; then
+               if ! [ $dryrun ]; then
+                       rm identical-branch-flag
+                       echo "Removed identical-branch-flag."
+               else
+                       echo "Would remove identical-branch-flag."
+               fi
+       fi
+       # Yay!  Done patchsync_sync!
  fi
+}
+
+function patchsync_new {
+       if [ $# != 4 ]; then
+               echo "Expected 4 arguments after --new, got $#." 1>&2
+               echo "usage: patchsync --new <trunk> <patch> <branch> <staging>" 1>&2
+               exit 1
+       fi
+       
+       # Set up arguments.  Open templates because we will change directories.
+       trunk="$1"
+       patch="$2"
+       branch="$3"
+       staging="$4"
+       
+       # What exists?  Whichtochange first?
+       ! [ -e "$staging" ] || { echo "Staging dir already exists!" 1>&2; exit 1; }
+       [ -d "$trunk" ] || { echo "Trunk does not exist!" 1>&2; exit 1; }
+       
+       # Create staging dir.
+       mkdir "$staging"
+       wdpp="$(wdpp_from "$staging")"
+       cd "$staging"
+       echo "Created staging dir at $staging."
+       
+       # Adjust paths appropriately.
+       trunk="$wdpp$trunk"
+       patch="$wdpp$patch"
+       branch="$wdpp$branch"
+       
+       # Create links to areas
+       ln -s "$trunk" trunk
+       ln -s "$patch" patch
+       ln -s "$branch" branch
+       echo "Created links to areas."
+       
+       # This approach is better than setting whichtochange because we'll notice
+       # if the user puts something into one of the areas we created before first
+       # sync.
+       function create_patch {
+               touch "$patch"
+               hash_file patch >patch-save-hash
+               echo "Created empty patch."
+       }
+       function create_branch {
+               mkdir "$branch"
+               # Can't do hash_dir because ${COPYIN[@]} hasn't been set <== no filters
+               hash_file /dev/null >branch-save-hash
+               echo "Created empty branch."
+       }
+       
+       if [ -e "$patch" ] && ! [ -e "$branch" ]; then
+               create_branch
+               echo "Patch exists; branch will be calculated when you first synchronize."
+       elif [ -e "$branch" ] && ! [ -e "$patch" ]; then
+               create_patch
+               echo "Branch exists; patch will be calculated when you first synchronize."
+       elif ! [ -e "$patch" ] && ! [ -e "$branch" ]; then
+               create_patch
+               create_branch
+               echo "Neither branch nor patch exists;"
+               echo "a branch identical to the trunk will be created when you first synchronize."
+               echo flag >identical-branch-flag
+               echo "Created identical-branch-flag to tell first run of patchsync about this."
+       else
+               echo "Both patch and branch exist."
+               echo "You will need to specify whether to overwrite the"
+               echo "patch or the branch when you first synchronize!"
+       fi
+       
+       # Write settings file.
+       cat >settings <<END
+# Define do_diff and do_patch here!
+END
+       echo "Wrote settings file placeholder."
+       
+       echo ""
+       echo "Patchsync initialized."
+       echo "Now add your definitions of do_diff and do_patch to the settings file,"
+       echo "add a filter file if you wish, and perform the first sync."
+}
+
+function patchsync_help {
+       # Print the version banner and both usage forms on stdout.
+       printf '%s\n' \
+               'Patchsync version 2 by Matt McCutchen' \
+               'usage: patchsync [--dry-run] <staging> [branch | patch]' \
+               '       patchsync --new <trunk> <patch> <branch> <staging>' \
+               'Please read the top of the script for complete documentation.'
+}
  
-exit 0
+case "$1" in
+(--help|--version)
+       patchsync_help ;;
+(--dry-run)
+       patchsync_sync "$@" ;;
+(--new)
+       shift
+       patchsync_new "$@" ;;
+(''|--*)
+       patchsync_help 1>&2
+       exit 1 ;;
+(*)
+       patchsync_sync "$@" ;;
+esac