From 0eb79fa72e01f51ab8caa28133aa40c2e2593c9a Mon Sep 17 00:00:00 2001 From: Matt McCutchen Date: Thu, 10 Nov 2016 13:59:34 -0500 Subject: [PATCH] Initial commit of "git subtree-lite". --- LICENSE | 19 ++++ README.md | 7 ++ git-subtree-lite | 220 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100755 git-subtree-lite diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..68d4673 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (C) 2016 Matt McCutchen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..8534fc6 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# `git subtree-lite` + +`git subtree-lite` is a tool to manage modified versions of content imported from other git repositories. It's easier than git submodules and very similar to `git subtree --squash` but avoids some of the bugs. 
See the documentation at the top of the script for more information.
+
+For now, please email any bug reports, enhancement requests, or proposed patches to me. I'll consider setting up public issue tracking if and when the demand arises.
+
+~ Matt McCutchen
diff --git a/git-subtree-lite b/git-subtree-lite
new file mode 100755
index 0000000..616d533
--- /dev/null
+++ b/git-subtree-lite
@@ -0,0 +1,259 @@
+#!/bin/bash
+
+# "set -e" isn't active inside command substitutions unless we use POSIX mode
+# (which I don't want to do right now) or inherit_errexit (which would introduce
+# a dependency on bash >= 4.4). So implement our own version for now.
+# ~ Matt 2016-10-28
+set -o errtrace
+trap 'exit $?' ERR
+
+USAGE="add <path> <upstream-commit>
+   or: git subtree-lite update <path> <upstream-commit>
+   or: git subtree-lite diff [<diff-options>...] <path>
+   or: git subtree-lite import <path> [<upstream-commit>]"
+
+read -r -d '' LONG_USAGE <<'LONG_USAGE' || true
+"git subtree-lite" imports the content of another repository (called "upstream")
+as of a particular commit into a subtree of your repository. It remembers the
+commit ID that was imported in an ".upstream" file in the subtree, so if you
+modify the subtree and then import a different upstream commit, the changes will
+be merged. You can use "git subtree-lite" to bundle libraries, as an
+alternative to "git submodule" or "git subtree".
+
+Semantically, a subtree managed with this tool is equivalent to a submodule
+pointer with a layer of project-specific modifications. Like "git subtree" and
+unlike a git submodule, the main project's version of the content is in the
+main tree, so no special handling is needed to read or write it. But like a git
+submodule and unlike "git subtree", the upstream commit pointer is just data
+that can be merged and reverted, and this tool doesn't clutter the main project
+history with extra merge commits. The upstream commit pointer doesn't enjoy any
+of the special tool support of submodules, but most of it isn't relevant with
+the content in the main tree (an exception might be
+{fetch,push}.recurseSubmodules for developers who update or diff the subtree).
+
+Operations that access the upstream repository require that you have a local
+copy of it and set the "subtree-lite.<path>.repo" configuration option
+to its path. The path may be absolute or relative to the .git directory of the
+current repository (the common directory if you use "git worktree"). This
+mechanism is subject to change and may be made more sophisticated and automated
+(like submodules) in the future.
+
+Further background:
+http://marc.info/?l=git&m=147752326122139&w=2
+
+git subtree-lite add <path> <upstream-commit>
+  Add a subtree from the given upstream commit. Follow the directions to
+  configure the path to the upstream repository.
+
+git subtree-lite update <path> <upstream-commit>
+  Update the subtree to be based on the given upstream commit.
+
+(To remove a subtree, just use "git rm -r".)
+
+git subtree-lite diff [<diff-options>...] <path>
+  Diff the subtree against the original upstream content. Diff options are
+  accepted, but paths to limit the diff currently are not supported.
+
+git subtree-lite import <path> [<upstream-commit>]
+  Low-level command: generate the "imported commit" corresponding to the given
+  upstream commit (with the content moved to the subtree and the .upstream
+  file added) and print its ID. Only the tree of this commit is meaningful.
+LONG_USAGE
+
+OPTIONS_SPEC=
+# Let's simplify matters for now and not allow running in a subdirectory.
+. "$(git --exec-path)/git-sh-setup"
+require_work_tree
+
+# EXIT trap handler: remove the scratch directory and everything in it.
+function cleanup {
+  rm -rf "$tmpdir"
+}
+
+tmpdir="$(mktemp --tmpdir -d git-subtree-lite.XXXXXXXXXX)"
+trap "cleanup" EXIT
+
+# Unless $tmp_repo is already set, create a bare scratch repository under
+# $tmpdir and record its path in $tmp_repo. Its objects/info/alternates file is
+# pointed at the object directory of the upstream repository, so upstream
+# objects can be used there. Requires $subtree_repo (see setup_subtree).
+function ensure_init_tmp_repo {
+  if [ -z "$tmp_repo" ]; then
+    tmp_repo="$tmpdir/repo"
+    git init --quiet --bare "$tmp_repo"
+    (cd "$subtree_repo" && readlink --canonicalize "$(git rev-parse --git-path objects)") >"$tmp_repo/objects/info/alternates"
+  fi
+}
+
+# Load the settings for the subtree at $subtree_path (set by the caller):
+#   subtree_repo         canonical path of the local upstream repository, read
+#                        from the "subtree-lite.$subtree_path.repo" option and
+#                        resolved relative to the git common directory
+#   upstream_file        $subtree_path/.upstream
+#   cur_upstream_commit  contents of $upstream_file, or "" if it doesn't exist
+# Dies with setup instructions if the configuration option is not set.
+function setup_subtree {
+  # XXX Introduce a name like submodules have? Either abuse .gitmodules and
+  # call "git submodule--helper name", or reimplement the lookup?
+  local opt_name="subtree-lite.$subtree_path.repo"
+  # Declare and assign separately: "local var=$(cmd)" returns the status of
+  # "local" itself, which would hide a failing command from the ERR trap.
+  local common_dir
+  common_dir="$(git rev-parse --git-common-dir)"
+  subtree_repo="$(git config "$opt_name")" || die "Please get a local copy of the upstream repository and run:
+
+  git config $opt_name <path-to-upstream-repo>
+
+before using this tool. The path may be absolute or relative to
+$(readlink --canonicalize "$common_dir") ."
+  subtree_repo="$(cd "$common_dir" && readlink --canonicalize "$subtree_repo")"
+
+  upstream_file="$subtree_path/.upstream"
+  if [ -f "$upstream_file" ]; then
+    cur_upstream_commit="$(< "$upstream_file")"
+  else
+    cur_upstream_commit=""
+  fi
+}
+
+# Run "git commit-tree" with the given arguments under a fixed author and
+# committer identity and timestamp, so that repeated imports of the same tree
+# can produce the same commit object instead of a new one each time.
+function reproducible_commit_tree {
+  GIT_AUTHOR_NAME='git-subtree-lite' \
+  GIT_AUTHOR_EMAIL='git-subtree-lite@invalid' \
+  GIT_AUTHOR_DATE='@0 +0000' \
+  GIT_COMMITTER_NAME='git-subtree-lite' \
+  GIT_COMMITTER_EMAIL='git-subtree-lite@invalid' \
+  GIT_COMMITTER_DATE='@0 +0000' \
+  git commit-tree "$@"
+}
+
+# Print the full ID of the commit that expression $1 names in the upstream
+# repository ($subtree_repo); fails if it does not resolve to a commit.
+function canonicalize_upstream_commit {
+  (cd "$subtree_repo" && git rev-parse --verify "$1^{commit}")
+}
+
+# Build the "imported commit" for upstream commit $1: the upstream tree moved
+# under $subtree_path/ plus a .upstream file holding the commit ID. The commit
+# is created in the scratch repository, fetched into the current repository,
+# and its ID is printed to stdout.
+#
+# TODO: We should cache this, but even if we don't stop git from GC-ing the
+# underlying commits, how to stop the mapping from growing indefinitely?
+function import_commit {
+  local commit="$1"
+  ensure_init_tmp_repo
+  (
+    cd "$tmp_repo"
+    rm -f index
+    git read-tree -i --prefix="$subtree_path/" "$commit"
+    # Hm, I suppose this would be a great place to add a way to exclude files
+    # the superproject doesn't care about. But not now.
+
+    # Create the .upstream file. If we do it here, then the right thing ends up
+    # happening during both add/update and diff without any more code.
+    uf_blob="$(echo "$commit" | git hash-object -t blob -w --stdin)"
+    git update-index --add --cacheinfo "100644,$uf_blob,$subtree_path/.upstream"
+    t="$(git write-tree)"
+    # Reuse the same object for the same upstream commit until the repository is
+    # GC-ed.
+    c="$(reproducible_commit_tree -m "git-subtree-lite temporary commit" "$t")"
+    git update-ref HEAD "$c"
+  )
+  # XXX: The incremental fetch protocol is only based on detection of common
+  # commits, so unless we already have the exact same imported commit, this
+  # fetch will send the entire tree. If we cache previous imported commits in
+  # the main repository, then we can add it as an alternate of the temporary
+  # repository (!) and the previous imported commits will be detected as common.
+  git fetch --quiet "$tmp_repo" HEAD 2>&1 | { grep -v --line-regexp 'warning: no common commits' >&2 || true; }
+  # The pipeline's status is that of the filter group, which always succeeds,
+  # so check git fetch itself; otherwise a stale FETCH_HEAD could be printed.
+  [ "${PIPESTATUS[0]}" -eq 0 ] || die "Error: could not fetch the imported commit from the temporary repository."
+  git rev-parse FETCH_HEAD  # to stdout
+}
+
+# add <path> <upstream-commit>: import the upstream commit into a new subtree
+# at <path>, which must not exist yet on the filesystem or in the index.
+function cmd_add {
+  [ $# -eq 2 ] || usage
+  subtree_path="$1"
+  new_upstream_commit_expr="$2"
+
+  setup_subtree
+  [ ! -e "$subtree_path" ] || die "Error: $subtree_path already exists on the filesystem."
+  [ -z "$(git ls-files -- "$subtree_path")" ] || die "Error: $subtree_path already exists in the git index."
+
+  new_upstream_commit="$(canonicalize_upstream_commit "$new_upstream_commit_expr")"
+  new_imported_commit="$(import_commit "$new_upstream_commit")"
+  git read-tree --prefix= -u "$new_imported_commit"
+}
+
+# update <path> <upstream-commit>: three-way merge with the old imported commit
+# as the base, the tree written from the current index as the second side and
+# the new imported commit as the third, then run git-merge-one-file as needed.
+function cmd_update {
+  [ $# -eq 2 ] || usage
+  subtree_path="$1"
+  new_upstream_commit_expr="$2"
+
+  setup_subtree
+  [ -n "$cur_upstream_commit" ] || die "Error: $subtree_path is not a subtree set up with this tool."
+
+  new_upstream_commit="$(canonicalize_upstream_commit "$new_upstream_commit_expr")"
+  new_imported_commit="$(import_commit "$new_upstream_commit")"
+  old_imported_commit="$(import_commit "$cur_upstream_commit")"
+  tree2="$(git write-tree)"
+  # Note: the .upstream file is already in the imported commits.
+  git read-tree -mu "$old_imported_commit" "$tree2" "$new_imported_commit"
+  git merge-index -o git-merge-one-file -a
+}
+
+# import <path> [<upstream-commit>]: print the imported commit for the given
+# upstream commit, defaulting to the one recorded in <path>/.upstream.
+function cmd_import {
+  [ $# -ge 1 ] && [ $# -le 2 ] || usage
+  subtree_path="$1"
+  setup_subtree
+  if [ -n "$2" ]; then
+    new_upstream_commit="$(canonicalize_upstream_commit "$2")"
+  elif [ -n "$cur_upstream_commit" ]; then
+    new_upstream_commit="$cur_upstream_commit"
+  else
+    die "Error: $subtree_path is not added yet and no upstream commit was given."
+  fi
+  import_commit "$new_upstream_commit"
+}
+
+# diff [<diff-options>...] <path>: diff the subtree against the content of the
+# upstream commit recorded in <path>/.upstream. The last argument is the path.
+function cmd_diff {
+  [ $# -ge 1 ] || usage
+  subtree_path="${@: -1:1}"
+  setup_subtree
+  [ -n "$cur_upstream_commit" ] || die "Error: $subtree_path is not a subtree set up with this tool."
+  # This could be expensive, but it's the only way to honor uncommitted changes
+  # (and --cached). Even if we added the subtree repo to
+  # GIT_ALTERNATE_OBJECT_DIRECTORIES, git has no way to diff a subtree of the
+  # worktree against the root of a given commit.
+  cur_imported_commit="$(import_commit "$cur_upstream_commit")"
+  git diff "${@:1:$#-1}" "$cur_imported_commit" -- "$subtree_path"
+}
+
+# Dispatch on the subcommand name; the remaining arguments go to its handler.
+[ $# -ge 1 ] || usage
+cmd="$1"
+shift
+case "$cmd" in
+(add)
+  cmd_add "$@";;
+(update)
+  cmd_update "$@";;
+(import)
+  cmd_import "$@";;
+(diff)
+  cmd_diff "$@";;
+(*) die "Unknown command $cmd.";;
+esac
+
-- 
2.34.1