Initial commit of "git subtree-lite".
[utils/git-subtree-lite.git] / git-subtree-lite
CommitLineData
0eb79fa7
MM
#!/bin/bash

# Bash only applies "set -e" inside command substitutions in POSIX mode (not
# wanted for now) or with inherit_errexit (which needs bash >= 4.4). Emulate it
# instead: -E (errtrace) makes functions, command substitutions and subshells
# inherit the ERR trap, and the trap exits with the failing command's status.
# (Original note: Matt, 2016-10-28.)
set -E
trap 'exit $?' ERR

# Synopsis lines. The first line has no "git subtree-lite" prefix; presumably
# the usage() helper provided by git-sh-setup supplies it.
USAGE="add <subtree_path> <upstream_commit>
   or: git subtree-lite update <subtree_path> <upstream_commit>
   or: git subtree-lite diff [<diff_option>...] <subtree_path>
   or: git subtree-lite import <subtree_path> [<upstream_commit>]"
14
# Long help text. "read -d ''" slurps the whole here-document and returns
# non-zero when it hits EOF without finding a NUL delimiter; "|| true" keeps
# that expected status from tripping the ERR trap installed above.
read -r -d '' LONG_USAGE <<'LONG_USAGE' || true
"git subtree-lite" imports the content of another repository (called "upstream")
as of a particular commit into a subtree of your repository. It remembers the
commit ID that was imported in an ".upstream" file in the subtree, so if you
modify the subtree and then import a different upstream commit, the changes will
be merged. You can use "git subtree-lite" to bundle libraries, as an
alternative to "git submodule" or "git subtree".

Semantically, a subtree managed with this tool is equivalent to a submodule
pointer with a layer of project-specific modifications. Like "git subtree" and
unlike a git submodule, the main project's version of the content is in the
main tree, so no special handling is needed to read or write it. But like a git
submodule and unlike "git subtree", the upstream commit pointer is just data
that can be merged and reverted, and this tool doesn't clutter the main project
history with extra merge commits. The upstream commit pointer doesn't enjoy any
of the special tool support of submodules, but most of it isn't relevant with
the content in the main tree (an exception might be
{fetch,push}.recurseSubmodules for developers who update or diff the subtree).

Operations that access the upstream repository require that you have a local
copy of it and set the "subtree-lite.<subtree_path>.repo" configuration option
to its path. The path may be absolute or relative to the .git directory of the
current repository (the common directory if you use "git worktree"). This
mechanism is subject to change and may be made more sophisticated and automated
(like submodules) in the future.

Further background:
http://marc.info/?l=git&m=147752326122139&w=2

git subtree-lite add <subtree_path> <upstream_commit>
    Add a subtree from the given upstream commit. Follow the directions to
    configure the path to the upstream repository.

git subtree-lite update <subtree_path> <upstream_commit>
    Update the subtree to be based on the given upstream commit.

(To remove a subtree, just use "git rm -r".)

git subtree-lite diff [<diff_option>...] <subtree_path>
    Diff the subtree against the original upstream content. Diff options are
    accepted, but paths to limit the diff currently are not supported.

git subtree-lite import <subtree_path> [<upstream_commit>]
    Low-level command: generate the "imported commit" corresponding to the given
    upstream commit (with the content moved to the subtree and the .upstream
    file added) and print its ID. Only the tree of this commit is meaningful.
LONG_USAGE
62
# No option parsing is requested from git-sh-setup.
OPTIONS_SPEC=''
# Keep things simple for now: running from a subdirectory of the work tree is
# not allowed (nothing here opts in to it before sourcing git-sh-setup).
source "$(git --exec-path)/git-sh-setup"
require_work_tree
67
# Remove the scratch directory created below. Installed as the EXIT trap, so
# it runs on every exit path.
#   - does nothing when $tmpdir is empty or unset;
#   - "--" stops rm from parsing a path that starts with "-" (possible when
#     TMPDIR is a relative path such as "-x") as options.
function cleanup {
  if [ -n "${tmpdir:-}" ]; then
    rm -rf -- "$tmpdir"
  fi
}

# Private scratch area under $TMPDIR (or the system default) for this run.
tmpdir="$(mktemp --tmpdir -d git-subtree-lite.XXXXXXXXXX)"
trap cleanup EXIT
74
# Create the temporary bare repository "$tmpdir/repo" on first use and point
# its alternates file at the object directory of the upstream repository
# ($subtree_repo). Later calls in the same shell are no-ops.
# Globals: tmpdir, subtree_repo (read); tmp_repo (written).
function ensure_init_tmp_repo {
  if [ -n "$tmp_repo" ]; then
    return 0
  fi

  tmp_repo="$tmpdir/repo"
  git init --quiet --bare "$tmp_repo"
  # Resolve the upstream object directory from inside the upstream repository
  # and store its canonical absolute path.
  (
    cd "$subtree_repo" \
      && readlink --canonicalize "$(git rev-parse --git-path objects)"
  ) >"$tmp_repo/objects/info/alternates"
}
82
# Resolve the per-subtree state for $subtree_path.
# Globals read:    subtree_path
# Globals written: subtree_repo        - canonical path of the upstream repo,
#                                        taken from the config option
#                                        "subtree-lite.<subtree_path>.repo"
#                                        (relative values are resolved against
#                                        the git common directory)
#                  upstream_file       - "<subtree_path>/.upstream"
#                  cur_upstream_commit - content of that file, or "" when the
#                                        file does not exist
# Dies with setup instructions when the config option is not set; returns
# non-zero when the common directory or the repository path cannot be resolved.
function setup_subtree {
  # XXX Introduce a name like submodules have? Either abuse .gitmodules and
  # call "git submodule--helper name", or reimplement the lookup?
  local opt_name="subtree-lite.$subtree_path.repo"
  # Declared separately from the assignment: "local var=$(cmd)" would report
  # the status of "local" and hide a failure of the command.
  local common_dir
  common_dir="$(git rev-parse --git-common-dir)" || return
  subtree_repo="$(git config "$opt_name")" || die "Please get a local copy of the upstream repository and run:

    git config $opt_name <path_to_upstream_repository>

before using this tool. The path may be absolute or relative to
$(readlink --canonicalize "$common_dir") ."
  subtree_repo="$(cd "$common_dir" && readlink --canonicalize "$subtree_repo")" || return

  upstream_file="$subtree_path/.upstream"
  if [ -f "$upstream_file" ]; then
    cur_upstream_commit="$(< "$upstream_file")"
  else
    cur_upstream_commit=""
  fi
}
103
# Run "git commit-tree" with the given arguments under a fixed author and
# committer identity and a fixed timestamp, so that identical arguments always
# yield the same commit ID. The overrides are local to this call.
function reproducible_commit_tree {
  local -x GIT_AUTHOR_NAME='git-subtree-lite'
  local -x GIT_AUTHOR_EMAIL='git-subtree-lite@invalid'
  local -x GIT_AUTHOR_DATE='@0 +0000'
  local -x GIT_COMMITTER_NAME='git-subtree-lite'
  local -x GIT_COMMITTER_EMAIL='git-subtree-lite@invalid'
  local -x GIT_COMMITTER_DATE='@0 +0000'
  git commit-tree "$@"
}
113
# Resolve the revision expression $1 to a commit ID inside the upstream
# repository ($subtree_repo) and print it. Runs in a subshell, so the caller's
# working directory is left untouched; fails if the directory cannot be
# entered or the expression does not verify as a commit.
function canonicalize_upstream_commit {
  local rev_expr="$1"
  (
    cd "$subtree_repo" || exit
    git rev-parse --verify "${rev_expr}^{commit}"
  )
}
117
# TODO: We should cache this, but even if we don't stop git from GC-ing the
# underlying commits, how to stop the mapping from growing indefinitely?
#
# Build the "imported commit" for upstream commit $1: its tree holds the
# upstream content under "$subtree_path/" plus a "$subtree_path/.upstream" file
# containing the upstream commit ID. The commit is created in the temporary
# repository, fetched into the current repository, and its ID is printed on
# stdout.
# Globals:   subtree_path, tmp_repo (read; tmp_repo is set up on demand)
# Arguments: $1 - upstream commit ID
# Returns:   non-zero if any step, including the fetch, fails.
function import_commit {
  local commit="$1"
  local fetch_status
  # Called bare (not in a "||" list) so the script-wide ERR trap stays active
  # inside it.
  ensure_init_tmp_repo
  (
    cd "$tmp_repo" || exit
    # Start from an empty index in the temporary repository.
    rm -f index || exit
    git read-tree -i --prefix="$subtree_path/" "$commit" || exit
    # Hm, I suppose this would be a great place to add a way to exclude files
    # the superproject doesn't care about. But not now.

    # Create the .upstream file. If we do it here, then the right thing ends up
    # happening during both add/update and diff without any more code.
    uf_blob="$(printf '%s\n' "$commit" | git hash-object -t blob -w --stdin)" || exit
    git update-index --add --cacheinfo "100644,$uf_blob,$subtree_path/.upstream" || exit
    t="$(git write-tree)" || exit
    # Reuse the same object for the same upstream commit until the repository is
    # GC-ed.
    c="$(reproducible_commit_tree -m "git-subtree-lite temporary commit" "$t")" || exit
    git update-ref HEAD "$c"
  ) || return
  # XXX: The incremental fetch protocol is only based on detection of common
  # commits, so unless we already have the exact same imported commit, this
  # fetch will send the entire tree. If we cache previous imported commits in
  # the main repository, then we can add it as an alternate of the temporary
  # repository (!) and the previous imported commits will be detected as common.
  git fetch --quiet "$tmp_repo" HEAD 2>&1 \
    | { grep -v --line-regexp 'warning: no common commits' >&2 || true; }
  # The pipeline's own status is that of the filter, so pick out the status of
  # "git fetch" explicitly; otherwise a failed fetch would go unnoticed and a
  # stale FETCH_HEAD could be printed below.
  fetch_status="${PIPESTATUS[0]}"
  if [ "$fetch_status" -ne 0 ]; then
    return "$fetch_status"
  fi
  git rev-parse FETCH_HEAD # to stdout
}
148
# "add <subtree_path> <upstream_commit>": import the given upstream commit as
# a new subtree.
# Refuses to run when the path already exists on disk (a dangling symlink
# counts as existing) or has entries in the index. On success the imported
# tree, whose content already sits under <subtree_path>/, is read into the
# index at the root and checked out into the work tree.
function cmd_add {
  [ $# -eq 2 ] || usage
  subtree_path="$1"
  new_upstream_commit_expr="$2"

  setup_subtree
  if [ -e "$subtree_path" ] || [ -L "$subtree_path" ]; then
    die "Error: $subtree_path already exists on the filesystem."
  fi
  # "--" keeps a path that starts with "-" from being parsed as an option,
  # which would make the check pass on empty output.
  if [ -n "$(git ls-files -- "$subtree_path")" ]; then
    die "Error: $subtree_path already exists in the git index."
  fi

  new_upstream_commit="$(canonicalize_upstream_commit "$new_upstream_commit_expr")"
  new_imported_commit="$(import_commit "$new_upstream_commit")"
  git read-tree --prefix= -u "$new_imported_commit"
}
162
# "update <subtree_path> <upstream_commit>": rebase the subtree onto another
# upstream commit with a three-way merge: base = import of the currently
# recorded upstream commit, ours = the current index, theirs = import of the
# requested upstream commit.
function cmd_update {
  if [ $# != 2 ]; then
    usage
  fi
  subtree_path="$1"
  new_upstream_commit_expr="$2"

  setup_subtree
  if [ -z "$cur_upstream_commit" ]; then
    die "Error: $subtree_path is not a subtree set up with this tool."
  fi

  new_upstream_commit="$(canonicalize_upstream_commit "$new_upstream_commit_expr")"
  new_imported_commit="$(import_commit "$new_upstream_commit")"
  old_imported_commit="$(import_commit "$cur_upstream_commit")"
  # Snapshot the current index as the "ours" side of the merge.
  tree2="$(git write-tree)"
  # Note: the .upstream file is already in the imported commits.
  git read-tree -mu "$old_imported_commit" "$tree2" "$new_imported_commit"
  # Resolve the entries read-tree left unmerged, file by file.
  git merge-index -o git-merge-one-file -a
}
179
# "import <subtree_path> [<upstream_commit>]": low-level command that prints
# the ID of the imported commit. Uses the given upstream commit when present,
# otherwise the one currently recorded for the subtree; dies when neither is
# available.
function cmd_import {
  case $# in
    1 | 2) ;;
    *) usage ;;
  esac
  subtree_path="$1"
  setup_subtree

  if [ -n "$2" ]; then
    new_upstream_commit="$(canonicalize_upstream_commit "$2")"
  elif [ -n "$cur_upstream_commit" ]; then
    new_upstream_commit="$cur_upstream_commit"
  else
    die "Error: $subtree_path is not added yet and no upstream commit was given."
  fi

  import_commit "$new_upstream_commit"
}
193
# "diff [<diff_option>...] <subtree_path>": diff the subtree against the
# upstream content it was imported from. The last argument is the subtree
# path; everything before it is passed through to "git diff".
# Dies when the path has no recorded upstream commit, as cmd_update does.
function cmd_diff {
  [ $# -ge 1 ] || usage
  subtree_path="${@: -1:1}"
  setup_subtree
  # Without a recorded upstream commit there is nothing to diff against;
  # report that up front instead of failing on an import of an empty commit ID.
  [ -n "$cur_upstream_commit" ] || die "Error: $subtree_path is not a subtree set up with this tool."
  # This could be expensive, but it's the only way to honor uncommitted changes
  # (and --cached). Even if we added the subtree repo to
  # GIT_ALTERNATE_OBJECT_DIRECTORIES, git has no way to diff a subtree of the
  # worktree against the root of a given commit.
  cur_imported_commit="$(import_commit "$cur_upstream_commit")"
  git diff "${@:1:$#-1}" "$cur_imported_commit" -- "$subtree_path"
}
205
# Entry point: the first argument selects the subcommand, the remaining
# arguments are handed to its cmd_<name> handler.
if [ $# -lt 1 ]; then
  usage
fi
cmd="$1"
shift
case "$cmd" in
  add | update | import | diff)
    "cmd_$cmd" "$@"
    ;;
  *)
    die "Unknown command $cmd."
    ;;
esac
220