# braces, like $G, always takes a single character as the variable name.
G := generated
-# List the most important targets. I think they pull in most of the rest.
-default: $G/logs-by-ip $G/requests
+# I think these targets pull in the rest. ~ 2020-09-17
+default: $G/logs-by-ip $G/requests $G/uas $G/uas-organic
clean:
rm -f $G/*
# \7: referrer (ditto \3)
# \9: user agent (ditto \3)
-# Take the current and previous day's logs. If I look once a day (DreamHost time),
-# this should catch everything with some overlap. TODO: Avoid the overlap?
-# Requires sth more sophisticated to track what I've already seen.
+# Take the previous day's logs. If I look once a day (DreamHost time), this
+# should catch everything with no overlap. I previously included the current
+# day's logs, but I now think the difficulty of maintaining an accurate sense of
+# usage in the presence of overlap is a greater evil than up to a day of extra
+# latency. I might find a better solution in the future.
#
# Filename pattern for recent-logs is locked down in pull-logs, so we shouldn't have shell injection here.
-recent_logs := $(shell ls -t recent-logs/access.log* | head -n 2 | tac)
+recent_logs := $(shell ls -t recent-logs/access.log* | head -n 2 | tail -n 1)
$G/logs: $(recent_logs)
cat $^ >$@
# the request level. If I want to measure actual server load from bots, I could
# come a little closer by looking at response size and whether CGIs are invoked.
$G/uas: $G/logs-not-me
- $(uas-command)
+ $(uas_command)
# Ditto above but for non-bots or identifying new bots.
$G/uas-organic: $G/logs-organic
- $(uas-command)
+ $(uas_command)
# We don't care about the order of IPs here.
$G/logs-by-ip: $G/logs-organic