X-Git-Url: https://mattmccutchen.net/utils/utils.git/blobdiff_plain/35d3e321c44f7044f53a95d8cf7615c8bcf3e5e9..e05b3ea6ad0a1f63d4dd38e88a2d3382772d6164:/web-logs/Makefile

diff --git a/web-logs/Makefile b/web-logs/Makefile
index 563ebd6..9db4475 100644
--- a/web-logs/Makefile
+++ b/web-logs/Makefile
@@ -12,8 +12,8 @@ include config.mk
 # braces, like $G, always takes a single character as the variable name.
 G := generated
 
-# List the most important targets.  I think they pull in most of the rest.
-default: $G/logs-by-ip $G/requests
+# I think these targets pull in the rest. ~ 2020-09-17
+default: $G/logs-by-ip $G/requests $G/uas $G/uas-organic
 
 clean:
 	rm -f $G/*
@@ -43,12 +43,14 @@ log_line_regex := ^([^ ]+)( [^"]*)"(([^"\\]|\\.)*)" ([0-9]+) ([^"]*)"(([^"\\]|\\
 # \7: referrer (ditto \3)
 # \9: user agent (ditto \3)
 
-# Take the current and previous day's logs.  If I look once a day (DreamHost time),
-# this should catch everything with some overlap.  TODO: Avoid the overlap?
-# Requires sth more sophisticated to track what I've already seen.
+# Take the previous day's logs.  If I look once a day (DreamHost time), this
+# should catch everything with no overlap.  I previously included the current
+# day's logs, but I now think the difficulty of maintaining an accurate sense of
+# usage in the presence of overlap is a greater evil than up to a day of extra
+# latency.  I might find a better solution in the future.
 #
 # Filename pattern for recent-logs is locked down in pull-logs, so we shouldn't have shell injection here.
-recent_logs := $(shell ls -t recent-logs/access.log* | head -n 2 | tac)
+recent_logs := $(shell ls -t recent-logs/access.log* | head -n 2 | tail -n 1 | tac)
 
 $G/logs: $(recent_logs)
 	cat $^ >$@
@@ -95,11 +97,11 @@ uas_command = <$< sed -re 's,$(log_line_regex),\9,; $(s_unescape_field)' | $(cou
 # the request level.  If I want to measure actual server load from bots, I could
 # come a little closer by looking at response size and whether CGIs are invoked.
 $G/uas: $G/logs-not-me
-	$(uas-command)
+	$(uas_command)
 
 # Ditto above but for non-bots or identifying new bots.
 $G/uas-organic: $G/logs-organic
-	$(uas-command)
+	$(uas_command)
 
 # We don't care about the order of IPs here.
 $G/logs-by-ip: $G/logs-organic