#!/bin/sh
#
# Count Apache access-log hits per calendar date.
#
# Decompresses every regular *.gz file in $BASEDIR/$LOGDIR, extracts the
# "dd/Mon/yyyy" part of each "[dd/Mon/yyyy:" timestamp, and writes a
# "date,hits" CSV to $OUT (relative to the current directory). A short
# summary is printed to stdout. Exits non-zero if the scratch file cannot be
# created or the CSV cannot be written.

BASEDIR="/disks/data/themis/weblogs"
LOGDIR="zip_archived_misc1"
OUT="results_$LOGDIR.csv"

# extract_dates: read log lines on stdin and print the 11-character
# "dd/Mon/yyyy" date of every line containing a "[dd/Mon/yyyy:" timestamp.
# Lines without such a timestamp produce no output. RSTART points at the
# opening bracket, so the date itself starts one character later.
extract_dates() {
    awk '
    {
        if (match($0, /\[[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:/)) {
            print substr($0, RSTART + 1, 11)
        }
    }'
}

# Use an unpredictable scratch file: a fixed, PID-based name in a shared
# directory can be pre-created or symlinked by another user.
tmp=$(mktemp "${TMPDIR:-/tmp}/apache_hits_by_date.XXXXXX") || {
    echo "error: cannot create temporary file" >&2
    exit 1
}

# Remove the scratch file on every exit path. Not every sh runs the EXIT
# trap when killed by a signal, so turn the common signals into a plain exit.
trap 'rm -f -- "$tmp"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

nfiles=0

for f in "$BASEDIR/$LOGDIR"/*.gz
do
    # An unmatched glob stays literal; this test skips it (and anything that
    # is not a regular file).
    [ -f "$f" ] || continue
    nfiles=$((nfiles + 1))

    gzip -cd "$f" | extract_dates >> "$tmp"
done

# Write header and per-date counts through one redirection so that a failure
# to open $OUT is detected once and reported. "uniq -c" prints "count date";
# the final awk swaps that into "date,count". LC_ALL=C makes the grouping
# byte-wise and independent of the caller's locale.
{
    echo "date,hits"
    LC_ALL=C sort "$tmp" | LC_ALL=C uniq -c | awk '
    {
        print $2 "," $1
    }'
} > "$OUT" || {
    echo "error: cannot write $OUT" >&2
    exit 1
}

# Each line in the scratch file is one matched log line. Some wc
# implementations pad the number with blanks; the arithmetic expansion
# normalises it to a bare integer.
ndates=$(wc -l < "$tmp")
ndates=$((ndates + 0))

echo "Processed files: $nfiles"
echo "Matched log lines: $ndates"
echo "Wrote $OUT"
