#!/usr/bin/ksh
#
# process_weblog4carisma.ksh - Process the THEMIS webserver logs
#                              for CARISMA statistics.
# Processes weblog files in /disks/themisdata/weblogs, created by grab_weblog.ksh
#
# File Locations:
#   original WEBLOGS at danaid: /var/log/httpd/access_log.1.gz
#   archived WEBLOGS:           /disks/themisdata/weblogs/archived
#   CARISMA WEBLOGS:            /disks/themisdata/weblogs/carisma
#
# For every ${WEBLOGS}/thmweb* file the CARISMA-related requests are
# extracted into carisma_accesslog_<first date>_<last date>, that extract is
# copied to ${WEBLOGS}/carisma, and the input file is moved to
# ${WEBLOGS}/archived.
#
# Exit status:
#   0  all files processed, or another instance already holds the lock
#   1  WEBLOGS is not set, the lock could not be created, or a file could
#      not be copied/archived
#

#
## Set the environment variables
#
if [[ -z $THMSOC ]]
then
  THMSOC=/disks/socware/thmsoc_dp_current
  export THMSOC
fi

# NOTE(review): WEBLOGS is used below but never assigned in this script, so
# it presumably comes from this config file - confirm.
. "${THMSOC}/src/config/soc_it_to_me.config"

if [[ -z $WEBLOGS ]]
then
  # Without this guard the loop below would glob /thmweb* in the root dir.
  echo "process_weblog4carisma.ksh: WEBLOGS is not set" >&2
  exit 1
fi

LOCKFILE=/var/tmp/process_weblog4carisma_lock

#
## Function Definitions
#
function nmonth {
  # Input name of month (Jan, Feb, ...)
  # Return (on stdout) the two-digit number of the month,
  # or "00" when the name is not recognised.
  typeset nmon

  case $1 in
    Jan) nmon="01" ;;
    Feb) nmon="02" ;;
    Mar) nmon="03" ;;
    Apr) nmon="04" ;;
    May) nmon="05" ;;
    Jun) nmon="06" ;;
    Jul) nmon="07" ;;
    Aug) nmon="08" ;;
    Sep) nmon="09" ;;
    Oct) nmon="10" ;;
    Nov) nmon="11" ;;
    Dec) nmon="12" ;;
    *)   nmon="00" ;;
  esac

  echo "$nmon"
}

function ndate {
  # Input a line of a weblog, already split into words by the caller.
  # Only word 4 is used; it is expected to look like "[DD/Mon/YYYY:HH:MM:SS".
  # Return (on stdout) the date in line as YYYYMMDD
  # If weblog file format changes, this function will have to change
  typeset stamp day1 month1 year1 month1n

  # The fields are cut out with shell parameter expansion, so the result
  # does not depend on any awk's substr() index conventions.
  stamp=${4#\[}           # DD/Mon/YYYY:HH:MM:SS
  day1=${stamp%%/*}       # DD
  stamp=${stamp#*/}       # Mon/YYYY:HH:MM:SS
  month1=${stamp%%/*}     # Mon
  year1=${stamp#*/}       # YYYY:HH:MM:SS
  year1=${year1%%:*}      # YYYY

  month1n=$(nmonth "$month1")
  echo "${year1}${month1n}${day1}"
}

#
## Handle input arguments
#
##NONE

#
## Check for lock file
#
# With noclobber set, the redirection fails if the file already exists, so
# testing for the lock and creating it happen in a single step.
if ( set -o noclobber; echo $$ > "$LOCKFILE" ) 2>/dev/null
then
  # Install the traps only once the lock is ours, so that a run which quits
  # because another instance is active never deletes that instance's lock.
  trap 'rm -f "$LOCKFILE"' EXIT
  trap 'exit 1' HUP INT TERM
elif [[ -a $LOCKFILE ]]
then
  echo Process in progress, quitting........
  exit 0
else
  echo "process_weblog4carisma.ksh: cannot create lock file $LOCKFILE" >&2
  exit 1
fi

#
## Process weblogs, for CARISMA entries only
#
rc=0
for file in "${WEBLOGS}"/thmweb*
do
  # An unmatched pattern is left as a literal string; skip it, along with
  # anything else that is not a regular file.
  [[ -f $file ]] || continue

  echo "Script: process_weblog4carisma.ksh. "
  echo "File: $file"

  topline=$(head -n 1 "$file")
  bottomline=$(tail -n 1 "$file")

  # ndate needs the log line split into words, so the expansions stay
  # unquoted; globbing is switched off meanwhile because request URLs in a
  # log line routinely contain '?' and '*'.
  set -f
  startDate=$(ndate $topline)
  stopDate=$(ndate $bottomline)
  set +f

  # New filenames for carisma
  carismafile=carisma_accesslog_${startDate}_${stopDate}

  # Save all CARISMA entries in a separate file (staged in the current
  # directory). '>' rather than '>>' so a leftover from an interrupted run
  # is not duplicated into this extract.
  # NOTE(review): the dots in "128.32.18" are regex wildcards and the
  # pattern is unanchored - confirm that is the intended exclusion.
  egrep -i "^.*(mag_atha|mag_fsim|mag_fsmi|mag_gill|mag_pina|mag_rank|mag_snkq|ype=gmag).*$" "$file" \
    | egrep -v "128.32.18" \
    | egrep -v " 404 " > "$carismafile"

  # Archive the input only after its extract has safely reached the
  # carisma directory; otherwise keep the input for the next run.
  if cp "$carismafile" "${WEBLOGS}/carisma"
  then
    rm "$carismafile"
    echo "${carismafile} was processed and moved to ${WEBLOGS}/carisma"

    if mv "$file" "${WEBLOGS}/archived"
    then
      # $file already carries the ${WEBLOGS}/ prefix.
      echo "$file was processed and moved to ${WEBLOGS}/archived"
    else
      echo "process_weblog4carisma.ksh: could not move $file to ${WEBLOGS}/archived" >&2
      rc=1
    fi
  else
    rm -f "$carismafile"
    echo "process_weblog4carisma.ksh: could not copy ${carismafile} to ${WEBLOGS}/carisma; $file left in place" >&2
    rc=1
  fi

  echo "====================="
done

#
## Cleanup and exit
#
rm -f "$LOCKFILE"
echo "End of process_weblog4carisma.ksh"
exit $rc