#!/usr/bin/ksh
# gbo_mcmac_rmd_mirror.ksh - Using wget, mirror the
#    Ground Magnetometer (GMAG) RMD files that McMAC
#    collects from the GBO sites.
#    Heavily based on gbo_uc_rmd_mirror.ksh
#
# Usage: gbo_mcmac_rmd_mirror.ksh [year] [month]
#
# Arguments:
#    $1 = year
#    $2 = month
#
# For every entry of the site list this script:
#    1. runs wget recursively against ${WEBSITE_NAME}/<site>/<year>/<month>/
#    2. extracts the "saved" lines from the wget log
#    3. for each saved file whose name starts with the site code, records
#       it through gbo_mcmac_rmd_mirror.php and appends its SITEYYMMDD
#       prefix to $GMAGMIRROR/MCM_RMDdirs<pid>; any other saved file is
#       removed.
#
# LOGFILE, WLOGFILE, WEBSITE_NAME, MCMAC_MIRROR_HOME, GMAGMIRROR, TMPDIR,
# SOC_EMAIL and os are not set in this file.
# NOTE(review): presumably they come from the two sourced config files -
# confirm.
#
# Creation Date:
#
#    29 Jan 2013 CLRussell
#

#
## Set the environment variables
#
if [[ -z $THMSOC ]]
then
    THMSOC=/disks/socware/thmsoc_dp_current
    export THMSOC
fi
. "${THMSOC}/src/config/soc_it_to_me.config"
. "${THMSOC}/src/config/gbo_mcmac_rmd_mirror.config"

# Take input arguments and build mirror directories
#
year=$1
month=$2

# Pick the awk flavour once instead of once per site.
if [[ $os = "SunOS" ]]
then
    awk_cmd=nawk
else
    awk_cmd=awk
fi

# Each site-list entry has the form <remote site dir>_<site code>.
for line in $(cat /home/thmsoc/clrussells_stuff/mcmac/lres_site_list_current.txt)
do
    sitefull=$(echo "$line" | $awk_cmd -F_ '{print ($1)}')
    site=$(echo "$line" | $awk_cmd -F_ '{print ($2)}')
    siteupper=$(echo "$site" | $awk_cmd '{print toupper($1)}')

    echo "$sitefull"
    echo "$site"

    home_dir_path=${siteupper}/${year}/${month}/
    remote_dir_path=${sitefull}/${year}/${month}/

    mirror_start_dir="$MCMAC_MIRROR_HOME/${home_dir_path}"
    mkdir -p "$mirror_start_dir"
    # Keep going on failure (wget is given an explicit -P prefix below),
    # but leave a trace in the log.
    cd "$mirror_start_dir" ||
        echo "$$: WARNING: cannot cd to $mirror_start_dir" >> "$LOGFILE"

    #
    echo "$$:" >> "$LOGFILE"
    echo "$$:----------- Starting gbo_mcmac_rmd_mirror at $(date) ---------" >> "$LOGFILE"

    # start_time / end_time are recorded but not used further in this script.
    start_time=$(date -u '+%Y-%m-%d %T')

    filenum=0
    WGET_LOGFILE=${WLOGFILE}_pid$$_${filenum}

    echo "Starting wget at directory level ${home_dir_path}" >> "$LOGFILE"
    # NOTE: wget -o (below) overwrites its log file, so the two header
    # lines written to $WGET_LOGFILE here do not survive the wget run.
    echo "Starting wget at directory level ${home_dir_path}" >> "$WGET_LOGFILE"
    echo "Wget Path Requested: ${WEBSITE_NAME}/${remote_dir_path}" >> "$WGET_LOGFILE"

    localLocation=${MCMAC_MIRROR_HOME}/${home_dir_path}
    remoteLocation=${WEBSITE_NAME}/${remote_dir_path}

    wgetTime=$(date '+%Y-%m-%d %T')

    # -nH drops the web host name and --cut-dirs=5 drops the first five
    #     remote directory levels from the saved path
    #     (an earlier version of this comment said 4 and quoted the prefix
    #     http://spc.igpp.ucla.edu/mcmac/rmd_files; the value passed is 5)
    # -np stops it from ascending through "directories"
    # -N  only fetches files newer than the local copy
    # -o  must stay -o (not -a): the log has the same name for every site
    #     in this run, and only the current site's "saved" lines may be
    #     processed below.
    /usr/bin/wget -r -nH --cut-dirs=5 -N -o "$WGET_LOGFILE" \
        -P "${localLocation}" -A 'RMD' -np "${remoteLocation}"
    wgetcode=$?

    if (( wgetcode != 0 ))
    then
        echo "WARNING WGET PROBLEM: Return code = $wgetcode" >> "$LOGFILE"
        # $SOC_EMAIL is intentionally unquoted: it may hold several recipients.
        printf "%s\n" "script: $0" \
            "Warning: MCMAC Download Problem:code $wgetcode: path ${remote_dir_path}" |
            mailx -s "MCMAC Download Problem" $SOC_EMAIL
        msg="Warning: MCMAC Download Problem:code $wgetcode: path ${remote_dir_path}"
        thmsoc_dblog.php "$(basename "$0")" 1 "$msg" ### added 2012-06-25 - bsadeghi
    fi

    end_time=$(date -u '+%Y-%m-%d %T')

    ## Process log file created by wget run.
    ## Check wget results by seeing if any files were
    ## recovered or updated

    # Look for all lines in the logfile that say "saved" but that don't say
    # "index" or "robot"; write "<quoted path> <[size/size]>" (log fields 6
    # and 8) to a temporary file.  Done in one awk call with a portable ERE
    # instead of grep's GNU-only '\|' alternation.
    $awk_cmd '/saved/ && !/index|robot/ {print $6 " " $8}' "$WGET_LOGFILE" \
        > "${TMPDIR}/wget_mcmac_rmd$$"

    previous=NaN
    while read -r logged_path logged_size
    do
        # wget wraps the saved path in quote characters (`path' or 'path');
        # strip them so the path is usable as a real file name.
        path=${logged_path#\`}
        path=${path#\'}
        path=${path%\'}

        fileName=$(basename "$path")
        fileName=${fileName%\'}
        schName=$(echo "$fileName" | $awk_cmd -F_ '{print substr($1,1,4)}')

        if [[ $schName = "$siteupper" ]]
        then
            # Parse date information from filename (SITEYYMMDD_xx_HH_MM...).
            # These are kept separate from $year/$month, which still drive
            # the directory paths of the remaining sites in the outer loop.
            file_year=$(echo "$fileName" | $awk_cmd -F_ '{print "20"substr($1,5,2)}')
            file_month=$(echo "$fileName" | $awk_cmd -F_ '{print substr($1,7,2)}')
            file_day=$(echo "$fileName" | $awk_cmd -F_ '{print substr($1,9,2)}')
            hr=$(echo "$fileName" | $awk_cmd -F_ '{print $3}')
            mn=$(echo "$fileName" | $awk_cmd -F_ '{print substr($4,1,2)}')
            dataTime="${file_year}-${file_month}-${file_day} ${hr}:${mn}:00"

            echo "$file_year"
            echo "$file_month"
            echo "$file_day"

            expected_file=${MCMAC_MIRROR_HOME}/${siteupper}/${file_year}/${file_month}/${file_day}/${fileName}

            # Verify file exists
            if [[ ! -a $expected_file ]]
            then
                echo "$$:gbo_mcmac_rmd_mirror_log:" >> "$LOGFILE"
                echo "$$:gbo_mcmac_rmd_mirror_log: ${expected_file} not found. " >> "$LOGFILE"
                echo "$$:gbo_mcmac_rmd_mirror_log: continuing..................." >> "$LOGFILE"
                printf "%s\n" "script: $0" \
                    "Warning: ${expected_file} not found." \
                    "Check $LOGFILE" |
                    mailx -s "GBO MCMAC Download Discrepancy" $SOC_EMAIL
                msg="Warning: ${expected_file} not found."
                thmsoc_dblog.php "$(basename "$0")" 1 "$msg" ### added 2012-06-25 - bsadeghi
            fi

            # For some reason size format seems to be in form [size/size]
            # unlike for MACCS: remove the brackets and keep only the first
            # value.
            fileSize=${logged_size%\]}
            fileSize=${fileSize#\[}
            fileSize=${fileSize%%/*}

            # Update database with stats.
            # There is no relevant process code for wget (there was for
            # rsync), but it is necessary to pass something to the database.
            processCode='--------'
            processTime=$wgetTime
            mysql_table_name=mcm_${schName}_rmdfiles

            echo "$$: Updating MySQL database table $mysql_table_name......." >> "$LOGFILE"
            echo "$$: gbo_mcmac_rmd_mirror.php $mysql_table_name $dataTime $fileName $processTime $processCode $fileSize" >> "$LOGFILE"
            # $dataTime and $processTime are left unquoted on purpose: each
            # expands to two words (date, time), exactly as this script has
            # always invoked the helper.
            # NOTE(review): confirm the argument layout the PHP script
            # expects before quoting them.
            gbo_mcmac_rmd_mirror.php "$mysql_table_name" $dataTime "$fileName" $processTime "$processCode" "$fileSize"
            echo "$$: ...........update complete. " >> "$LOGFILE"

            # Create a file used for CDF processing
            # only want one listing for each SITEYYMMDD not one for each hour
            sub_filename=$(echo "$fileName" | $awk_cmd '{print substr($1,1,10)}')
            if [[ $sub_filename != "$previous" ]]
            then
                echo "$sub_filename" >> "$GMAGMIRROR/MCM_RMDdirs$$"
                previous=${sub_filename}
            fi
        else
            # If school name didn't match file name then it's not a valid
            # file, so remove it.
            echo "Removing file"
            echo "$path"
            if [[ -n $path ]]
            then
                rm -f -- "$path"
            fi
        fi
    done < "${TMPDIR}/wget_mcmac_rmd$$"

    #
    ## Cleanup
    #
    rm -f "${TMPDIR}/wget_mcmac_rmd$$"

    #echo "$$:" >> $LOGFILE
    #echo "$$:----------- Ending gbo_mcm_rmd_mirror_wget_test at $(date) ---------" >> $LOGFILE
done

exit 0