#!/usr/bin/ksh
#
# cdf_metadata_updater.ksh
#
# Update GMAG mastercdf and data-CDF metadata from a list of updates.
#
# Usage:
#   cdf_metadata_updater.ksh [-w workdir] [-s site_list] update_file
#
# Each line of update_file has one of the forms
#   site.global.attribute.entrynum|attributevalue
#   site.variablename.attribute|attributevalue
#
# Processing outline:
#   1. Copy the production mastercdf of every site into a work directory.
#   2. Dump the metadata of the copies with the IDL routine
#      record_metadata_all.
#   3. Diff that dump against the (sorted) update file; lines that exist only
#      in the update file are the updates to apply.
#   4. Apply the updates to the mastercdf copies (cdf_gattr / cdf_vattr).
#   5. Dump the metadata again and compare it with the update file.
#   6. Only when both agree, apply the same updates to the data CDFs.
#
# External commands used: idl, dos2unix, cdf_gattr, cdf_vattr
# (NOTE(review): presumably put on PATH by the setup files sourced below --
# confirm).
#
# Exit status: 0 on success, 1 on a missing/empty update file, an empty
# update shortlist or a failed verification, 2 on an invalid switch.
#

# Print "."-separated field number $2 of the key (the text before the first
# "|") of metadata line $1.
function metadata_key_field {
    print -r -- "$1" | awk -F "|" '{print $1}' | awk -F "." -v n="$2" '{print $n}'
}

# Print the attribute value (second "|"-separated field) of metadata line $1.
function metadata_value {
    print -r -- "$1" | awk -F "|" '{print $2}'
}

# Write the IDL batch file ${batch_script}: it runs record_metadata_all on the
# mastercdf copies, writing the metadata dump to file $1, and then exits IDL.
function write_idl_batch {
    echo "record_metadata_all, \"$1\", \"${sitelist_filepath}\", \"${mastercdf_copy_path}\"" > "${batch_script}"
    echo "exit" >> "${batch_script}"
}

# Apply every update that belongs to one site to one CDF file.
#
# Arguments:
#   $1 - site name
#   $2 - path of the CDF file to update
#   $3 - path of the file holding the update lines (all sites)
# Globals read:
#   workdir_path             - directory for temporary files
#   input_metadata_filepath  - sorted input update file; source of all entries
#                              of a multi-entry global attribute
function update_metadata {
    typeset site=$1
    typeset cdf_to_update=$2
    typeset metadata_updates=$3
    typeset site_updates update_array updateline update_array_element
    typeset attributescope attributename entrynum attribute_value

    # Select this site's updates. The site must be exactly the first
    # "."-separated field of the key, so a site name that merely occurs inside
    # another site's attribute value is not picked up.
    site_updates="${workdir_path}/tmp_site_updatelist.txt"
    awk -F "." -v site="${site}" '$1 == site' "${metadata_updates}" > "${site_updates}"

    while read -r updateline
    do
        echo "Update line = ${updateline} (date = $(date))"

        # key layout: site.scope.attribute[.entrynum]
        attributescope=$(metadata_key_field "${updateline}" 2)
        attributename=$(metadata_key_field "${updateline}" 3)
        echo "Scope = ${attributescope} (date = $(date))"
        echo "Attribute = ${attributename} (date = $(date))"

        # String comparison: -eq would evaluate both sides arithmetically.
        if [[ "${attributescope}" == "global" ]]
        then
            echo "Attribute is global"
            entrynum=$(metadata_key_field "${updateline}" 4)

            if [[ "${entrynum}" -gt 0 ]]
            then
                # An entry number above 0 means the attribute has several
                # entries: collect all of them from the input file and write
                # them as one combined value.
                echo "Attribute has multiple entries "
                update_array="${workdir_path}/tmp_site_global_multiline_attr_updatelist.txt"

                # Exact key-prefix match (trailing "." so that an attribute
                # whose name merely starts with this one is excluded), ordered
                # numerically by entry number so entry 10 follows entry 9.
                awk -F "|" -v prefix="${site}.${attributescope}.${attributename}." \
                    'index($1, prefix) == 1' "${input_metadata_filepath}" \
                    | sort -t "." -k 4,4n > "${update_array}"

                # Redirect into the loop (no pipeline) so attribute_value is
                # built in the current shell.
                attribute_value=""
                while read -r update_array_element
                do
                    attribute_value="${attribute_value}$(metadata_value "${update_array_element}") \n"
                done < "${update_array}"

                echo "Multiline attribute value:"
                echo "${attribute_value}"

                # update attribute using gattr and gattr -s:
                cdf_gattr "${cdf_to_update}" "${attributename}" "${attribute_value}"
                cdf_gattr -s "${cdf_to_update}" "${attributename}"
                echo "Attribute written! (date = $(date))"
            else
                echo "Attribute has a single entry"
                attribute_value=$(metadata_value "${updateline}")
                cdf_gattr "${cdf_to_update}" "${attributename}" "${attribute_value}"
                echo "Attribute written! (date = $(date))"
            fi
        else
            # Variable scope: the scope field is the variable name.
            echo "Attribute has variable scope"
            attribute_value=$(metadata_value "${updateline}")
            cdf_vattr "${cdf_to_update}" "${attributescope}" "${attributename}" "${attribute_value}"
            echo "Attribute written! (date = $(date))"
        fi
    done < "${site_updates}"

    rm "${site_updates}"
}

if [[ -z "${THMSOC}" ]]
then
    THMSOC=/disks/socware/thmsoc_dp_current
    export THMSOC
fi

. "${THMSOC}/src/config/soc_it_to_me.config"
. "${THMSOC}/src/config/setup_themis.bourne"
. /home/thmsoc/jwls_stuff/tmtools_7_50/tmtools_setup.bourne

# Read input options:
USAGE="[+NAME?cdf_metadata_updater.ksh]"
USAGE+="[+DESCRIPTION?Update GMAG mastercdf and datacdf metadata. Requires the first positional argument to be a file or filepath containing the list of updates. The updates must be separated by a newline and have the format 'site.global.attribute.entrynum|attributevalue' or 'site.variablename.attribute|attributevalue']"
USAGE+="[w:workdir?Directory in which to create a temporary processing directory. Current user must have permission to modify this directory.]:[workdir_basedir:=$(pwd)]"
USAGE+="[s:site_list?File containing sites to update. If not set, creates one from update file.]:[sitelist_filepath]"
USAGE+=$'\n\nupdate_file ...\n\n'

# set up workdir
workdir_basedir=$(pwd)
sitelist_filepath=""

while getopts "$USAGE" opt
do
    case $opt in
        w)
            workdir_basedir=$OPTARG
            ;;
        s)
            sitelist_filepath=$OPTARG
            ;;
        \?)
            print "$OPTARG is not a valid switch."
            print "$USAGE"
            exit 2
            ;;
    esac
done
# Drop the parsed options so that $1 is the update file.
shift $((OPTIND - 1))

# check for update file
update_filepath=$1
if [[ ! -s "${update_filepath}" ]]
then
    echo "Update file not found or is empty! Exiting... "
    exit 1
fi

# Start from an empty work directory: remove one left over from an earlier run.
workdir_path="${workdir_basedir}/tmp_workdir"
if [[ -d "${workdir_path}" ]]
then
    rm -r "${workdir_path}"
fi
mkdir -p "${workdir_path}"

# define files to contain production and updated metadata used in updating and verification steps
input_metadata_filepath="${workdir_path}/mastercdf_metadata_input_updates.txt"
production_metadata_filepath="${workdir_path}/mastercdf_metadata_production.txt"
verification_metadata_filepath="${workdir_path}/mastercdf_metadata_output_verification.txt"

# copying input file to workdir
cp "${update_filepath}" "${workdir_path}/mastercdf_metadata_input_updates_unsorted.txt"

echo "Removing possible windows line endings from input file (date = $(date))"
dos2unix "${workdir_path}/mastercdf_metadata_input_updates_unsorted.txt"
echo "Conversion to unix complete! (date = $(date))"

echo "Update file found; sorting..."
sort "${workdir_path}/mastercdf_metadata_input_updates_unsorted.txt" > "${input_metadata_filepath}"

# Without a user-supplied site list, derive one from the update keys.
# sort -u (rather than uniq) removes duplicates even if equal site names are
# not adjacent in the sorted input.
if [[ -z "${sitelist_filepath}" ]]
then
    sitelist_filepath="${workdir_path}/cdf_metadata_updater_site_list.txt"
    awk -F "|" '{print $1}' "${input_metadata_filepath}" | awk -F "." '{print $1}' | sort -u > "${sitelist_filepath}"
    dos2unix "${sitelist_filepath}"
fi

time_script_start=$(date)
t_s0=$(date +%s)
echo "Starting cdf_metadata_updater.ksh at ${time_script_start}"

# create directory to hold current mastercdf copies
mastercdf_production_path="/disks/socware/thmsoc_dp_current/src/mastercdfs/thg"
mastercdf_copy_path="${workdir_path}/mastercdf_copies"
mkdir -p "${mastercdf_copy_path}"

# for each site, copy production mastercdf to mastercdf copy directory
while read -r site
do
    cp "${mastercdf_production_path}/thg_l2_mag_${site}_00000000_v01.cdf" "${mastercdf_copy_path}/thg_l2_mag_${site}_00000000_v01.cdf"
done < "${sitelist_filepath}"

# run record_metadata_all.pro on mastercdf copy directory to create metadata txt file for current mastercdfs
batch_script="${workdir_path}/cdf_metadata_updater_idl_batch.bm"
write_idl_batch "${workdir_path}/mastercdf_metadata_production_unsorted.txt"

echo "Running batch script at $(date)"
idl "${batch_script}"
echo "Batch script completed at $(date)"

echo "Removing possible windows line endings from production metdata file (date = $(date))"
dos2unix "${workdir_path}/mastercdf_metadata_production_unsorted.txt"
echo "Conversion to unix complete! (date = $(date))"

echo "Sorting production metadata... "
sort "${workdir_path}/mastercdf_metadata_production_unsorted.txt" > "${production_metadata_filepath}"

# The updates are the lines present only in the input file, i.e. the diff
# lines that START with "> ". Matching on the line start keeps "<" lines whose
# value contains ">" out and keeps values that contain "> " intact.
metadata_updates="${workdir_path}/update_shortlist.txt"
echo "Determining new updates... $(date)"
diff "${production_metadata_filepath}" "${input_metadata_filepath}" | sed -n 's/^> //p' > "${metadata_updates}"

if [[ -s "${metadata_updates}" ]]
then
    echo "Update shortlist successfully created at $(date)"
    echo "Updates: "
    cat "${metadata_updates}"
else
    echo "ERROR: update shortlist couldn't be created; exiting at $(date)"
    exit 1
fi

# update mastercdfs:
while read -r site
do
    echo "Site = ${site} (date = $(date))"
    cdf_to_update="${mastercdf_copy_path}/thg_l2_mag_${site}_00000000_v01.cdf"
    update_metadata "${site}" "${cdf_to_update}" "${metadata_updates}"
done < "${sitelist_filepath}"

echo "Finished writing attributes! (date = $(date))"

# verify update worked correctly:
# first, run idl script, but this time on the updated mastercdfs:
write_idl_batch "${workdir_path}/updated_mastercdf_metadata_unsorted.txt"

echo "Creating verification file... (date = $(date))"
idl "${batch_script}"
echo "Verification file created! (date = $(date))"

# second, diff the input update file with the updated metadata file
# if empty, proceed; if not, report the differences
echo "Removing possible windows line endings from verification file (date = $(date))"
dos2unix "${workdir_path}/updated_mastercdf_metadata_unsorted.txt"
echo "Conversion to unix complete! (date = $(date))"

echo "Sorting updated metadata... "
sort "${workdir_path}/updated_mastercdf_metadata_unsorted.txt" > "${verification_metadata_filepath}"

exit_status=0
update_errors=$(diff "${input_metadata_filepath}" "${verification_metadata_filepath}")

if [[ -n "${update_errors}" ]]
then
    echo "Update unsuccessful; here are the differences between the input update file (left) and the updated metadata file (right):"
    # cat -A makes line endings and other invisible characters visible
    diff "${input_metadata_filepath}" "${verification_metadata_filepath}" | cat -A
    exit_status=1
else
    # if metadata updated correctly, continue to update the data CDFs:
    echo "Update successful! No differences detected."

    data_cdf_root="/disks/themisdata/thg/l2/mag"
    tmp_filelist="${workdir_path}/tmp_data_cdf_filelist.txt"
    cd "${workdir_path}"

    while read -r site
    do
        # List the data CDFs of this site. Searching from the absolute root
        # yields absolute paths, which stay valid whatever the current
        # directory is.
        find "${data_cdf_root}" -name "*${site}*.cdf" > "${tmp_filelist}"

        while read -r cdf_to_update
        do
            # for each filepath in list file, call update_metadata:
            update_metadata "${site}" "${cdf_to_update}" "${metadata_updates}"
        done < "${tmp_filelist}"

        rm "${tmp_filelist}"
    done < "${sitelist_filepath}"
fi

t_sf=$(date +%s)
((t_script = t_sf - t_s0))
echo "Total script duration: $t_script seconds."

echo "Done!"
exit ${exit_status}