;+
; FUNCTION:  FILE_RETRIEVE
; Purpose:
;   FILE_RETRIEVE provides a simple, transportable interface to retrieve data files.
;   It will download files from a remote web server and copy them into a local (cache) directory
;   maintaining the original directory structure. It returns the list of local file names.
;   By default files are only downloaded if the remote file is more recent.
;   This routine is specifically designed to be used with the same file system that the web server is using to serve files.
;   It can look for a MASTER_FILE that indicates the master file system is in use and it then bypasses the download process.
;   The file system can be a mix of directories that hold the original files being served as well as a copy of files from external servers.
;   The routine correctly handles multiple users sharing (and writing to) the same directories.
;
;   Works on LINUX, MAC, and Windows
;
;Usage:
;   files = file_retrieve(pathnames, [keyword options])
;
;Suggested usage:
;   ; First get a default structure that specifies where the files come from and where they will be stored locally
;   source = file_retrieve(/default_structure,REMOTE_DATA_DIR='http://sprg.ssl.berkeley.edu/data/',master_file='maven/.master')
;
;   ; Retrieve a MAVEN mag 1 sec resolution file for 2014-11-18
;   files = file_retrieve( 'maven/data/sci/mag/l2/sav/1sec/2014/11/mvn_mag_l2_pl_1sec_20141118.sav' ,_extra=source)
;
;   ; Retrieve an array of filenames within a time range:
;   files = file_retrieve( 'maven/data/sci/mag/l2/sav/1sec/YYYY/MM/mvn_mag_l2_pl_1sec_YYYYMMDD.sav',trange=['2014-12-30','2015-1-3'] ,_extra=source)
;
;   ; Retrieve "globbed" files from the SPDF:
;   ; A typical URL at SPDF is: 'http://spdf.sci.gsfc.nasa.gov/pub/data/wind/mfi/mfi_h0/2015/wi_h0_mfi_20151008_v03.cdf'
;   ; The source of these files is given by:
;   source = file_retrieve(/default, REMOTE_DATA_DIR='http://spdf.sci.gsfc.nasa.gov/pub/data/' , LOCAL_DATA_DIR = root_data_dir()+'istp/' )
;   ; The path specification is given by:
;   pathname = 'wind/mfi/mfi_h0/YYYY/wi_h0_mfi_YYYYMMDD_v??.cdf'
;   files = file_retrieve(pathname, trange=['2014-12-25','2015-1-4'],_extra=source,/last_version)
;   ; Subsequent calls will be much faster since the files will have been downloaded.
;
;Arguments:
;   pathnames:       String or string array with partial path to the remote file.
;                    (will be appended to remote_data_dir)
;   [newpathnames]:  (optional) String or string array with partial path to file destination.
;                    (Will be appended to local_data_dir) (NOT RECOMMENDED TO USE THIS OPTION.)
;
;Keywords:
;   REMOTE_DATA_DIR: String defining remote data directory.
;                    Pathnames will be appended to this variable.
;   LOCAL_DATA_DIR:  String or string array w/ local data directory(s).
;                    If newpathnames is set it will be appended to this variable; if not,
;                    pathnames will be appended.
;   MASTER_FILE:     (file pathname) If the file LOCAL_DATA_DIR+MASTER_FILE exists then no download or contact with the server is made.
;                    (same effect as NO_SERVER - but evaluated at run time)
;   NO_SERVER:       Set this keyword to prevent any contact with a remote server.
;
;   TRANGE:          One or two element array indicating the time range of interest. If set, then PATHNAMES will be expanded
;                    into an array of pathnames using the special character sequences to translate:
;                    YYYY, yy, MM, DD, hh, mm, ss, .f, DOY, DOW, TDIFF are special characters that will be substituted with the appropriate date/time field.
;                    Be especially careful of extensions that begin with '.f' since these will be translated into a fractional second.
;                    See "time_string" TFORMAT keyword for more info.
;
;   LAST_VERSION:    If set, then only the last of multiple file versions is downloaded and returned.
;                    (used in conjunction with "globbed" pathnames and version numbers.)
;
;   USER_PASS:       Username and password for secured systems; USER_PASS='username:password'
;   ARCHIVE_EXT:     string; Set archiving extension (i.e.: ARCHIVE_EXT = '.arc') to rename old files instead of deleting them.
;                    Prevents accidental file deletion.
;   ARCHIVE_DIR:     string; Set archiving subdirectory. (i.e.: ARCHIVE_DIR = '.archive/')
;
;   VALID_ONLY:      Set this keyword to return only existing files.
;
;   PRESERVE_MTIME:  If set, the local file will be given a modification time that is the same as the
;                    modification time of the file on the remote server.
;                    This keyword is ignored on (windows) machines that don't have touch installed. (No cygwin or GNU utils)
;                    Default is 1
;
;   VERBOSE:         Set Verbosity - 0 print almost nothing, 2 is typical, 4 and above is for debugging.
;
;   if_modified_since: Set to 0 to force download
;   user_agent:      String - User agent text to be sent to web server.
;   file_mode:       permissions for new files. Default is '666'o
;   dir_mode:        permissions for newly created directories. Default is '777'o
;
;   progobj:         Experimental option for a progress bar widget. (please ignore for now)
;   min_age_limit:   Files younger than this age (in seconds) are assumed current (avoids the need to recheck server). Default is 300
;   no_download:     Similar to NO_SERVER keyword. Should still allow remote directory retrieval - but not files.
;   no_update:       Set to 1 to prevent contact to server if local file already exists. (this is similar to no_clobber)
;
;History:
;   2012-6-25: local_data_dir and remote_data_dir accept array inputs
;              with the same # of elements as pathnames/newpathnames - DO NOT USE this option!
;
;$LastChangedBy: davin-mac $
;$LastChangedDate: 2019-02-13 17:49:40 -0800 (Wed, 13 Feb 2019) $
;$LastChangedRevision: 26627 $
;$URL: svn+ssh://thmsvn@ambrosia.ssl.berkeley.edu/repos/spdsoft/tags/spedas_4_1/general/misc/file_retrieve.pro $
;-

; The following is a crude function to determine if the internet is available.
; returns null string if no server can be reached
;
; SERVER_AVAILABLE
;   Tries to open a socket on port 80 to each host in SERVERS, in order, using
;   5 second connect and read timeouts.
; Arguments:
;   servers: string array of host names to probe.  If not set, a built-in list
;            is assigned to it (the caller's variable is modified).
; Keywords:
;   verbose: passed to dprint.
; Returns:
;   The first host name for which SOCKET reported no error, or '' if none did.
function server_available,servers,verbose=verbose
  if ~keyword_set(servers) then servers=['sprg.ssl.berkeley.edu','google.com','amazon.com','ssl.berkeley.edu']
  for i=0,n_elements(servers)-1 do begin
    server = servers[i]
    port = 80
    unit = 0
    socket, unit, Server, Port, /get_lun,/swap_if_little_endian,error=error,read_timeout=5,connect_timeout=5
    dprint,dlevel=2,verbose=verbose,server,error    ;,!error_state.msg
    ; unit stays 0 when no logical unit was obtained; only free a unit that was actually allocated
    if keyword_set(unit) then free_lun,unit
    if ~keyword_set(error) then return,server
  endfor
  return,''
end


; FILE_RETRIEVE_RESET_INTERNET
;   Sets the time (stored in common block FILE_RETRIEVE_COM) before which
;   FILE_RETRIEVE will not try to contact a remote server.
; Keywords:
;   delay: seconds from now.  If not set (or zero) it is assigned -1, i.e. the
;          time is placed one second in the past so server checks are re-enabled.
pro file_retrieve_reset_internet,delay=delay
  common file_retrieve_com, no_internet_until,wait_time
  if ~keyword_set(delay) then delay =-1
  no_internet_until = systime(1) + delay
end


; FILE_RETRIEVE
;   Order of operations (see the file header for the keyword descriptions):
;     1. STRUCTURE_FORMAT eq 1 : return the legacy {retrieve_struct} option structure.
;     2. /DEFAULT_STRUCTURE    : return a "source" structure (PSOURCE) for later use with _EXTRA.
;     3. SOURCE set            : re-call FILE_RETRIEVE with SOURCE passed as _EXTRA.
;     4. TRANGE set            : expand each pathname with TIME_INTERVALS and re-call
;                                FILE_RETRIEVE on the expanded names.
;     5. Otherwise             : optionally contact the server (ssl_wget, file_http_copy or
;                                file_copy2), then FILE_SEARCH the local names and return them.
; Returns:
;   String array of local file names ('' on a file/URL i/o error), or a structure
;   in cases 1 and 2.
function file_retrieve,pathnames, newpathnames, source=source, psource=psource, structure_format=structure_format,default_structure=default_structure, $
    use_wget=use_wget, nowait=nowait, $
    local_data_dir=local_data_dir,remote_data_dir=remote_data_dir, $
    master_file=master_file, $
    trange=trange,monthly_res=monthly_res,daily_res=daily_res,hourly_res=hourly_res,resolution=resolution,phase_shift=phase_shift, $
    min_age_limit=min_age_limit , $
    valid_only=valid_only, $
    file_mode = file_mode, $           ; permissions for new files. (if non-zero)
    dir_mode = dir_mode, $             ; permissions for newly created directories.
    recurse_limit=recurse_limit, $
    user_agent=user_agent, $
    user_pass=user_pass, $
    preserve_mtime=preserve_mtime, $
    restore_mtime=restore_mtime, $
    ascii_mode=ascii_mode, $
    strict_html=strict_html, $
    no_download=no_download,no_server=no_server, $
    no_update=no_update, $
    update_after = update_after, $
    if_modified_since=if_modified_since, $
    archive_ext=archive_ext, $
    archive_dir=archive_dir, $
    last_version = last_version , $
    oldversion_dir = oldversion_dir, $
    oldversion_ext = oldversion_ext, $
    force_download=force_download,$
    no_clobber=no_clobber, ignore_filesize=ignore_filesize, $
    verbose=verbose,progress=progress,progobj=progobj,links=links

  ; State shared with file_retrieve_reset_internet:
  ;   no_internet_until - systime(1) value before which no server contact is attempted
  ;   wait_time         - seconds to suspend server checks after a detected outage
  common file_retrieve_com, no_internet_until,wait_time
  if ~keyword_set(wait_time) then wait_time = 180
  if ~keyword_set(no_internet_until) then no_internet_until = systime(1)-1.

  dprint,dlevel=4,verbose=verbose,'Start; $Id: file_retrieve.pro 26627 2019-02-14 01:49:40Z davin-mac $'

  if size(/type, local_data_dir) ne 7 then local_data_dir = root_data_dir()

  if keyword_set(structure_format) && structure_format eq 1 then begin     ; Old version maintained for legacy code - don't use this any more.
    ; swver = strsplit('$Id: file_retrieve.pro 26627 2019-02-14 01:49:40Z davin-mac $',/extract)
    ; user_agent = strjoin(swver[1:3],' ')+' IDL'+!version.release + ' ' + !VERSION.OS + '/' + !VERSION.ARCH+ ' (' + (getenv('USER') ? getenv('USER') : getenv('USERNAME'))+')'
    if n_elements(user_agent) eq 0 then user_agent=''
    str= { $
      retrieve_struct, $
      init:0, $
      local_data_dir:local_data_dir, $     ;getenv('ROOT_DATA_DIR'),
      remote_data_dir:'', $
      progress: 1 , $           ; Currently unused keyword (progress is printed by default)
      user_agent:user_agent, $  ; User agent text to be sent to web server.
      file_mode:'666'o , $      ; permissions for new files. (if non-zero)
      dir_mode: '777'o , $      ; permissions for newly created directories.
      preserve_mtime: 1 , $     ; Set file modification to same as on file on server (uses file_touch executable)
      progobj: obj_new(), $     ; Experimental option for a progress bar widget. (please ignore for now)
      min_age_limit: 300L , $   ; Files younger than this age (in seconds) are assumed current (avoids the need to recheck server)
      no_server:0 , $           ; Set to 1 to prevent any contact with a remote server.
      no_download:0 , $         ; similar to NO_SERVER keyword. Should still allow remote directory retrieval - but not files.
      no_update:0 , $           ; Set to 1 to prevent contact to server if local file already exists. (this is similar to no_clobber)
      no_clobber:0 , $          ; Set to 1 to prevent existing files from being overwritten. (A warning message will be displayed if remote server has)
      archive_ext:'' , $        ; Set archiving extension. (i.e.: '.arc'). to rename old files instead of deleting them. Prevents accidental file deletion.
      archive_dir:'' , $        ; Set archiving subdirectory. (i.e.: 'archive/')
      ignore_filesize:0 , $     ; Set to 1 to ignore the remote/local file sizes when determining if updates are needed.
      ignore_filedate:0 , $     ; Not yet operational.
      downloadonly:0 , $        ; Set to 1 to only download files but not load files into memory.
      use_wget:0 , $            ; Experimental option (uses the routine SSL_WGET instead of file_http_copy)
      nowait:0 , $              ; Used with wget to download files in the background.
      verbose:2 , $
      force_download: 0 $       ;Allows download to be forced no matter modification time. Useful when moving between different repositories(e.g. QA and production data)
      }
    return, str
  endif

  if keyword_set(default_structure) then begin     ; pathnames not provided - return a default source structure
    ; (a 'verbose:2' default field used to be part of this structure; VERBOSE is now added below only as passed in)
    if ~keyword_set(psource) then psource = { $
      local_data_dir: local_data_dir, $
      remote_data_dir: size(/type,remote_data_dir) eq 7 ? remote_data_dir : '', $
      no_server:0 $             ; Set to 1 to prevent any contact with a remote server.
      }
    str_element,/add,psource,'MASTER_FILE',master_file
    str_element,/add,psource,'VERBOSE',verbose
    str_element,/add,psource,'MIN_AGE_LIMIT',min_age_limit
    str_element,/add,psource,'USER_PASS',user_pass
    str_element,/add,psource,'VALID_ONLY',valid_only
    return,psource
  endif

  ; When SOURCE is given, only PATHNAMES, NEWPATHNAMES, LINKS and the contents of SOURCE are forwarded.
  if keyword_set(source) then return, file_retrieve(pathnames,newpathnames,_extra=source,links=links)

  ;if keyword_set(no_download) then no_server = no_download   ; Leave this line commented out. The keyword NO_SERVER is independent of the NO_DOWNLOAD keyword
  ;if not keyword_set(local_data_dir) then local_data_dir = './'
  ;if not keyword_set(remote_data_dir) then remote_data_dir = ''

  vb = keyword_set(verbose) ? verbose : 0
  if n_elements(progress) eq 0 then progress=1
  ;if keyword_set(progress) then begin
  ;  progobj = obj_new('progressbar')
  ;endif

  ; This section will generate filenames based on a time range (and time resolution defaults to 1 day)
  if keyword_set(trange) then begin
    filenames = ''
    for i=0,n_elements(pathnames)-1 do begin
      ; NOTE(review): HOURLY_RES is accepted by FILE_RETRIEVE but is not forwarded here.
      ; Confirm whether TIME_INTERVALS accepts it before adding it to this call.
      pathnames_expanded = time_intervals(trange=trange,monthly_res=monthly_res,daily_res=daily_res,resolution=resolution,phase_shift=phase_shift,tformat=pathnames[i])
      num_pn = n_elements(pathnames_expanded)
      dprint,dlevel=(num_pn gt 1) ? 2 : 3,verbose=verbose,strtrim(num_pn,2)+' Pathnames expanded from "'+pathnames[i]+'" using TRANGE from: '+strjoin( time_string(trange) ,' to: ')
      ; MASTER_FILE must be forwarded: the check that bypasses the server when
      ; LOCAL_DATA_DIR+MASTER_FILE exists is made inside the nested call.
      fns = file_retrieve(pathnames_expanded,local_data_dir=local_data_dir,remote_data_dir=remote_data_dir, $
        master_file=master_file, $
        use_wget=use_wget, nowait=nowait, $
        min_age_limit=min_age_limit , valid_only=valid_only, $
        file_mode = file_mode, dir_mode = dir_mode, $
        recurse_limit=recurse_limit, $
        user_agent=user_agent, user_pass=user_pass, $
        preserve_mtime=preserve_mtime, restore_mtime=restore_mtime, $
        ascii_mode=ascii_mode, strict_html=strict_html, $
        no_download=no_download,no_server=no_server, $
        no_update=no_update, if_modified_since=if_modified_since, update_after=update_after, $
        archive_ext=archive_ext, archive_dir=archive_dir, $
        last_version = last_version , $
        oldversion_dir = oldversion_dir, oldversion_ext = oldversion_ext, $
        force_download=force_download, no_clobber=no_clobber, ignore_filesize=ignore_filesize, $
        verbose=verbose,progress=progress,progobj=progobj)
      if keyword_set(fns) then append_array, filenames,fns
    endfor
    return,filenames
  endif

  ;fullnames = filepath(root_dir=local_data_dir, pathnames)
  fullnames = local_data_dir + pathnames     ; trailing '/' is recommended, but not required on local_data_dir
  n0 = n_elements(fullnames)

  ; Pathnames containing glob characters disable the wget path.
  if keyword_set(use_wget) and total(/preserv,strmatch(pathnames,'*[ \* \? \[ \] ]*') ) ne 0 then begin
    use_wget=0
    dprint,dlevel=1,verbose=verbose,'Warning! WGET can not be used with wildcards!'
  endif

  ; Contact the server only if a remote directory is given and none of NO_SERVER,
  ; NO_DOWNLOAD or an existing master file forbids it.
  if keyword_set(remote_data_dir) && ~(keyword_set(no_server) || keyword_set(no_download) || ( (size(/type,master_file) eq 7) && file_test(local_data_dir+master_file) ) ) then begin
    if systime(1) gt no_internet_until then begin
      if keyword_set(use_wget) then begin
        ssl_wget,serverdir=remote_data_dir,localdir=local_data_dir,pathname=pathnames,verbose=verbose ,nowait=nowait
      endif else begin
        ;Set some defaults that are really essential for proper working of the system:
        if ~keyword_set(dir_mode) then dir_mode ='777'o
        if ~keyword_set(file_mode) then file_mode ='666'o
        if n_elements(min_age_limit) eq 0 then min_age_limit=300L     ; Wait a reasonable time (5 minutes) before trying again
        if n_elements(progress) eq 0 then progress = 1                ; Display progress on file downloads periodically
        if n_elements(preserve_mtime) eq 0 then preserve_mtime=1      ; Set the local file modification time to the servers modification time

        ; NOTE(review): only the 'http://' prefix selects file_http_copy; any other
        ; remote_data_dir (including 'https://') goes to file_copy2 below - confirm this is intended.
        http0 = strmid(remote_data_dir,0,7) eq 'http://'
        If obj_valid(progobj) Then progobj -> update, 0.0, text = string(format="('Retrieving ',i0,' files from ',a)",n0,remote_data_dir)   ;jmm, 15-may-2007

        for i = 0l,n0-1 do begin
          fn = fullnames[i]
          pn = pathnames[i]
          npn = keyword_set(newpathnames) ? newpathnames[i] : ''
          ;2012-6-25: these variables may be single value or array (Who made this change? - Might not be consistent with other options/ methods!)
          ;           error checks should probably be added to check # of elements between local_data_dir and
          ;           remote_data_dir (if arrays), pathnames, and newpathnames
          http = n_elements(http0) gt 1 ? http0[i]:http0
          ldd = n_elements(local_data_dir) gt 1 ? local_data_dir[i]:local_data_dir
          rdd = n_elements(remote_data_dir) gt 1 ? remote_data_dir[i]:remote_data_dir
          ; if keyword_set(no_update) and file_test(fn,/regular) then continue
          if http then begin
            file_http_copy,pn,npn,url_info=url_info,serverdir=rdd,localdir=ldd,verbose=verbose, $
              no_clobber=no_clobber,no_update=no_update, update_after=update_after , $
              ignore_filesize=ignore_filesize,progobj=progobj, progress=progress, $
              no_download = no_download, archive_ext=archive_ext,archive_dir=archive_dir, $
              ascii_mode=ascii_mode, recurse_limit=recurse_limit, if_modified_since=if_modified_since, $
              user_agent=user_agent, user_pass=user_pass, strict_html=strict_html , $
              preserve_mtime = preserve_mtime, restore_mtime=restore_mtime, $
              file_mode=file_mode,dir_mode=dir_mode,last_version=last_version, $
              min_age_limit=min_age_limit,force_download=force_download, $
              error =error,links=links
            if keyword_set(error) then begin
              dprint,dlevel=1,verbose=verbose,'Network Connection Error detected- Will use local copies only. ',error
              if ~server_available() then begin     ; This is a dangerous solution - Can't distinguish between "Remote server down" and "No connection to internet"
                no_internet_until = systime(1) + wait_time
                dprint,dlevel=0,verbose=verbose,'Disabling checks of server for '+strtrim(wait_time,2)+' seconds'
              endif
              ; stop contacting the server for the remaining files; local copies are searched below
              break
            endif
            if url_info[0].io_error ne 0 then begin
              dprint, "File or URL i/o error detected. See !error_state for more info"
              printdat,!error_state
              return,''
            endif
          endif else begin
            ; NOTE(review): this call passes the full remote_data_dir/local_data_dir rather than
            ; the per-element rdd/ldd computed above - verify for array inputs.
            file_copy2,serverdir=remote_data_dir,localdir=local_data_dir,pathname=pn,verbose=verbose,no_clobber=no_update
          endelse
        endfor
      endelse
    endif else begin
      dprint,'No Internet available until '+time_string(no_internet_until,/local)+ ' Unable to check: '+remote_data_dir+pathnames[0]
    endelse
  endif

  ; The following bit of code should find the highest version number if globbing is used.
  fullnames2 = ''
  for i=0,n_elements(fullnames)-1 do begin
    ff = file_search(fullnames[i],count=c)
    case c of
      0: begin
        dprint,dlevel=3,verbose=vb,'No matching file: "'+fullnames[i]+'"'
        ; unmatched names are still returned unless VALID_ONLY is set
        if ~keyword_set(valid_only) then append_array,fullnames2,fullnames[i]
      end
      1: begin
        dprint,dlevel=3,verbose=vb,'Found: "'+ff[0]+'"'
        ; fullnames[i] = ff[0]
        append_array,fullnames2,ff[0]
      end
      else: begin
        if keyword_set(last_version) then begin
          ; dprint,dlevel=2,verbose=vb,strtrim(c,2)+' matches found for: "'+fullnames[i]+'" Using last version.'
          dprint,dlevel=3,verbose=vb,'Using last version of '+strtrim(c,2)+' matches: '+ff[c-1]
          ; keep the last match returned by file_search and hand all earlier ones to file_archive
          append_array,fullnames2,ff[c-1]
          file_archive,ff[0:c-2],verbose=verbose,archive_dir=oldversion_dir,archive_ext=oldversion_ext
        endif else begin
          dprint,dlevel=2,verbose=vb,strtrim(c,2)+' matches found for: "'+fullnames[i]+'"'
          append_array,fullnames2,ff
          ; fullnames = ff
        endelse
      end
    endcase
  endfor

  return,fullnames2
end