;+
;PROCEDURE: THM_SST_ERANGE_BIN_VAL
;
;Purpose:
;   This routine generates the values that will be subtracted to remove electronic noise.  The actual subtraction is done by thm_sst_remove_sunpulse.pro
;   A separate value will be calculated for each combination of bin and energy, but a single value will be calculated for all times(at a particular bin/energy).  
;   This value will be generated using one of several different functions of the values across time.  The functions can be specified by the user. 
;   The user should never call this routine directly, but should instead provide the appropriate arguments to thm_part_moments,thm_part_moments2, & thm_part_getspec
;   These routines will guarantee that this routine is called correctly.
;   
;Arguments(note these arguments are generated correctly in thm_part_moments & thm_part_moments2)
;   thx:  a string representing a probe prefix(ie 'tha')
;
;   instrument:  a string representing the instrument(ie 'psif')  Note that this routine will only perform an operation if
;   it is passed a string representing sst full distribution data.  (ie 'psif', 'psef')
;
;   times:  a list of times for the sst measurements, this list is generated by thm_part_dist
;
;Keywords:
;
; method_clean: If set to 'manual', this keyword disables the enoise code, as another method will be used by thm_sst_remove_sunpulse.pro
;
; enoise_bins:  A 0-1 array that indicates which bins should be used to calculate electronic noise.  A 0 indicates that the
;               bin should be used for electronic noise calculations.  This is basically the output from the bins argument of edit3dbins.
;               It should have dimensions 16x4.
;
; enoise_bgnd_times:  This should be either a 2 element array or a 2xN element array(where n is the number of elements in enoise_bins).  
;                    The arguments represents the start and end times over which the electronic background will be calculated for each
;                    bin.  If you pass a 2 element array the same start and end times can be used for each bin.  If you pass a 2xN element
;                    array, then the Ith bin in enoise_bins will use the time enoise_bgnd_time[0,I] as the start time and enoise_bgnd_time[1,I] as
;                    the end time for the calculation of the background for that bin.  If this keyword is not set then electronic noise will not 
;                    be subtracted.
;
; 
;
; enoise_remove_method(default: 'fit_median') set the keyword to a string specifying the method you want to use to calculate the electronic noise that will be subtracted 
;                      This function combines values across time.  The allowable options are:
;                      'min':  Use the minimum value in the time interval for each bin/energy combination.
;                      'average': Use the average value in the time interval for each bin/energy combination.
;                      'median': Use the median value in the time interval for each bin/energy combination.
;                      'fit_average': Fill in selected bins with a value that is interpolated across phi then subtracts the average of the difference
;                      between the interpolated value and the actual value from each selected bin/energy combination.
;                      'fit_median' :Fill in selected bins with a value that is interpolated across phi then subtracts the median of the difference
;                      between the interpolated value and the actual value from each selected bin/energy combination.
;                      'fill':  Fill the selected bins using the technique specified by enoise_remove_method_fit, or interpolation by default. (note that if this method is used, enoise_bgnd_time is not required)
;                      
; enoise_remove_fit_method(default:'interpolation'):  Set this keyword to control the method used in 'fit_average' & 'fit_median' to
;                    fit across phi. Options are 'interpolation' & 'spin_fit' By default, missing bins are interpolated across phi.  Setting
;                    enoise_remove_fit_method='spin_fit' will instead try to fill by fitting to a curve of the form A+B*sin(phi)+C*cos(phi).
;                      
;        
;SEE ALSO:
;  thm_part_moments.pro, thm_part_moments2.pro, thm_part_getspec.pro
;  thm_part_dist.pro, thm_sst_psif.pro, thm_sst_psef.pro,thm_crib_sst_contamination.pro
;  thm_sst_find_masking.pro, thm_sst_remove_sunpulse.pro
;
; $LastChangedBy: pcruce $
; $LastChangedDate: 2012-02-22 10:15:21 -0800 (Wed, 22 Feb 2012) $
; $LastChangedRevision: 9805 $
; $URL: svn+ssh://thmsvn@ambrosia.ssl.berkeley.edu/repos/thmsoc/tags/tdas_7_00/idl/themis/spacecraft/particles/SST/thm_sst_erange_bin_val.pro $
;
;
;-

;HELPER function
;take the dat component of a structure and splits it into an array
;ordered in terms of theta =  energy*angle->energy*theta*phi
;dimensions 16*64->16*4*16,  phi is guaranteed to be contiguous but
;not necessarily ascending(some phi may be out of phase by 180 degrees)
;returns indices to perform this transformation
function dat2angsplit,dat

  compile_opt idl2,hidden
  
  index = indgen(16,64)
  t_sort = bsort(dat.theta[0,*])
  index = index[*,t_sort]
  return,transpose(reform(index,16,16,4),[0,2,1]) ; this magic properly reforms and orders the dimensions  

end

;HELPER function
;performs filling using an svd fit
function do_enoise_spin_fill,dat,ref,phi

  compile_opt idl2,hidden

  dat2 = dat

  good = where(finite(dat2))
  bad = ssl_set_complement(good,indgen(n_elements(dat)))
  
  if good[0] ne -1 && bad[0] ne -1 then begin

    idx = where(dat2[good] ne 0)
    
    if idx[0] eq -1 then begin  ;if the system of equations is not-invertible because it is all zeros then add a constant offset
      dat2[good] += 1
    endif
      
    except_val = !EXCEPT  ;svdfit annoyingly reports a floating point error EVERY TIME IT OCCURS if you don't disable them entirely
    !EXCEPT = 0  
    result = svdfit(phi[good],dat2[good],a=[1,1,1],function_name='svd_func',status=stat) ;fit the data
    !EXCEPT = except_val
                     
    if stat eq 0 then begin
    ; replace values with fit points
      dat2[bad] = result[0] + result[1]*sin(phi[bad]*!DTOR)+result[2]*cos(phi[bad]*!DTOR)         
    endif else begin
      dat2[bad] = ref[bad] ;replace values with ref values to ensure there are no NaNs in the output
    endelse
      
    
    if idx[0] eq -1 then begin ;remove a constant offset if one was added
      dat2 -= 1
    endif
  endif
  
  idx = where(~finite(dat2))
  
  return,dat2

end

function thm_sst_erange_bin_val,thx,instrument,times,enoise_bins=enoise_bins,enoise_bgnd_times=enoise_bgnd_time,enoise_remove_method=enoise_remove_method,enoise_remove_fit_method=enoise_remove_fit_method,method_clean=method_clean,zs2=zs2,_extra=ex

  compile_opt idl2 
 
  ;Validate inputs
  
  if keyword_set(method_clean) && strlowcase(method_clean) eq 'manual' then begin
    return,-1
  endif else if (keyword_set(method_clean) && strlowcase(method_clean) eq 'automatic') then begin
   
    if ~strmatch(instrument,'ps?f') && ~strmatch(instrument,'pseb') then begin
      dprint,'Automatic sun contamination  not enabled for instrument ' + instrument
      return,-1
    endif
   
    trange = times ; note that this is not nec the user specified time range. Is at least a full day.
    n_tm = n_elements(trange)
    format = thx+'_'+instrument
   
    if keyword_set(limit_sunpulse_clean) && is_num(limit_sunpulse_clean) then begin
      zscore_max = limit_sunpulse_clean
    endif else begin
      zscore_max = 10 ;In standard statistical tests, this is an extremely large limit, making for a weak outlier test.  
                      ;But for sun contamination outliers generally have gigantic scores, so it shouldn't be a problem.
                      ;What tricks the test isn't the threshold, but the heterogenous natural of the data.
                      ;A better method in the future would test attenuator open and closed data separately
    endelse
     
    totalzscore = dblarr(64)
    ; The modified z score for each angular bin (summed over energy) is calculated for each time.
    ; The truncated mean of these z scores over time is then used to find outliers.
    ; The test is applied for all energies summed and for bin 9 separately.  
    ; For some reason, there is sometimes a pattern of contamination the hits bin 9 only

    data_arr = dblarr(n_tm,16,64)
    
    for j = 0, n_tm-1 do begin ; loop over times
      dat = thm_part_dist(format,index=j,_extra=ex,/no_sun)  ;get data for this time
      if is_struct(dat) then begin
        data_arr[j,*,*]=dat.data/thm_sst_atten_scale(dat.atten,dimen(dat.data))
      endif
    endfor
    
    totaloverenergy = total(data_arr,2)
    ;totaloverenergy = reform(data_arr,n_tm,16*64)
    zscr_med = median(totaloverenergy+1,dimension=2); Adding 1 here prevents infinities in the case that all totaloverenergy=0
    ;zscr_sigma = median(abs(totaloverenergy-(zscr_med#(dblarr(64)+1))),dimension=2)
    zscr_sigma = median(abs(totaloverenergy-(zscr_med#(dblarr(64)+1))),dimension=2)
    zscr_arr = .6745*(totaloverenergy-zscr_med#(dblarr(64)+1))/(zscr_sigma#(dblarr(64)+1))
    ;totalzscore = total(zscr_arr,1)
    
    lim1=0.1 ;all energies
    lim2=0.1 ;bin 9
    
    ;sort so that we can do a truncated mean
    ;This removes any outliers in the z-score days due to glitches
    ;Glitches can create z-scores that are big enough that they throw off an entire day's average even though they represent only a few points out of 10-20k points
    for j = 0,64-1 do begin
      sort_idx=bsort(zscr_arr[*,j])
      zscr_arr[*,j]=zscr_arr[sort_idx,j]  
    endfor
    
    low_ten=floor(n_tm*lim1)
    high_ten=floor(n_tm*(1-lim1))
    
    ;truncated mean removes outliers due to glitches.
    averagezscore = total(zscr_arr[low_ten:high_ten,*],1,/nan)/(high_ten-low_ten)
    ;averagezscore=total(zscr_arr,1,/nan)/n_tm
        
    ;averagezscore = totalzscore/n_tm
    zscore_bad = where(averagezscore gt zscore_max,c); not using abs value as we are really interested in values too high, not too low (masking is removed separately)
    out = dblarr(16,64)
    if c gt 0 then begin
      dprint,'Sun Contaminated Bins Flagged: ' + strjoin(strtrim(zscore_bad,2),' '),dlevel=2
      out[*,zscore_bad] = !VALUES.D_NAN ;store result
    endif
    
    ;We do a separate test on energy bin 9.  For a reason that is not understood, there is a pattern of sun contamination that only affects bin 9 on occassion.
    ;Possible explanations include idiosyncrasies of the electronics & glint off of the side of the collimator.
    totaloverenergy=reform(data_arr[*,9,*])
    ;totaloverenergy = reform(data_arr,n_tm,16*64)
    zscr_med = median(totaloverenergy+1,dimension=2); Adding 1 here prevents infinities in the case that all totaloverenergy=0
    zscr_sigma = median(abs(totaloverenergy-(zscr_med#(dblarr(64)+1))),dimension=2)
    zscr_arr = .6745*(totaloverenergy-zscr_med#(dblarr(64)+1))/(zscr_sigma#(dblarr(64)+1))
    ;totalzscore = total(zscr_arr,1)
    
    ;sort so that we can do a truncated mean
    for j = 0,64-1 do begin
      sort_idx=bsort(zscr_arr[*,j])
      zscr_arr[*,j]=zscr_arr[sort_idx,j]  
    endfor
    
    low_ten=floor(n_tm*lim2)
    high_ten=floor(n_tm*(1-lim2))
    
    ;truncated mean removes outliers due to glitches.
    averagezscore = total(zscr_arr[low_ten:high_ten,*],1,/nan)/(high_ten-low_ten)
    ;averagezscore=total(zscr_arr,1,/nan)/n_tm
        
    ;averagezscore = totalzscore/n_tm
    zscore_bad = where(averagezscore gt zscore_max,c); not using abs value as we are really interested in values too high, not too low (masking is removed separately)
    if c gt 0 then begin
      dprint,'Earth Shine Contaminated Bins Flagged: ' + strjoin(strtrim(zscore_bad,2),' '),dlevel=2
      out[*,zscore_bad] = !VALUES.D_NAN ;store result
    endif

    return,out 
   
  endif else if keyword_set(method_clean) then begin
    dprint,'Unrecognized value for keyword method_clean: ' + method_clean
    dprint,'Did you mean to use method_sunpulse_clean?"
  endif 
  
  
; enoise_bins rather than automatic clean

  if ~keyword_set(enoise_bins) then begin
    return,-1
  endif

  if ~strmatch(instrument,'ps?f') && ~strmatch(instrument,'pseb') then begin
    dprint,'Electronic removal not enabled for instrument ' + instrument
    return,-1
  endif

  ;fix to use 0-1 array rather than indexes
  enoise_bins2 = where(enoise_bins eq 0,bin_cnt)
  
  if enoise_bins2[0] eq -1 then begin
    dprint,'No bins selected for ' + thx + '/' + instrument + '  Enoise removal not applied'
    return,-1
  endif
  
  if keyword_set(enoise_remove_method) && strlowcase(enoise_remove_method) eq 'fill' then begin
 
    dprint,'Enoise Filling ' + strtrim(bin_cnt,2) + ' angle bins for ' + thx + '/' + instrument
    out = dblarr(16,64) ;output array
    out[*,enoise_bins2] = !VALUES.D_NAN
    return,out
  
  endif
  
  if ~keyword_set(enoise_bgnd_time) then begin
    dprint,'Cannot apply selected enoise remove method for '+ thx + '/' + instrument + '.  Enoise_bgnd_time not set'
  endif
  
  d = dimen(enoise_bgnd_time)
  
  if d[0] ne 2 then begin
    dprint,'enoise_bgnd_time has illegal dimensions, electronic noise removal will not be performed'
    return,-1
  endif
 
  n_bin = n_elements(enoise_bins2)
  
  if n_elements(d) eq 1 then begin 
    e_times = make_array(2,n_bin,type=size(enoise_bgnd_time,/type))
    e_times[0,*] = enoise_bgnd_time[0]
    e_times[1,*] = enoise_bgnd_time[1]
  endif else begin
    e_times = enoise_bgnd_time
  
    if d[1] ne n_bin then begin
      dprint,'number of enoise_bins does not match number of enoise_times, electronic noise removal will not be performed'
      return,-1
    endif
    
  endelse
    

  
  ;convert to numeric format if input is not numeric
  if is_string(e_times) then begin
    e_times = time_double(e_times)
  endif
  
  ;Identify method
  if ~keyword_set(enoise_remove_method) then begin
    enoise_remove_method = 'fit_median'
  endif
  
  if strlowcase(enoise_remove_method) eq 'fit_median' then begin
    fit_median = 1
  endif else if strlowcase(enoise_remove_method) eq 'fit_average' then begin
    fit_average = 1
  endif else if strlowcase(enoise_remove_method) eq 'median' then begin
    median = 1
  endif else if strlowcase(enoise_remove_method) eq 'average' then begin
    average = 1
  endif else if strlowcase(enoise_remove_method) eq 'min' then begin
    min = 1
  endif else begin
    dprint,'enoise remove method "' + enoise_remove_method + '" unrecognized, electronic noise removal will not be performed'
    return,-1
  endelse
  
  if (keyword_set(min) || keyword_set(average) || keyword_set(min)) && $
     keyword_set(enoise_remove_fit_method) then begin
     dprint,'Enoise fit method does not have an effect if remove_method is not fit_average,fit_median, or fill'
  endif 
  
  if ~keyword_set(enoise_remove_fit_method) then begin
    enoise_remove_fit_method = 'interpolation'
  endif
  
  if strlowcase(enoise_remove_fit_method) eq 'interpolation' then begin
    interpol_fit = 1
  endif else if strlowcase(enoise_remove_fit_method) eq 'spin_fit' then begin
    spin_fit = 1
  endif else begin
    dprint,'enoise fit method "' + enoise_remove_fit_method + '" unrecognized, electronic noise removal will not be performed'
    return,-1
  endelse
 
  
  format = thx+'_'+instrument
  
  out = dblarr(16,64) ;output array
  
  for i = 0,n_bin-1 do begin ; loop over angle bins
  
    trange = e_times[*,i]  ; get times and bins
    ind = where(times ge trange[0] and times le trange[1],cnt)      
    bin = enoise_bins2[i]
      
    if cnt eq 0 then begin
      dprint,'No valid times found for bin: ' + strcompress(string(bin))
      continue
    endif
      
    n_tm = n_elements(ind)  ;loop upper bound
    totd = dblarr(16,n_tm)   ;temporary storage of processed values
    totm = dblarr(16,n_tm)
      
;    print,'TIME!!! ',n_tm
;    print,ind
;    print,time_string(times[ind])
      
    for j = 0, n_tm-1 do begin ; loop over times
      
      dat = thm_part_dist(format,index=ind[j],_extra=ex)  ;get data for this time
      
      ;copy of Jim M's quick fix, to guarantee time matchup
      If(size(dat, /type) Eq 8) Then Begin
        mid_time = (dat.time+dat.end_time)/2.0
        trange2 = [dat.time, dat.end_time]             
        dat.time = mid_time
        str_element, dat, 'end_time', /delete
        str_element, dat, 'trange', trange2, /add_replace
      Endif
      
      if is_struct(dat) then begin
           
        if keyword_set(fit_median) || keyword_set(fit_average) then begin
       
          ang_split = dat2angsplit(dat)   ;indexes to turn the data from 16x64 to 16x4x64
          edat = dat
          edat.data[*,enoise_bins2] = !VALUES.F_NAN ;these bins are going to be interpolated over 
          data = dat.data[ang_split]
          edata = edat.data[ang_split]
          phi = dat.phi[ang_split]
          
          thetas = dat.theta[uniq(dat.theta,sort(dat.theta))]    
          theta = where(dat.theta[0,bin] eq thetas)    ;which theta is the bin in question in 
          
          phi[*,[0,2],0:7] -= 360  ; make phi's monotonic      
  
          for k = 0, 16-1 do begin  ;loop over energy
          
             phis = reform(phi[k,theta,*])
             edatas = reform(edata[k,theta,*])
             ref_datas = reform(data[k,theta,*])
             
             triple_phi = [phis-360,phis,phis+360]
             triple_dat = [edatas,edatas,edatas]
             triple_ref = [ref_datas,ref_datas,ref_datas]
             
             if keyword_set(interpol_fit) then begin
               interp_gap, triple_phi, triple_dat
             endif else begin
               triple_dat = do_enoise_spin_fill(triple_dat,triple_ref,triple_phi)
             endelse
             ;triple_dat = do_spin_fill(triple_dat,triple_ref,triple_phi)          
       
             edata[k,theta,*] = triple_dat[16:31]
             ;data[k,theta,*] = edatas
         endfor
         
         ;replace data
         inv_ang_split = bsort(ang_split)
         edat.data = edata[inv_ang_split]        
         totm[*,j] = edat.data[*,bin]
         
        endif
       
        totd[*,j] = dat.data[*,bin]  ;store the data for this part of the time interval
     
      endif
    endfor
    
    if keyword_set(fit_median) then begin  ;perform requested processing operation  
      out[*,bin] = median(totd,dimension=2) - median(totm,dimension=2)
    endif else if keyword_set(fit_average) then begin
      out[*,bin] = average(totd,2,/nan) - average(totm,2,/nan)
    endif else if keyword_set(median) then begin
      out[*,bin] = median(totd,dimension=2)
    endif else if keyword_set(average) then begin
      out[*,bin] = average(totd,2,/nan)
    endif else if keyword_set(min) then begin
      out[*,bin] = min(totd,dimension=2,/nan)
    endif
    
  endfor
  
;  idx = where(out lt 0,c)
;  
;  if c gt 0 then begin
;    out[idx] = 0
;  endif
   
  return,out
   
end